From 94749860c834dab896dbe4907cc6ecd28840bb20 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <dresserd@gmail.com>
Date: Mon, 9 Jan 2023 15:47:51 -0800
Subject: [PATCH 01/20] RankFilter : Add perf tests

---
 python/GafferImageTest/DilateTest.py | 15 +++++++++++++++
 python/GafferImageTest/ErodeTest.py  | 15 +++++++++++++++
 python/GafferImageTest/MedianTest.py | 15 +++++++++++++++
 3 files changed, 45 insertions(+)

diff --git a/python/GafferImageTest/DilateTest.py b/python/GafferImageTest/DilateTest.py
index 0eca5f95580..5ccd861bc9d 100644
--- a/python/GafferImageTest/DilateTest.py
+++ b/python/GafferImageTest/DilateTest.py
@@ -138,5 +138,20 @@ def testDriverChannel( self ) :
 			# a master
 			self.assertImagesEqual( masterDilateSingleChannel["out"], defaultDilateSingleChannel["out"] )
 
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 )
+	def testPerf( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		GafferImageTest.processTiles( imageReader["out"] )
+
+		dilate = GafferImage.Dilate()
+		dilate["in"].setInput( imageReader["out"] )
+		dilate["radius"].setValue( imath.V2i( 128 ) )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( dilate["out"] )
+
 if __name__ == "__main__":
 	unittest.main()
diff --git a/python/GafferImageTest/ErodeTest.py b/python/GafferImageTest/ErodeTest.py
index 32faaebd933..ed8b7170b88 100644
--- a/python/GafferImageTest/ErodeTest.py
+++ b/python/GafferImageTest/ErodeTest.py
@@ -140,5 +140,20 @@ def testDriverChannel( self ) :
 			# a master
 			self.assertImagesEqual( masterErodeSingleChannel["out"], defaultErodeSingleChannel["out"] )
 
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 )
+	def testPerf( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		GafferImageTest.processTiles( imageReader["out"] )
+
+		erode = GafferImage.Erode()
+		erode["in"].setInput( imageReader["out"] )
+		erode["radius"].setValue( imath.V2i( 128 ) )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( erode["out"] )
+
 if __name__ == "__main__":
 	unittest.main()
diff --git a/python/GafferImageTest/MedianTest.py b/python/GafferImageTest/MedianTest.py
index c47b7b83708..9ebaf1c64ff 100644
--- a/python/GafferImageTest/MedianTest.py
+++ b/python/GafferImageTest/MedianTest.py
@@ -172,5 +172,20 @@ def testCancellation( self ) :
 		bt.cancelAndWait()
 		self.assertLess( time.time() - t, acceptableCancellationDelay )
 
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 )
+	def testPerf( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		GafferImageTest.processTiles( imageReader["out"] )
+
+		median = GafferImage.Median()
+		median["in"].setInput( imageReader["out"] )
+		median["radius"].setValue( imath.V2i( 128 ) )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( median["out"] )
+
 if __name__ == "__main__":
 	unittest.main()

From 1bc8ae3b0b3e2df0c25a2cb19b02a661d527791f Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Fri, 12 May 2023 15:44:55 -0700
Subject: [PATCH 02/20] Resample : Test perf

---
 python/GafferImageTest/ResampleTest.py | 84 +++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/python/GafferImageTest/ResampleTest.py b/python/GafferImageTest/ResampleTest.py
index ac96c7f53ee..d262f33ca47 100644
--- a/python/GafferImageTest/ResampleTest.py
+++ b/python/GafferImageTest/ResampleTest.py
@@ -151,7 +151,8 @@ def __test( fileName, size, filter ) :
 		]
 
 		for args in tests :
-			__test( *args )
+			with self.subTest( fileName = args[0], size = args[1], ftilter = args[2] ):
+				__test( *args )
 
 	def testSincUpsize( self ) :
 
@@ -220,5 +221,86 @@ def testNonFlatThrows( self ) :
 
 		self.assertRaisesDeepNotSupported( resample )
 
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testPerfHorizontal( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		resize = GafferImage.Resize()
+		resize["in"].setInput( imageReader["out"] )
+		resize["format"].setValue( GafferImage.Format( 1920, 1080, 1.000 ) )
+
+		resample = GafferImage.Resample()
+		resample["in"].setInput( resize["out"] )
+		resample["filter"].setValue( 'box' )
+		resample["filterScale"].setValue( imath.V2f( 300.0 ) )
+
+		GafferImageTest.processTiles( resize["out"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( resample["__horizontalPass"] )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testPerfVertical( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		resize = GafferImage.Resize()
+		resize["in"].setInput( imageReader["out"] )
+		resize["format"].setValue( GafferImage.Format( 1920, 1080, 1.000 ) )
+
+		resample = GafferImage.Resample()
+		resample["in"].setInput( resize["out"] )
+		resample["filter"].setValue( 'box' )
+		resample["filterScale"].setValue( imath.V2f( 300.0 ) )
+
+		GafferImageTest.processTiles( resample["__horizontalPass"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( resample["out"] )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testPerfSmallFilter( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		resize = GafferImage.Resize()
+		resize["in"].setInput( imageReader["out"] )
+		resize["format"].setValue( GafferImage.Format( 4000, 4000, 1.000 ) )
+
+		resample = GafferImage.Resample()
+		resample["in"].setInput( resize["out"] )
+		resample["filter"].setValue( 'box' )
+		resample["filterScale"].setValue( imath.V2f( 4.0 ) )
+
+		GafferImageTest.processTiles( resize["out"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( resample["out"] )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testPerfVerySmallFilter( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		resize = GafferImage.Resize()
+		resize["in"].setInput( imageReader["out"] )
+		resize["format"].setValue( GafferImage.Format( 4000, 4000, 1.000 ) )
+
+		resample = GafferImage.Resample()
+		resample["in"].setInput( resize["out"] )
+		resample["filter"].setValue( 'box' )
+		resample["filterScale"].setValue( imath.V2f( 2.0 ) )
+
+		GafferImageTest.processTiles( resize["out"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( resample["out"] )
+
+
 if __name__ == "__main__":
 	unittest.main()

From 7778e7463860e1e244175a1c9fac1639df264752 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Mon, 15 May 2023 08:57:20 -0700
Subject: [PATCH 03/20] Resize : Add perf tests

---
 python/GafferImageTest/ResizeTest.py | 30 ++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/python/GafferImageTest/ResizeTest.py b/python/GafferImageTest/ResizeTest.py
index 387e827611a..c57c1e7c84c 100644
--- a/python/GafferImageTest/ResizeTest.py
+++ b/python/GafferImageTest/ResizeTest.py
@@ -306,5 +306,35 @@ def testPixelAspectRatio( self ) :
 
 						self.assertEqual( r["out"]["dataWindow"].getValue(), r["out"]["format"].getValue().getDisplayWindow() )
 
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testSimplestUpscalePerf( self ) :
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		r = GafferImage.Resize()
+		r["in"].setInput( imageReader["out"] )
+		r["format"].setValue( GafferImage.Format( 10000, 10000 ) )
+		r["filter"].setValue( 'box' )
+
+		GafferImageTest.processTiles( imageReader["out"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( r["out"] )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testSimpleUpscalePerf( self ) :
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		r = GafferImage.Resize()
+		r["in"].setInput( imageReader["out"] )
+		r["format"].setValue( GafferImage.Format( 10000, 10000 ) )
+		r["filter"].setValue( 'triangle' )
+
+		GafferImageTest.processTiles( imageReader["out"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( r["out"] )
+
 if __name__ == "__main__":
 	unittest.main()

From 6bf9ac4e5ff3c1231ea2bf3b7108e4f1d2dab1c6 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Fri, 3 Mar 2023 18:24:51 -0800
Subject: [PATCH 04/20] DilateTest : Add test that should make certain errors
 obvious

---
 python/GafferImageTest/DilateTest.py | 46 ++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/python/GafferImageTest/DilateTest.py b/python/GafferImageTest/DilateTest.py
index 5ccd861bc9d..2bb6faf3b6e 100644
--- a/python/GafferImageTest/DilateTest.py
+++ b/python/GafferImageTest/DilateTest.py
@@ -90,6 +90,52 @@ def testFilter( self ) :
 
 		self.assertEqual( s.sample( 112, 112 ), 1.0 )
 
+	def testShape( self ):
+		constant1 = GafferImage.Constant( "constant1" )
+		constant1["color"].setValue( imath.Color4f( 1, 1, 1, 1 ) )
+		crop1 = GafferImage.Crop( "crop1" )
+		crop1["in"].setInput( constant1["out"] )
+		crop1["area"].setValue( imath.Box2i( imath.V2i( 3, 7 ), imath.V2i( 4, 8 ) ) )
+		crop1["affectDisplayWindow"].setValue( False )
+
+		constant2 = GafferImage.Constant( "constant2" )
+		constant2["color"].setValue( imath.Color4f( 2, 2, 2, 1 ) )
+		crop2 = GafferImage.Crop( "crop2" )
+		crop2["in"].setInput( constant2["out"] )
+		crop2["area"].setValue( imath.Box2i( imath.V2i( 6, 4 ), imath.V2i( 7, 5 ) ) )
+		crop2["affectDisplayWindow"].setValue( False )
+
+		merge = GafferImage.Merge( "merge" )
+		merge["in"][0].setInput( crop1["out"] )
+		merge["in"][1].setInput( crop2["out"] )
+		merge["operation"].setValue( 8 )
+
+		dilate = GafferImage.Dilate( "dilate" )
+		dilate["in"].setInput( merge["out"] )
+		dilate["expandDataWindow"].setValue( True )
+
+		expectedResultsText = """
+0000000000 0000000000 0000000000
+0000000000 0000000000 0000000000
+0000000000 0000000000 0000222220
+0000000000 0000022200 0000222220
+0000002000 0000022200 0000222220
+0000000000 0000022200 0111222220
+0000000000 0011100000 0111222220
+0001000000 0011100000 0111110000
+0000000000 0011100000 0111110000
+0000000000 0000000000 0111110000
+"""
+		expectedResults = expectedResultsText.splitlines()[1:]
+
+		for r in [ 0, 1, 2 ]:
+			dilate["radius"].setValue( imath.V2i( r ) )
+
+			s = GafferImage.Sampler( dilate['out'], "R", imath.Box2i( imath.V2i( 0 ), imath.V2i( 10 ) ) )
+			for y in range( 10 ):
+				for x in range( 10 ):
+					self.assertEqual( s.sample( x, y ), int( expectedResults[ y ][ x + r * 11 ] ) )
+
 	def testDriverChannel( self ) :
 
 		rRaw = GafferImage.ImageReader()

From b63523abfaf4cea2c5bd3ef495b962929de5ccae Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Wed, 17 May 2023 11:32:32 -0700
Subject: [PATCH 05/20] ResampleTest : Add perf tests for inseparable filters

---
 python/GafferImageTest/ResampleTest.py | 40 ++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/python/GafferImageTest/ResampleTest.py b/python/GafferImageTest/ResampleTest.py
index d262f33ca47..0f8f3ce39f0 100644
--- a/python/GafferImageTest/ResampleTest.py
+++ b/python/GafferImageTest/ResampleTest.py
@@ -301,6 +301,46 @@ def testPerfVerySmallFilter( self ) :
 		with GafferTest.TestRunner.PerformanceScope() :
 			GafferImageTest.processTiles( resample["out"] )
 
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 )
+	def testPerfInseparableLanczos( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		resize = GafferImage.Resize()
+		resize["in"].setInput( imageReader["out"] )
+		resize["format"].setValue( GafferImage.Format( 64, 64 ) )
+
+		resample = GafferImage.Resample()
+		resample["in"].setInput( resize["out"] )
+		resample["filter"].setValue( 'radial-lanczos3' )
+		resample["filterScale"].setValue( imath.V2f( 20.0 ) )
+
+		GafferImageTest.processTiles( resize["out"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( resample["out"] )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 )
+	def testPerfInseparableDisk( self ) :
+
+		imageReader = GafferImage.ImageReader()
+		imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' )
+
+		resize = GafferImage.Resize()
+		resize["in"].setInput( imageReader["out"] )
+		resize["format"].setValue( GafferImage.Format( 1920, 1920 ) )
+
+		resample = GafferImage.Resample()
+		resample["in"].setInput( resize["out"] )
+		resample["filter"].setValue( 'disk' )
+		resample["filterScale"].setValue( imath.V2f( 20.0 ) )
+
+		GafferImageTest.processTiles( resize["out"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( resample["out"] )
+
 
 if __name__ == "__main__":
 	unittest.main()

From 3eb61f708229e9c4f25bb0ac15418a6bbca388c2 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Tue, 16 May 2023 14:53:52 -0700
Subject: [PATCH 06/20] Warp : Add support for "bilinear" filter

This is a very low quality, but fast approach ( it also offer a way of profiling the performance of Sampler::sample( float, float ), which previously wasn't used anywhere critical )
---
 python/GafferImageUI/WarpUI.py |  8 ++++++--
 src/GafferImage/Warp.cpp       | 33 ++++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/python/GafferImageUI/WarpUI.py b/python/GafferImageUI/WarpUI.py
index 924890c1d11..8567c225dec 100644
--- a/python/GafferImageUI/WarpUI.py
+++ b/python/GafferImageUI/WarpUI.py
@@ -69,8 +69,10 @@
 			"""
 			The filter used to perform the resampling. The name
 			of any OIIO filter may be specified, but this UI
-			only exposes a limited range of 4 options which perform
-			well for warping, ordered from softest to sharpest.
+			only exposes a limited range of 5 options which perform
+			well for warping, ordered from softest to sharpest. Plus
+			the extra "bilinear" mode which is lower quality, but
+			fast.
 			""",
 
 			"plugValueWidget:type", "GafferUI.PresetsPlugValueWidget",
@@ -80,6 +82,7 @@
 			"preset:Keys", "keys",
 			"preset:Simon", "simon",
 			"preset:Rifman", "rifman",
+			"preset:Bilinear", "bilinear",
 
 		],
 
@@ -94,6 +97,7 @@
 			""",
 
 			"userDefault", False,
+			"layout:activator", lambda plug : plug.node()["filter"].getValue() != "bilinear",
 		],
 
 	}
diff --git a/src/GafferImage/Warp.cpp b/src/GafferImage/Warp.cpp
index 0503a29e34a..82c2feeb9e0 100644
--- a/src/GafferImage/Warp.cpp
+++ b/src/GafferImage/Warp.cpp
@@ -309,7 +309,8 @@ void Warp::hash( const Gaffer::ValuePlug *output, const Gaffer::Context *context
 		h.append( tileOrigin );
 		enginePlug()->hash( h );
 
-		bool useDerivatives = useDerivativesPlug()->getValue();
+		std::string filterName = filterPlug()->getValue();
+		bool useDerivatives = ( filterName != "bilinear" ) && useDerivativesPlug()->getValue();
 		h.append( useDerivatives );
 		if( useDerivatives )
 		{
@@ -331,7 +332,7 @@ void Warp::hash( const Gaffer::ValuePlug *output, const Gaffer::Context *context
 
 		// The sampleRegionsPlug() includes an overall bound for the tile which depends on the filter
 		// support width
-		filterPlug()->hash( h );
+		h.append( filterName );
 	}
 
 	FlatImageProcessor::hash( output, context, h );
@@ -359,8 +360,14 @@ void Warp::compute( Gaffer::ValuePlug *output, const Gaffer::Context *context )
 			dataWindow = outPlug()->dataWindowPlug()->getValue();
 		}
 
-		const OIIO::Filter2D *filter = FilterAlgo::acquireFilter( filterPlug()->getValue() );
-		const float filterWidth = filter->width();
+		std::string filterName = filterPlug()->getValue();
+		const OIIO::Filter2D *filter = nullptr;
+		float filterWidth = 1.0f;
+		if( filterName != "bilinear" )
+		{
+			filter = FilterAlgo::acquireFilter( filterPlug()->getValue() );
+			filterWidth = filter->width();
+		}
 
 		const V2i tileOrigin = context->get<V2i>( ImagePlug::tileOriginContextName );
 
@@ -417,7 +424,7 @@ void Warp::compute( Gaffer::ValuePlug *output, const Gaffer::Context *context )
 		std::vector< V2f > &pixelInputDerivatives = pixelInputDerivativesData->writable();
 		pixelInputDerivatives.reserve( ImagePlug::tileSize() * ImagePlug::tileSize() );
 
-		bool useDerivatives = useDerivativesPlug()->getValue();
+		bool useDerivatives = filter && useDerivativesPlug()->getValue();
 
 		if( useDerivatives )
 		{
@@ -650,7 +657,12 @@ IECore::ConstFloatVectorDataPtr Warp::computeChannelData( const std::string &cha
 	vector<float> &result = resultData->writable();
 	result.reserve( ImagePlug::tileSize() * ImagePlug::tileSize() );
 
-	const OIIO::Filter2D *filter = FilterAlgo::acquireFilter( filterPlug()->getValue() );
+	std::string filterName = filterPlug()->getValue();
+	const OIIO::Filter2D *filter = nullptr;
+	if( filterName != "bilinear" )
+	{
+		filter = FilterAlgo::acquireFilter( filterPlug()->getValue() );
+	}
 
 
 	const Box2i &tileInputBound = sampleRegions->member< Box2iData >( g_tileInputBoundName, true )->readable();
@@ -685,7 +697,14 @@ IECore::ConstFloatVectorDataPtr Warp::computeChannelData( const std::string &cha
 				const V2f &input = pixelInputPositions[i];
 				if( input != Engine::black )
 				{
-					v = FilterAlgo::sampleBox( sampler, input, pixelInputDerivatives[i].x, pixelInputDerivatives[i].y, filter, scratchMemory );
+					if( filter )
+					{
+						v = FilterAlgo::sampleBox( sampler, input, pixelInputDerivatives[i].x, pixelInputDerivatives[i].y, filter, scratchMemory );
+					}
+					else
+					{
+						v = sampler.sample( input.x, input.y );
+					}
 				}
 			}
 			result.push_back( v );

From b660cec5626f148b2909d9c63cc71816dd65d7a4 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Tue, 16 May 2023 14:55:11 -0700
Subject: [PATCH 07/20] VectorWarp : Add perf tests

---
 python/GafferImageTest/VectorWarpTest.py | 59 ++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/python/GafferImageTest/VectorWarpTest.py b/python/GafferImageTest/VectorWarpTest.py
index 5e149b752f8..4abd3c89451 100644
--- a/python/GafferImageTest/VectorWarpTest.py
+++ b/python/GafferImageTest/VectorWarpTest.py
@@ -170,6 +170,65 @@ def testWarpImage( self ):
 
 		self.assertImagesEqual( vectorWarp["out"], expectedReader["out"], maxDifference = 0.0005, ignoreMetadata = True )
 
+	def runPerfTest( self, sourceRes, resultRes, filter, useDerivs ):
+		warpPatternReader = GafferImage.ImageReader()
+		warpPatternReader["fileName"].setValue( self.imagesPath() / "warpPattern.exr" )
+
+		resize = GafferImage.Resize()
+		resize["in"].setInput( warpPatternReader["out"] )
+		resize['format']["displayWindow"]["min"].setValue( imath.V2i( 0 ) )
+		resize['format']["displayWindow"]["max"].setValue( imath.V2i( resultRes ) )
+
+		xRamp = GafferImage.Ramp()
+		xRamp["format"].setValue( GafferImage.Format( resultRes, resultRes ) )
+		xRamp["endPosition"].setValue( imath.V2f( resultRes, 0 ) )
+		xRamp["ramp"]["p1"]["y"].setValue( imath.Color4f( 1, 0, 0, 1 ) )
+		yRamp = GafferImage.Ramp()
+		yRamp["format"].setValue( GafferImage.Format( resultRes, resultRes ) )
+		yRamp["endPosition"].setValue( imath.V2f( 0, resultRes ) )
+		yRamp["ramp"]["p1"]["y"].setValue( imath.Color4f( 0, 1, 0, 1 ) )
+
+		toAbsoluteMerge = GafferImage.Merge()
+		toAbsoluteMerge["operation"].setValue( GafferImage.Merge.Operation.Add )
+		toAbsoluteMerge["in"]["in0"].setInput( xRamp["out"] )
+		toAbsoluteMerge["in"]["in1"].setInput( yRamp["out"] )
+		toAbsoluteMerge["in"]["in2"].setInput( resize["out"] )
+
+		dotGridReader = GafferImage.ImageReader()
+		dotGridReader["fileName"].setValue( self.imagesPath() / "dotGrid.300.exr" )
+
+		sourceResize = GafferImage.Resize()
+		sourceResize["in"].setInput( dotGridReader["out"] )
+		sourceResize['format']["displayWindow"]["min"].setValue( imath.V2i( 0 ) )
+		sourceResize['format']["displayWindow"]["max"].setValue( imath.V2i( sourceRes ) )
+
+		vectorWarp = GafferImage.VectorWarp()
+		vectorWarp["in"].setInput( sourceResize["out"] )
+		vectorWarp["vector"].setInput( toAbsoluteMerge["out"] )
+		vectorWarp["filter"].setValue( filter )
+		vectorWarp["useDerivatives"].setValue( useDerivs )
+
+		GafferImageTest.processTiles( toAbsoluteMerge["out"] )
+		GafferImageTest.processTiles( sourceResize["out"] )
+
+		with GafferTest.TestRunner.PerformanceScope() :
+			GafferImageTest.processTiles( vectorWarp["out"] )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testDerivsPerf( self ):
+		self.runPerfTest( 300, 2500, "cubic", True )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testNoDerivsPerf( self ):
+		self.runPerfTest( 300, 2500, "cubic", False )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testBilinearPerf( self ):
+		self.runPerfTest( 300, 2500, "bilinear", False )
+
+	@GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 )
+	def testDownsamplePerf( self ):
+		self.runPerfTest( 6000, 300, "cubic", True )
 
 if __name__ == "__main__":
 	unittest.main()

From 378da659fc9df0cd1129bfb07cf8968f5fdbac40 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Tue, 20 Jun 2023 14:45:52 -0700
Subject: [PATCH 08/20] ImageTransformTest : Pre-cache input for performance
 tests

---
 python/GafferImageTest/ImageTransformTest.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/python/GafferImageTest/ImageTransformTest.py b/python/GafferImageTest/ImageTransformTest.py
index 69004d72aa2..e6872a97e71 100644
--- a/python/GafferImageTest/ImageTransformTest.py
+++ b/python/GafferImageTest/ImageTransformTest.py
@@ -356,6 +356,8 @@ def testRotationPerformance( self ) :
 		transform["transform"]["pivot"].setValue( imath.V2f( 1500 ) )
 		transform["transform"]["rotate"].setValue( 2.5 )
 
+		GafferImageTest.processTiles( checker["out"] )
+
 		with GafferTest.TestRunner.PerformanceScope() :
 			GafferImageTest.processTiles( transform["out"] )
 
@@ -369,6 +371,8 @@ def testTranslationPerformance( self ) :
 		transform["in"].setInput( checker["out"] )
 		transform["transform"]["translate"].setValue( imath.V2f( 2.2 ) )
 
+		GafferImageTest.processTiles( checker["out"] )
+
 		with GafferTest.TestRunner.PerformanceScope() :
 			GafferImageTest.processTiles( transform["out"] )
 
@@ -382,6 +386,8 @@ def testDownsizingPerformance( self ) :
 		transform["in"].setInput( checker["out"] )
 		transform["transform"]["scale"].setValue( imath.V2f( 0.1 ) )
 
+		GafferImageTest.processTiles( checker["out"] )
+
 		with GafferTest.TestRunner.PerformanceScope() :
 			GafferImageTest.processTiles( transform["out"] )
 
@@ -395,6 +401,8 @@ def testUpsizingPerformance( self ) :
 		transform["in"].setInput( checker["out"] )
 		transform["transform"]["scale"].setValue( imath.V2f( 3 ) )
 
+		GafferImageTest.processTiles( checker["out"] )
+
 		with GafferTest.TestRunner.PerformanceScope() :
 			GafferImageTest.processTiles( transform["out"] )
 
@@ -410,6 +418,8 @@ def testRotationAndScalingPerformance( self ) :
 		transform["transform"]["rotate"].setValue( 2.5 )
 		transform["transform"]["scale"].setValue( imath.V2f( 0.75 ) )
 
+		GafferImageTest.processTiles( checker["out"] )
+
 		with GafferTest.TestRunner.PerformanceScope() :
 			GafferImageTest.processTiles( transform["out"] )
 
@@ -428,6 +438,8 @@ def testConcatenationPerformance1( self ) :
 		transform2["in"].setInput( transform1["out"] )
 		transform2["transform"]["translate"].setValue( imath.V2f( 10 ) )
 
+		GafferImageTest.processTiles( checker["out"] )
+
 		with GafferTest.TestRunner.PerformanceScope() :
 			GafferImageTest.processTiles( transform2["out"] )
 
@@ -447,6 +459,8 @@ def testConcatenationPerformance2( self ) :
 		transform2["in"].setInput( transform1["out"] )
 		transform2["transform"]["translate"].setValue( imath.V2f( 10 ) )
 
+		GafferImageTest.processTiles( checker["out"] )
+
 		with GafferTest.TestRunner.PerformanceScope() :
 			GafferImageTest.processTiles( transform2["out"] )
 

From 74c01926bdf21f71e826f8a5faa4ddd97c02ce77 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Fri, 12 May 2023 18:49:34 -0700
Subject: [PATCH 09/20] ImagePlug : Make fundamental constants public constexpr

Might as well make it easy for people who want to do fast things with images?
---
 include/GafferImage/ImagePlug.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/GafferImage/ImagePlug.h b/include/GafferImage/ImagePlug.h
index 3923bbdae21..8a5862871f8 100644
--- a/include/GafferImage/ImagePlug.h
+++ b/include/GafferImage/ImagePlug.h
@@ -256,8 +256,8 @@ class GAFFERIMAGE_API ImagePlug : public Gaffer::ValuePlug
 		static const IECore::FloatVectorData *blackTile();
 		static const IECore::FloatVectorData *whiteTile();
 
-		static int tileSize() { return 1 << tileSizeLog2(); };
-		static int tilePixels() { return tileSize() * tileSize(); };
+		static constexpr int tileSize() { return 1 << tileSizeLog2(); };
+		static constexpr int tilePixels() { return tileSize() * tileSize(); };
 
 		/// Returns the index of the tile containing a point
 		/// This just means dividing by tile size ( always rounding down )
@@ -286,9 +286,9 @@ class GAFFERIMAGE_API ImagePlug : public Gaffer::ValuePlug
 		};
 		//@}
 
-	private :
+		static constexpr int tileSizeLog2() { return 7; };
 
-		static int tileSizeLog2() { return 7; };
+	private :
 
 		static void compoundObjectToCompoundData( const IECore::CompoundObject *object, IECore::CompoundData *data );
 

From 564c9fb950c6d93a9a9de67d2572426cfec6ad01 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Mon, 15 May 2023 18:42:43 -0700
Subject: [PATCH 10/20] Sampler : Internal optimizations

---
 include/GafferImage/Sampler.h   |  5 +++--
 include/GafferImage/Sampler.inl | 26 +++++++++++---------------
 src/GafferImage/Sampler.cpp     |  6 ++++++
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/include/GafferImage/Sampler.h b/include/GafferImage/Sampler.h
index bdb7bbd03b8..837d3d56718 100644
--- a/include/GafferImage/Sampler.h
+++ b/include/GafferImage/Sampler.h
@@ -106,8 +106,8 @@ class GAFFERIMAGE_API Sampler
 		/// Cached data access
 		/// @param p Any point within the cache that we wish to retrieve the data for.
 		/// @param tileData Is set to the tile's channel data.
-		/// @param tilePixelIndex XY indices that can be used to access the colour value of point 'p' from tileData.
-		void cachedData( Imath::V2i p, const float *& tileData, Imath::V2i &tilePixelIndex );
+		/// @param tilePixelIndex Is set to the index used to access the colour value of point 'p' from tileData.
+		void cachedData( Imath::V2i p, const float *& tileData, int &tilePixelIndex );
 
 		const ImagePlug *m_plug;
 		const std::string m_channelName;
@@ -117,6 +117,7 @@ class GAFFERIMAGE_API Sampler
 		std::vector< IECore::ConstFloatVectorDataPtr > m_dataCache;
 		std::vector< const float * > m_dataCacheRaw;
 		Imath::Box2i m_cacheWindow;
+		int m_cacheOriginIndex;
 		int m_cacheWidth;
 
 		int m_boundingMode;
diff --git a/include/GafferImage/Sampler.inl b/include/GafferImage/Sampler.inl
index dd8bc940d30..f153a7371cd 100644
--- a/include/GafferImage/Sampler.inl
+++ b/include/GafferImage/Sampler.inl
@@ -65,18 +65,14 @@ inline float Sampler::sample( int x, int y )
 		}
 		else
 		{
-			if( BufferAlgo::empty( m_dataWindow ) )
-			{
-				return 0.0f;
-			}
 			p = BufferAlgo::clamp( p, m_dataWindow );
 		}
 	}
 
 	const float *tileData;
-	Imath::V2i tileIndex;
-	cachedData( p, tileData, tileIndex );
-	return *(tileData + tileIndex.y * ImagePlug::tileSize() + tileIndex.x);
+	int tilePixelIndex;
+	cachedData( p, tileData, tilePixelIndex );
+	return *( tileData + tilePixelIndex );
 }
 
 inline float Sampler::sample( float x, float y )
@@ -94,23 +90,23 @@ inline float Sampler::sample( float x, float y )
 	return OIIO::bilerp( x0y0, x1y0, x0y1, x1y1, xf, yf );
 }
 
-inline void Sampler::cachedData( Imath::V2i p, const float *& tileData, Imath::V2i &tilePixelIndex )
+inline void Sampler::cachedData( Imath::V2i p, const float *& tileData, int &tilePixelIndex )
 {
 	// Get the smart pointer to the tile we want.
-	Imath::V2i relP = p - m_cacheWindow.min;
-	Imath::V2i cacheIndex = ImagePlug::tileIndex( relP );
 
-	tilePixelIndex = relP - cacheIndex * ImagePlug::tileSize();
+	constexpr int lowMask = ( 1 << ImagePlug::tileSizeLog2() ) - 1;
+	int cacheIndex = ( p.x >> ImagePlug::tileSizeLog2() ) + m_cacheWidth * ( p.y >> ImagePlug::tileSizeLog2() ) - m_cacheOriginIndex;
+
+	tilePixelIndex = ( p.x & lowMask ) + ( ( p.y & lowMask ) << ImagePlug::tileSizeLog2() );
 
-	int cacheI = cacheIndex.x + cacheIndex.y * m_cacheWidth;
-	const float *&cacheTileRawPtr = m_dataCacheRaw[cacheI];
+	const float *&cacheTileRawPtr = m_dataCacheRaw[cacheIndex];
 
 	if ( cacheTileRawPtr == nullptr )
 	{
 		// Get the origin of the tile we want.
-		Imath::V2i tileOrigin = ( ImagePlug::tileIndex( m_cacheWindow.min ) + cacheIndex ) * ImagePlug::tileSize();
+		Imath::V2i tileOrigin( p.x & ~( ImagePlug::tileSize() - 1 ), p.y & ~( ImagePlug::tileSize() - 1 ) );
 
-		IECore::ConstFloatVectorDataPtr &cacheTilePtr = m_dataCache[ cacheI ];
+		IECore::ConstFloatVectorDataPtr &cacheTilePtr = m_dataCache[ cacheIndex ];
 		cacheTilePtr = m_plug->channelData( m_channelName, tileOrigin );
 		cacheTileRawPtr = &cacheTilePtr->readable()[0];
 	}
diff --git a/src/GafferImage/Sampler.cpp b/src/GafferImage/Sampler.cpp
index f4301711a62..c7efc09ec15 100644
--- a/src/GafferImage/Sampler.cpp
+++ b/src/GafferImage/Sampler.cpp
@@ -73,6 +73,10 @@ Sampler::Sampler( const GafferImage::ImagePlug *plug, const std::string &channel
 		// all bounds checking.
 		m_boundingMode = -1;
 	}
+	else if( BufferAlgo::empty( m_dataWindow ) )
+	{
+		m_boundingMode = Black;
+	}
 
 	// Compute the area we need to cache in order to
 	// be able to service calls within m_sampleWindow
@@ -105,6 +109,8 @@ Sampler::Sampler( const GafferImage::ImagePlug *plug, const std::string &channel
 	int cacheHeight = int( ceil( float( m_cacheWindow.size().y ) / ImagePlug::tileSize() ) );
 	m_dataCache.resize( m_cacheWidth * cacheHeight, nullptr );
 	m_dataCacheRaw.resize( m_cacheWidth * cacheHeight, nullptr );
+
+	m_cacheOriginIndex = ( m_cacheWindow.min.x >> ImagePlug::tileSizeLog2() ) + m_cacheWidth * ( m_cacheWindow.min.y >> ImagePlug::tileSizeLog2() );
 }
 
 void Sampler::hash( IECore::MurmurHash &h ) const

From 57cc62261a2dc188cde968e1ce71f75f7b35fc3a Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Mon, 15 May 2023 08:57:58 -0700
Subject: [PATCH 11/20] GafferImage::Sampler : Add visitPixels()

---
 include/GafferImage/Sampler.h   |   8 ++
 include/GafferImage/Sampler.inl | 185 ++++++++++++++++++++++++++++++++
 2 files changed, 193 insertions(+)

diff --git a/include/GafferImage/Sampler.h b/include/GafferImage/Sampler.h
index 837d3d56718..382483b8548 100644
--- a/include/GafferImage/Sampler.h
+++ b/include/GafferImage/Sampler.h
@@ -94,6 +94,14 @@ class GAFFERIMAGE_API Sampler
 		/// 0.5, 0.5.
 		float sample( float x, float y );
 
+		/// Call a functor for all pixels in the region.
+		/// Much faster than calling sample(int,int) repeatedly for every pixel in the
+		/// region, up to 5 times faster in practical cases.
+		/// The signature of the functor must be `F( float value, int x, int y )`.
+		/// Each pixel is visited in order of increasing X, then increasing Y.
+		template<typename F>
+		inline void visitPixels( const Imath::Box2i &region, F &&lambda );
+
 		/// Appends a hash that represent all the pixel
 		/// values within the requested sample area.
 		void hash( IECore::MurmurHash &h ) const;
diff --git a/include/GafferImage/Sampler.inl b/include/GafferImage/Sampler.inl
index f153a7371cd..30adb1a5721 100644
--- a/include/GafferImage/Sampler.inl
+++ b/include/GafferImage/Sampler.inl
@@ -90,6 +90,191 @@ inline float Sampler::sample( float x, float y )
 	return OIIO::bilerp( x0y0, x1y0, x0y1, x1y1, xf, yf );
 }
 
+template<typename F>
+inline void Sampler::visitPixels( const Imath::Box2i &region, F &&visitor )
+{
+	int x = region.min.x;
+	int y = region.min.y;
+
+	if( region.max.x == region.min.x + 1 )
+	{
+		// The general principle of visitPixels is to find contiguous spans in X, precompute the length
+		// of the span, and then process the whole span in a very tight inner loop with minimal branching
+		// where moving to the next pixel takes just an increment. When the region is just
+		// one pixel wide, however, looking for X spans is really not helping, so we have a big special
+		// case to instead look for vertical spans.
+		while( y < region.max.y )
+		{
+			// Our goal is to prepare these 3 variables:
+			// valuePointer or constantValue will hold the data, and count will hold how many consecutive pixels
+			// we can visit without recomputing valuePointer/constantValue.
+			// The default is that valuePointer is null, and constantValue is 0, meaning that visited pixels
+			// will be treated as 0 unless we set one of the values.
+
+			int count;
+			const float *valuePointer = nullptr;
+			float constantValue = 0.0f;
+
+			// If we are clamping, then we will always treat x as if it is within the nearest edge of the data
+			// window.
+			int clampedX = x;
+			if( m_boundingMode == Clamp )
+			{
+				clampedX = std::clamp( x, m_dataWindow.min.x, m_dataWindow.max.x - 1 );
+			}
+
+			if( clampedX < m_dataWindow.min.x || clampedX >= m_dataWindow.max.x )
+			{
+				// If we're outside, and haven't been clamped, then just use the default constantValue of 0.
+				count = region.max.y - y;
+			}
+			else if( y < m_dataWindow.min.y )
+			{
+				// Everything before the dataWindow gets the first value in the data window, or 0 if not clamped
+				if( m_boundingMode == Clamp )
+				{
+					const float *tileData;
+					int tilePixelIndex;
+					cachedData( Imath::V2i( clampedX, m_dataWindow.min.y ), tileData, tilePixelIndex );
+					constantValue = tileData[ tilePixelIndex ];
+				}
+				count = std::min( region.max.y, m_dataWindow.min.y ) - y;
+			}
+			else if( y >= m_dataWindow.max.y )
+			{
+				// Everything after the dataWindow gets the last value in the data window, or 0 if not clamped
+				if( m_boundingMode == Clamp )
+				{
+					const float *tileData;
+					int tilePixelIndex;
+					cachedData( Imath::V2i( clampedX, m_dataWindow.max.y - 1 ), tileData, tilePixelIndex );
+					constantValue = tileData[ tilePixelIndex ];
+				}
+				count = region.max.y - y;
+			}
+			else
+			{
+				// We're actually in the data window ( or clamped to the side of it ). Need to actually
+				// set valuePointer
+				const float *tileData;
+				int tilePixelIndex;
+				cachedData( Imath::V2i( clampedX, y ), tileData, tilePixelIndex );
+				int pixelY = tilePixelIndex >> ImagePlug::tileSizeLog2();
+				count = std::min( std::min( m_dataWindow.max.y, region.max.y ) - y, ImagePlug::tileSize() - pixelY );
+				valuePointer = &tileData[ tilePixelIndex ];
+			}
+
+			// Now we can do the nice tight inner loop where we call visitor repeatedly with valuePointer, or
+			// constantValue
+			if( valuePointer )
+			{
+				for( int i = 0; i < count; i++ )
+				{
+					visitor( valuePointer[i << ImagePlug::tileSizeLog2()], x, y + i );
+				}
+			}
+			else
+			{
+				for( int i = 0; i < count; i++ )
+				{
+					visitor( constantValue, x, y + i );
+				}
+			}
+
+			y += count;
+		}
+		return;
+	}
+
+	// We didn't take the vertical special case above, so we're going to visit horizontal scanlines
+	while( y < region.max.y )
+	{
+		// As above, our goal is to prepare these 3 variables:
+		// valuePointer or constantValue will hold the data, and count will hold how many consecutive pixels
+		// we can visit without recomputing valuePointer/constantValue.
+		// The default is that valuePointer is null, and constantValue is 0, meaning that visited pixels
+		// will be treated as 0 unless we set one of the values.
+		int count;
+		const float *valuePointer = nullptr;
+		float constantValue = 0.0f;
+
+		// If we are clamping, then we will always treat y as if it is within the nearest edge of the data
+		// window ( reading a scanline below the data window yields the same result as reading a scanline
+		// of the bottom edge of the data window )
+		int clampedY = y;
+		if( m_boundingMode == Clamp )
+		{
+			clampedY = std::clamp( y, m_dataWindow.min.y, m_dataWindow.max.y - 1 );
+		}
+
+		if( clampedY < m_dataWindow.min.y ||  clampedY >= m_dataWindow.max.y )
+		{
+			// If we're outside, and haven't been clamped, then just use the default constantValue of 0.
+			count = region.max.x - x;
+		}
+		else if( x < m_dataWindow.min.x )
+		{
+			// Everything left of the dataWindow gets the first value in the data window on this row, or 0 if not clamped
+			if( m_boundingMode == Clamp )
+			{
+				const float *tileData;
+				int tilePixelIndex;
+				cachedData( Imath::V2i( m_dataWindow.min.x, clampedY ), tileData, tilePixelIndex );
+				constantValue = tileData[ tilePixelIndex ];
+			}
+			count = std::min( region.max.x, m_dataWindow.min.x ) - x;
+		}
+		else if( x >= m_dataWindow.max.x )
+		{
+			// Everything right of the dataWindow gets the last value in the data window on this row, or 0 if not clamped
+			if( m_boundingMode == Clamp )
+			{
+				const float *tileData;
+				int tilePixelIndex;
+				cachedData( Imath::V2i( m_dataWindow.max.x - 1, clampedY ), tileData, tilePixelIndex );
+				constantValue = tileData[ tilePixelIndex ];
+			}
+			count = region.max.x - x;
+		}
+		else
+		{
+			// We're actually in the data window ( or clamped to the top or bottom of it ). Need to actually
+			// set valuePointer
+			const float *tileData;
+			int tilePixelIndex;
+			cachedData( Imath::V2i( x, clampedY ), tileData, tilePixelIndex );
+			int tileX = tilePixelIndex & ( ImagePlug::tileSize() - 1 );
+			count = std::min( std::min( m_dataWindow.max.x, region.max.x ) - x, ImagePlug::tileSize() - tileX );
+			valuePointer = &tileData[ tilePixelIndex ];
+		}
+
+		// Now we can do the nice tight inner loop where we call visitor repeatedly with valuePointer, or
+		// constantValue
+		if( valuePointer )
+		{
+			for( int i = 0; i < count; i++ )
+			{
+				visitor( valuePointer[i], x + i, y );
+			}
+		}
+		else
+		{
+			for( int i = 0; i < count; i++ )
+			{
+				visitor( constantValue, x + i, y );
+			}
+		}
+
+		// Advance in the scanline, and advance to the next scanline if we've finished
+		x += count;
+		if( x == region.max.x )
+		{
+			y++;
+			x = region.min.x;
+		}
+	}
+}
+
 inline void Sampler::cachedData( Imath::V2i p, const float *& tileData, int &tilePixelIndex )
 {
 	// Get the smart pointer to the tile we want.

From 22e069f06bf41ae4ea3d36a2f7d2b84c321b1fe3 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Mon, 15 May 2023 16:03:14 -0700
Subject: [PATCH 12/20] GafferImageTest : Add validateVisitPixels

---
 SConstruct                                    |  4 +-
 .../GafferImageTestModule.cpp                 | 40 +++++++++++++++++++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/SConstruct b/SConstruct
index a4aec95216e..3c3cb4b4cac 100644
--- a/SConstruct
+++ b/SConstruct
@@ -1078,10 +1078,10 @@ libraries = {
 
 	"GafferImageTest" : {
 		"envAppends" : {
-			"LIBS" : [ "Gaffer", "GafferImage", "OpenImageIO$OIIO_LIB_SUFFIX",  ],
+			"LIBS" : [ "Gaffer", "GafferImage", "OpenImageIO$OIIO_LIB_SUFFIX" ],
 		},
 		"pythonEnvAppends" : {
-			"LIBS" : [ "GafferImage", "GafferImageTest" ],
+			"LIBS" : [ "GafferImage", "GafferImageTest", "fmt" ],
 		},
 		"additionalFiles" :
 			glob.glob( "python/GafferImageTest/scripts/*" ) + glob.glob( "python/GafferImageTest/images/*" ) +
diff --git a/src/GafferImageTestModule/GafferImageTestModule.cpp b/src/GafferImageTestModule/GafferImageTestModule.cpp
index 2eace90ef3a..5bdd81d7acb 100644
--- a/src/GafferImageTestModule/GafferImageTestModule.cpp
+++ b/src/GafferImageTestModule/GafferImageTestModule.cpp
@@ -35,6 +35,7 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "boost/python.hpp"
+#include "fmt/format.h"
 
 #include "GafferImageTest/ContextSanitiser.h"
 
@@ -43,6 +44,7 @@
 #include "GafferImage/ImageAlgo.h"
 #include "GafferImage/ImagePlug.h"
 #include "GafferImage/Format.h"
+#include "GafferImage/Sampler.h"
 
 #include "Gaffer/Node.h"
 
@@ -118,6 +120,43 @@ void testEditableScopeForFormat()
 	);
 }
 
+void validateVisitPixels( GafferImage::Sampler &sampler, const Imath::Box2i &region )
+{
+	int i = 0;
+	int sizeX = region.size().x;
+	sampler.visitPixels( region,
+		[&region, &sampler, &i, sizeX ] ( float value, int x, int y )
+		{
+			int expectedX = ( i % sizeX ) + region.min.x;
+			int expectedY = ( i / sizeX ) + region.min.y;
+			if( x != expectedX || y != expectedY )
+			{
+				throw IECore::Exception( fmt::format (
+					"visitPixels passed incorrect coordinate - expected {},{}, received {},{}",
+					expectedX, expectedY, x, y
+				) );
+			}
+
+			float expectedValue = sampler.sample( x, y );
+			if( value != expectedValue )
+			{
+				throw IECore::Exception( fmt::format(
+					"visitPixels passed incorrect value for pixel {},{} - expected {} received {}",
+					x, y, expectedValue, value
+				) );
+			}
+			i++;
+		}
+	);
+	if( i != region.size().x * region.size().y )
+	{
+		throw IECore::Exception( fmt::format(
+			"visitPixels processed wrong number of pixels: visited {} in region of size {},{}",
+			i, region.size().x, region.size().y
+		) );
+	}
+}
+
 } // namespace
 
 BOOST_PYTHON_MODULE( _GafferImageTest )
@@ -129,4 +168,5 @@ BOOST_PYTHON_MODULE( _GafferImageTest )
 	def( "processTiles", &processTilesWrapper );
 	def( "connectProcessTilesToPlugDirtiedSignal", &connectProcessTilesToPlugDirtiedSignal );
 	def( "testEditableScopeForFormat", &testEditableScopeForFormat );
+	def( "validateVisitPixels", &validateVisitPixels );
 }

From e0c223a0d7119122d377380ac215107256e4538a Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Mon, 15 May 2023 16:03:37 -0700
Subject: [PATCH 13/20] Sampler : Test visitPixels

---
 python/GafferImageTest/SamplerTest.py | 66 ++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/python/GafferImageTest/SamplerTest.py b/python/GafferImageTest/SamplerTest.py
index 8106ee6378c..2be9528ec89 100644
--- a/python/GafferImageTest/SamplerTest.py
+++ b/python/GafferImageTest/SamplerTest.py
@@ -53,7 +53,7 @@ def testOutOfBoundsSampleModeBlack( self ) :
 		c = GafferImage.Constant()
 		c["color"].setValue( imath.Color4f( 1 ) )
 
-		dw = c["out"]["dataWindow"].getValue();
+		dw = c["out"]["dataWindow"].getValue()
 		s = GafferImage.Sampler( c["out"], "R", dw, GafferImage.Sampler.BoundingMode.Black )
 
 		# Check integer sampling.
@@ -97,7 +97,7 @@ def testOutOfBoundsSampleModeClamp( self ) :
 		r = GafferImage.ImageReader()
 		r["fileName"].setValue( self.fileName )
 
-		dw = r["out"]["dataWindow"].getValue();
+		dw = r["out"]["dataWindow"].getValue()
 		s = GafferImage.Sampler( r["out"], "R", dw, GafferImage.Sampler.BoundingMode.Clamp )
 
 		# Get the exact values of the corner pixels.
@@ -220,5 +220,67 @@ def testExceptionOnDeepData( self ) :
 		with self.assertRaises( RuntimeError ) :
 			sampler = GafferImage.Sampler( merge["out"], "R", imath.Box2i( imath.V2i( 0 ), imath.V2i( 200 ) ), boundingMode = GafferImage.Sampler.BoundingMode.Black )
 
+	def testVisitPixels( self ):
+
+		ts = GafferImage.ImagePlug.tileSize()
+		ramp = GafferImage.Ramp()
+		ramp["startPosition"].setValue( imath.V2f( 0.5 ) )
+
+		offset = GafferImage.Offset()
+		offset["in"].setInput( ramp["out"] )
+
+		# Test for a bunch of different images sizes
+		for width in [ 1, 6, ts + 6, 2 * ts + 6 ]:
+			for height in [ 1, 9, ts + 9, 2 * ts + 9 ]:
+
+				# Set up a ramp where each pixel gets a unique value based on its index
+				ramp["format"].setValue( GafferImage.Format( width, height, 1.000 ) )
+				rampScale = ( width * height - 1 ) / ( width * width + 1 )
+				ramp["endPosition"].setValue( imath.V2f( 0.5 + 1 * rampScale, 0.5 + width * rampScale ) )
+				ramp['ramp'].setValue( Gaffer.SplineDefinitionfColor4f( ( ( 0, imath.Color4f( 0 ) ), ( 1, imath.Color4f( width * height - 1 ) )), Gaffer.SplineDefinitionInterpolation.Linear ) )
+
+				center = imath.V2i( width // 2, height // 2 )
+
+				# Test for a bunch of different offsets, so we're hitting different ways the datawindow
+				# may be aligned to a tile boundary
+				for dataAlignment in [ imath.V2i(0), -center, imath.V2i( ts // 2 ) - center  ]:
+					offset["offset"].setValue( dataAlignment )
+					dataWindow = offset["out"].dataWindow()
+
+					for bm in [ GafferImage.Sampler.BoundingMode.Black, GafferImage.Sampler.BoundingMode.Clamp ]:
+
+						# Make one big sampler that can handle all our queries. Making this larger than the
+						# data window doesn't actually have an impact on the current implementation, but it
+						# means that our queries outside the datawindow are officially legal.
+						oversizedSampleWindow = imath.Box2i( dataWindow.min() - ts * 3, dataWindow.max() + ts * 3 )
+
+						sampler = GafferImage.Sampler( offset["out"], "R", oversizedSampleWindow, boundingMode = bm )
+
+						# Test possible ways for the sampling region to line up with the data window: corners,
+						# edges, center, or completely outside
+						for regionAlignment in [ imath.V2i( regionAlignmentX, regionAlignmentY )
+							for regionAlignmentX in [ 0, width // 2, width - 1 ]
+							for regionAlignmentY in [ 0, height // 2, height - 1 ]
+						] + [ imath.V2i( -ts ), dataWindow.max() + imath.V2i( ts ) ]:
+
+								for regionSize in [
+										imath.V2i( 1, 1 ),
+										imath.V2i( 5, 7 ),
+										imath.V2i( 7, 5 ),
+										imath.V2i( ts + 7, 1 ),
+										imath.V2i( 1, ts + 7 ),
+										imath.V2i( 2 * ts + 7, 1 ),
+										imath.V2i( 1, 2 * ts + 7 ),
+										imath.V2i( 2 * ts + 7, 2 ),
+										imath.V2i( 2, 2 * ts + 7 ),
+										imath.V2i( 2 * ts + 5, 2 * ts + 7 ),
+									]:
+
+									regionMin = dataAlignment + regionAlignment - regionSize / 2
+									region = imath.Box2i( regionMin, regionMin + regionSize )
+
+									with self.subTest( dataWindow = dataWindow, region = region ):
+										GafferImageTest.validateVisitPixels( sampler, region )
+
 if __name__ == "__main__":
 	unittest.main()

From c53eed3c47f616819f7be5d14a957f7d638e2522 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Fri, 12 May 2023 17:35:31 -0700
Subject: [PATCH 14/20] GafferImage::Resample : Optimize using visitPixels

---
 src/GafferImage/Resample.cpp | 39 ++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/src/GafferImage/Resample.cpp b/src/GafferImage/Resample.cpp
index 0cc398f2cea..2d82333db42 100644
--- a/src/GafferImage/Resample.cpp
+++ b/src/GafferImage/Resample.cpp
@@ -639,20 +639,20 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string
 				iX = ( oP.x + 0.5 ) / ratio.x + offset.x;
 				OIIO::floorfrac( iX, &iXI );
 
-				int fX; // relative filter position
 				float v = 0.0f;
 				float totalW = 0.0f;
-				for( fX = -filterRadius.x; fX<= filterRadius.x; ++fX )
-				{
-					const float w = *wIt++;
-					if( w == 0.0f )
+
+				sampler.visitPixels( Imath::Box2i(
+						Imath::V2i( iXI - filterRadius.x, oP.y ),
+						Imath::V2i( iXI + filterRadius.x + 1, oP.y + 1 )
+					),
+					[&wIt, &v, &totalW]( float cur, int x, int y )
 					{
-						continue;
+						const float w = *wIt++;
+						v += w * cur;
+						totalW += w;
 					}
-
-					v += w * sampler.sample( iXI + fX, oP.y );
-					totalW += w;
-				}
+				);
 
 				if( totalW != 0.0f )
 				{
@@ -683,21 +683,20 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string
 
 			for( oP.x = tileBound.min.x; oP.x < tileBound.max.x; ++oP.x )
 			{
-				int fY; // relative filter position
 				float v = 0.0f;
 				float totalW = 0.0f;
 				std::vector<float>::const_iterator wIt = weights.begin() + ( oP.y - tileBound.min.y ) * ( filterRadius.y * 2 + 1);
-				for( fY = -filterRadius.y; fY<= filterRadius.y; ++fY )
-				{
-					const float w = *wIt++;
-					if( w == 0.0f )
+				sampler.visitPixels( Imath::Box2i(
+						Imath::V2i( oP.x, iYI - filterRadius.y ),
+						Imath::V2i( oP.x + 1, iYI + filterRadius.y + 1 )
+					),
+					[&wIt, &v, &totalW]( float cur, int x, int y )
 					{
-						continue;
+						const float w = *wIt++;
+						v += w * cur;
+						totalW += w;
 					}
-
-					v += w * sampler.sample( oP.x, iYI + fY );
-					totalW += w;
-				}
+				);
 
 				if( totalW != 0.0f )
 				{

From d4bbbaab1c7f2be8f9631d4e271d1f948f121c4a Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Fri, 16 Jun 2023 19:16:16 -0700
Subject: [PATCH 15/20] SConstruct : Hack around compiler perfomance bug

---
 SConstruct | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/SConstruct b/SConstruct
index 3c3cb4b4cac..27e834d8e38 100644
--- a/SConstruct
+++ b/SConstruct
@@ -491,6 +491,14 @@ if env["PLATFORM"] != "win32" :
 		# Turn off the parts of `-Wextra` that we don't like.
 		env.Append( CXXFLAGS = [ "-Wno-cast-function-type", "-Wno-unused-parameter" ] )
 
+		# Set this weird compiler flag that in general is expected to cause compiled code to be about
+		# half a percent slower, but works around this ridiculous bug:
+		# https://gcc.gnu.org/bugzilla//show_bug.cgi?id=51041
+		# It's a 10 year old bug that sometimes causes important inner loops to get 4X slower for no
+		# reason ( it affects use of visitPixels depending on exact register usage patterns at the call
+		# site ), and there appears to be no real solution ... maybe we should be moving away from GCC?
+		env.Append( CXXFLAGS = [ "-fira-region=all" ] )
+
 		env.Append(
 			CXXFLAGS = [ "-pthread" ],
 			SHLINKFLAGS = [ "-pthread", "-Wl,--no-undefined" ],

From 81b06b295ce70d69b2827c97fcf4fce4fb65884e Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Wed, 17 May 2023 11:41:08 -0700
Subject: [PATCH 16/20] GafferImage::Resample : Optimize inseparable filters

---
 src/GafferImage/Resample.cpp | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/GafferImage/Resample.cpp b/src/GafferImage/Resample.cpp
index 2d82333db42..ae114da3471 100644
--- a/src/GafferImage/Resample.cpp
+++ b/src/GafferImage/Resample.cpp
@@ -580,27 +580,24 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string
 				// seperable case.  Once that is done, we should probably also hoist the multiply by
 				// filterCoordinateMult out of the loop.
 
-				V2i fP; // relative filter position
 				float v = 0.0f;
 				float totalW = 0.0f;
-				for( fP.y = -filterRadius.y; fP.y<= filterRadius.y; ++fP.y )
-				{
-					for( fP.x = -filterRadius.x; fP.x<= filterRadius.x; ++fP.x )
+				V2f filterOrigin = filterCoordinateMult.x * ( iPF - V2f( 0.5f ) );
+				sampler.visitPixels( Imath::Box2i(
+						iPI - filterRadius,
+						iPI + filterRadius + Imath::V2i( 1 )
+					),
+					[&filter, &filterOrigin, &filterCoordinateMult, &iPI, &v, &totalW]( float cur, int x, int y )
 					{
 						const float w = (*filter)(
-							filterCoordinateMult.x * (fP.x - ( iPF.x - 0.5f )),
-							filterCoordinateMult.y * (fP.y - ( iPF.y - 0.5f ))
+							filterOrigin.x + filterCoordinateMult.x * ( x - iPI.x ),
+							filterOrigin.y + filterCoordinateMult.y * ( y - iPI.y )
 						);
 
-						if( w == 0.0f )
-						{
-							continue;
-						}
-
-						v += w * sampler.sample( iPI.x + fP.x, iPI.y + fP.y );
+						v += w * cur;
 						totalW += w;
 					}
-				}
+				);
 
 				if( totalW != 0.0f )
 				{

From 52d68f9854368fa37d0d001783b6e1e351f38969 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Fri, 12 May 2023 18:44:34 -0700
Subject: [PATCH 17/20] RankFilter : Optimize using visitPixels

---
 src/GafferImage/RankFilter.cpp | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/GafferImage/RankFilter.cpp b/src/GafferImage/RankFilter.cpp
index 72b97544fd1..95aab22f643 100644
--- a/src/GafferImage/RankFilter.cpp
+++ b/src/GafferImage/RankFilter.cpp
@@ -237,15 +237,14 @@ void RankFilter::compute( Gaffer::ValuePlug *output, const Gaffer::Context *cont
 				IECore::Canceller::check( context->canceller() );
 
 				// Fill array with all nearby samples
-				V2i o;
 				vector<float>::iterator pixelsIt = pixels.begin();
-				for( o.y = -radius.y; o.y <= radius.y; ++o.y )
-				{
-					for( o.x = -radius.x; o.x <= radius.x; ++o.x )
+				sampler.visitPixels(
+					Imath::Box2i( p - radius, p + radius + Imath::V2i( 1 ) ),
+					[&pixelsIt] ( float v, int x, int y )
 					{
-						*pixelsIt++ = sampler.sample( p.x + o.x, p.y + o.y );
+						*pixelsIt++ = v;
 					}
-				}
+				);
 
 				switch( m_mode )
 				{
@@ -269,6 +268,7 @@ void RankFilter::compute( Gaffer::ValuePlug *output, const Gaffer::Context *cont
 
 				int closestMatch = INT_MAX;
 				pixelsIt = pixels.begin();
+				V2i o;
 				for( o.y = -radius.y; o.y <= radius.y; ++o.y )
 				{
 					for( o.x = -radius.x; o.x <= radius.x; ++o.x )
@@ -408,15 +408,14 @@ IECore::ConstFloatVectorDataPtr RankFilter::computeChannelData( const std::strin
 		{
 			IECore::Canceller::check( context->canceller() );
 
-			V2i o;
 			vector<float>::iterator pixelsIt = pixels.begin();
-			for( o.y = -radius.y; o.y <= radius.y; ++o.y )
-			{
-				for( o.x = -radius.x; o.x <= radius.x; ++o.x )
+			sampler.visitPixels(
+				Imath::Box2i( p - radius, p + radius + Imath::V2i( 1 ) ),
+				[&pixelsIt] ( float v, int x, int y )
 				{
-					*pixelsIt++ = sampler.sample( p.x + o.x, p.y + o.y );
+					*pixelsIt++ = v;
 				}
-			}
+			);
 			switch( m_mode )
 			{
 				case MedianRank:

From 885c075c6f281151ceb07a6ca1647ad27943f15e Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Tue, 16 May 2023 16:26:59 -0700
Subject: [PATCH 18/20] Sampler : Optimize common case where we are within data
 tile

Seems to give a 15% performance improvement in common cases
---
 include/GafferImage/Sampler.inl | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/include/GafferImage/Sampler.inl b/include/GafferImage/Sampler.inl
index 30adb1a5721..f564163b305 100644
--- a/include/GafferImage/Sampler.inl
+++ b/include/GafferImage/Sampler.inl
@@ -82,6 +82,30 @@ inline float Sampler::sample( float x, float y )
 	int yi;
 	float yf = OIIO::floorfrac( y - 0.5, &yi );
 
+	constexpr int tileLowMask = ImagePlug::tileSize() - 1;
+	if(
+		( xi & tileLowMask ) != tileLowMask &&
+		( yi & tileLowMask ) != tileLowMask &&
+		xi >= m_dataWindow.min.x && xi < m_dataWindow.max.x - 1 &&
+		yi >= m_dataWindow.min.y && yi < m_dataWindow.max.y - 1
+	)
+	{
+		const float *tileData;
+		int tilePixelIndex;
+		cachedData( Imath::V2i( xi, yi ), tileData, tilePixelIndex );
+		return OIIO::bilerp(
+			tileData[tilePixelIndex], tileData[tilePixelIndex + 1],
+			tileData[tilePixelIndex + ImagePlug::tileSize()], tileData[tilePixelIndex + ImagePlug::tileSize() + 1],
+			xf, yf
+		);
+	}
+	// Note that if we care about performance in the case of accessing outside the data window, we
+	// could add more special cases here. If boundingMode isn't clamped, this is trivial: check
+	// if fully outside bound, return 0.0 without even computing xf and yf. Clamped is trickier -
+	// above or below you need just xf, to the left or right you just need yf, and in the corners
+	// you need neither
+
+
 	float x0y0 = sample( xi, yi );
 	float x1y0 = sample( xi + 1, yi );
 	float x0y1 = sample( xi, yi + 1 );

From 44f314c962a2c6aab1e8e0c75d785e24d91ede7d Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Tue, 16 May 2023 18:30:44 -0700
Subject: [PATCH 19/20] FilterAlgo::sampleBox : Optimize using visitPixels

This is a 70% performance improvement when using a VectorWarp with useDerivatives on to downsample an image.
---
 src/GafferImage/FilterAlgo.cpp | 37 +++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/src/GafferImage/FilterAlgo.cpp b/src/GafferImage/FilterAlgo.cpp
index 0602e8f23e8..33be4318151 100644
--- a/src/GafferImage/FilterAlgo.cpp
+++ b/src/GafferImage/FilterAlgo.cpp
@@ -408,35 +408,40 @@ float GafferImage::FilterAlgo::sampleBox( Sampler &sampler, const V2f &p, float
 			scratchMemory[i] = filter->xfilt( ( (pixelBounds.min.x + i) + 0.5f - p.x ) * xscale );
 		}
 
+		Imath::Box2i visitBounds = pixelBounds;
 		for( int y = pixelBounds.min.y; y < pixelBounds.max.y; y++ )
 		{
+			visitBounds.min.y = y;
+			visitBounds.max.y = y + 1;
 			float yFilterWeight = filter->yfilt( ( y + 0.5f - p.y ) * yscale );
-			for( int x = pixelBounds.min.x; x < pixelBounds.max.x; x++ )
-			{
-				float w = scratchMemory[ x - pixelBounds.min.x ] * yFilterWeight;
+			sampler.visitPixels(
+				visitBounds,
+				[ &totalW, &v, &p, &pixelBounds, &scratchMemory, &yFilterWeight ] ( float value, int x, int y )
+				{
+					float w = scratchMemory[ x - pixelBounds.min.x ] * yFilterWeight;
 
-				// \todo : I can't think of any way to keep this around cleanly for testing, since
-				// it's right down in this inner loop, but replacing the filter with one value for
-				// pixels within the bounding box, and another value for pixels actually touched
-				// by the filter, is a good way to check that the bounding box is correct
-				//w = w != 0.0f ? 1.0f : 0.1f;
+					// \todo : I can't think of any way to keep this around cleanly for testing, since
+					// it's right down in this inner loop, but replacing the filter with one value for
+					// pixels within the bounding box, and another value for pixels actually touched
+					// by the filter, is a good way to check that the bounding box is correct
+					//w = w != 0.0f ? 1.0f : 0.1f;
 
-				totalW += w;
-				v += w * sampler.sample( x, y );
-			}
+					totalW += w;
+					v += w * value;
+				}
+			);
 		}
 	}
 	else
 	{
-		for( int y = pixelBounds.min.y; y < pixelBounds.max.y; y++ )
-		{
-			for( int x = pixelBounds.min.x; x < pixelBounds.max.x; x++ )
+		sampler.visitPixels( pixelBounds,
+			[ &totalW, &v, &p, &xscale, &yscale, &filter ] ( float value, int x, int y )
 			{
 				float w = (*filter)( ( x + 0.5f - p.x ) * xscale, ( y + 0.5f - p.y ) * yscale );
 				totalW += w;
-				v += w * sampler.sample( x, y );
+				v += w * value;
 			}
-		}
+		);
 	}
 
 	if( totalW != 0.0f )

From 8eec8bdff4f95bbd8ab5023f89a3a374b5a2ec42 Mon Sep 17 00:00:00 2001
From: Daniel Dresser <danield@image-engine.com>
Date: Mon, 19 Jun 2023 21:39:04 -0700
Subject: [PATCH 20/20] Prototype some scripts for graphing performance results
 in contrib

---
 contrib/scripts/graphPerformanceResults.py | 81 ++++++++++++++++++++++
 contrib/scripts/runTestsForCommits.py      | 46 ++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100755 contrib/scripts/graphPerformanceResults.py
 create mode 100755 contrib/scripts/runTestsForCommits.py

diff --git a/contrib/scripts/graphPerformanceResults.py b/contrib/scripts/graphPerformanceResults.py
new file mode 100755
index 00000000000..a9ac5a5df4f
--- /dev/null
+++ b/contrib/scripts/graphPerformanceResults.py
@@ -0,0 +1,81 @@
+#! /usr/bin/env python
+
+import argparse
+import inspect
+import subprocess
+import os
+import json
+import matplotlib.pyplot as plt
+import matplotlib.ticker
+
+parser = argparse.ArgumentParser(
+	description = inspect.cleandoc(
+	"""
+	Graph the performance results for a folder of json files.
+	""" )
+)
+
+parser.add_argument(
+	'jsonFolder',
+	help='Folder containing json files'
+)
+
+args = parser.parse_args()
+
+jsonFolder = vars( args )["jsonFolder"]
+
+jsonFileNames = [ i for i in os.listdir( jsonFolder ) if i.endswith( ".json" ) ]
+
+results = []
+for n in jsonFileNames:
+	f = open( "%s/%s" % ( jsonFolder, n ) )
+	d = json.load( f )
+	f.close()
+
+	r = {}
+	for key, value in d.items():
+		if "timings" in value:
+			r[ key.split()[1].split('.')[-1][:-1] + "." + key.split()[0]] = ( min( value["timings"] ), max( value["timings"] ) )
+
+	resultName = n[:-5]
+	resultOrder = resultName
+
+	try:
+		resultOrder = int( subprocess.check_output( [ "git", "rev-list", "--count", resultName ] ) )
+		resultName = subprocess.check_output( [ "git", "log", "--format=%B", "-n 1", resultName ] ).splitlines()[0].decode( "UTF-8" )
+	except:
+		pass
+
+	results.append( [resultOrder, resultName, r] )
+
+results.sort()
+results = [r[1:] for r in results ]
+
+ax = plt.subplot(111)
+
+curveNames = results[0][1].keys()
+curveNames = sorted( curveNames, key = lambda k : -results[-1][1][k][0] )
+for k in curveNames:
+	ax.plot( [ j[1][k][0] for j in results ], label = k )
+	# Use this line instead if you want to see variability
+	#ax.fill_between( range( len( results ) ), [ j[1][k][0] for j in results ], [ j[1][k][1] for j in results ], label = k )
+
+plt.yscale( "log", base = 2 )
+
+# Force labelling of every power of two
+plt.yticks( [2**i for i in range( -4, 4 )] )
+
+ax.xaxis.set_major_formatter( matplotlib.ticker.FuncFormatter( lambda i, pos : results[int(i)][0] ) )
+plt.xticks( rotation = 45, ha = "right" )
+
+box = ax.get_position()
+
+# Create margin
+ax.set_position([box.x0, box.y0 + box.height * 0.2, box.width * 0.8, box.height * 0.8])
+
+# Put a legend to the right of the current axis
+ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+
+# Currently hacking things up to work with very old matplotlib
+#plt.show_all()
+plt.show()
diff --git a/contrib/scripts/runTestsForCommits.py b/contrib/scripts/runTestsForCommits.py
new file mode 100755
index 00000000000..b435e58f8e0
--- /dev/null
+++ b/contrib/scripts/runTestsForCommits.py
@@ -0,0 +1,46 @@
+#! /usr/bin/env python
+
+import argparse
+import inspect
+import subprocess
+import os
+
+parser = argparse.ArgumentParser(
+	description = inspect.cleandoc(
+	"""
+	Run a selection of Gaffer tests for a specified set of commits,
+	outputting a separate json file for the tests for each commit.
+	""" )
+)
+
+parser.add_argument(
+	'--commits', action='store', nargs='+',
+	help='Hashes of commits to build'
+)
+
+parser.add_argument(
+	'--tests', action='store', nargs='+',
+	help='Names of tests to run'
+)
+
+parser.add_argument(
+	'--outputFolder', action='store', required = True,
+	help='Folder to put output json files to'
+)
+
+args = parser.parse_args()
+
+outputFolder = vars( args )["outputFolder"]
+try:
+	os.makedirs( outputFolder )
+except:
+	pass
+
+currentBranch = subprocess.check_output( [ "git", "stat", "-s", "-b" ] ).splitlines()[0].split()[1]
+print( currentBranch )
+for c in vars( args )["commits"]:
+	subprocess.check_call( [ "git", "checkout", c ] )
+	subprocess.check_call( [ "scons", "-j 16", "build" ] )
+	subprocess.check_call( [ "gaffer", "test" ] + vars( args )["tests"] + [ "-outputFile", "%s/%s.json" % ( outputFolder, c ) ] )
+
+subprocess.check_call( [ "git", "checkout", currentBranch ] )