From 94749860c834dab896dbe4907cc6ecd28840bb20 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Mon, 9 Jan 2023 15:47:51 -0800 Subject: [PATCH 01/20] RankFilter : Add perf tests --- python/GafferImageTest/DilateTest.py | 15 +++++++++++++++ python/GafferImageTest/ErodeTest.py | 15 +++++++++++++++ python/GafferImageTest/MedianTest.py | 15 +++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/python/GafferImageTest/DilateTest.py b/python/GafferImageTest/DilateTest.py index 0eca5f95580..5ccd861bc9d 100644 --- a/python/GafferImageTest/DilateTest.py +++ b/python/GafferImageTest/DilateTest.py @@ -138,5 +138,20 @@ def testDriverChannel( self ) : # a master self.assertImagesEqual( masterDilateSingleChannel["out"], defaultDilateSingleChannel["out"] ) + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 ) + def testPerf( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + GafferImageTest.processTiles( imageReader["out"] ) + + dilate = GafferImage.Dilate() + dilate["in"].setInput( imageReader["out"] ) + dilate["radius"].setValue( imath.V2i( 128 ) ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( dilate["out"] ) + if __name__ == "__main__": unittest.main() diff --git a/python/GafferImageTest/ErodeTest.py b/python/GafferImageTest/ErodeTest.py index 32faaebd933..ed8b7170b88 100644 --- a/python/GafferImageTest/ErodeTest.py +++ b/python/GafferImageTest/ErodeTest.py @@ -140,5 +140,20 @@ def testDriverChannel( self ) : # a master self.assertImagesEqual( masterErodeSingleChannel["out"], defaultErodeSingleChannel["out"] ) + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 ) + def testPerf( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + GafferImageTest.processTiles( imageReader["out"] ) + + erode = GafferImage.Erode() + erode["in"].setInput( imageReader["out"] ) + erode["radius"].setValue( imath.V2i( 128 ) ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( erode["out"] ) + if __name__ == "__main__": unittest.main() diff --git a/python/GafferImageTest/MedianTest.py b/python/GafferImageTest/MedianTest.py index c47b7b83708..9ebaf1c64ff 100644 --- a/python/GafferImageTest/MedianTest.py +++ b/python/GafferImageTest/MedianTest.py @@ -172,5 +172,20 @@ def testCancellation( self ) : bt.cancelAndWait() self.assertLess( time.time() - t, acceptableCancellationDelay ) + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 ) + def testPerf( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + GafferImageTest.processTiles( imageReader["out"] ) + + median = GafferImage.Median() + median["in"].setInput( imageReader["out"] ) + median["radius"].setValue( imath.V2i( 128 ) ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( median["out"] ) + if __name__ == "__main__": unittest.main() From 1bc8ae3b0b3e2df0c25a2cb19b02a661d527791f Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Fri, 12 May 2023 15:44:55 -0700 Subject: [PATCH 02/20] Resample : Test perf --- python/GafferImageTest/ResampleTest.py | 84 +++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/python/GafferImageTest/ResampleTest.py b/python/GafferImageTest/ResampleTest.py index ac96c7f53ee..d262f33ca47 100644 --- a/python/GafferImageTest/ResampleTest.py +++ b/python/GafferImageTest/ResampleTest.py @@ -151,7 +151,8 @@ def __test( fileName, size, filter ) : ] for args in tests : - __test( *args ) + with self.subTest( fileName = args[0], size = args[1], ftilter = args[2] ): + __test( *args ) def testSincUpsize( self ) : @@ -220,5 +221,86 @@ def testNonFlatThrows( self ) : self.assertRaisesDeepNotSupported( resample ) + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testPerfHorizontal( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + resize = GafferImage.Resize() + resize["in"].setInput( imageReader["out"] ) + resize["format"].setValue( GafferImage.Format( 1920, 1080, 1.000 ) ) + + resample = GafferImage.Resample() + resample["in"].setInput( resize["out"] ) + resample["filter"].setValue( 'box' ) + resample["filterScale"].setValue( imath.V2f( 300.0 ) ) + + GafferImageTest.processTiles( resize["out"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( resample["__horizontalPass"] ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testPerfVertical( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + resize = GafferImage.Resize() + resize["in"].setInput( imageReader["out"] ) + resize["format"].setValue( GafferImage.Format( 1920, 1080, 1.000 ) ) + + resample = GafferImage.Resample() + resample["in"].setInput( resize["out"] ) + resample["filter"].setValue( 'box' ) + resample["filterScale"].setValue( imath.V2f( 300.0 ) ) + + GafferImageTest.processTiles( resample["__horizontalPass"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( resample["out"] ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testPerfSmallFilter( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + resize = GafferImage.Resize() + resize["in"].setInput( imageReader["out"] ) + resize["format"].setValue( GafferImage.Format( 4000, 4000, 1.000 ) ) + + resample = GafferImage.Resample() + resample["in"].setInput( resize["out"] ) + resample["filter"].setValue( 'box' ) + resample["filterScale"].setValue( imath.V2f( 4.0 ) ) + + GafferImageTest.processTiles( resize["out"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( resample["out"] ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testPerfVerySmallFilter( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + resize = GafferImage.Resize() + resize["in"].setInput( imageReader["out"] ) + resize["format"].setValue( GafferImage.Format( 4000, 4000, 1.000 ) ) + + resample = GafferImage.Resample() + resample["in"].setInput( resize["out"] ) + resample["filter"].setValue( 'box' ) + resample["filterScale"].setValue( imath.V2f( 2.0 ) ) + + GafferImageTest.processTiles( resize["out"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( resample["out"] ) + + if __name__ == "__main__": unittest.main() From 7778e7463860e1e244175a1c9fac1639df264752 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Mon, 15 May 2023 08:57:20 -0700 Subject: [PATCH 03/20] Resize : Add perf tests --- python/GafferImageTest/ResizeTest.py | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/python/GafferImageTest/ResizeTest.py b/python/GafferImageTest/ResizeTest.py index 387e827611a..c57c1e7c84c 100644 --- a/python/GafferImageTest/ResizeTest.py +++ b/python/GafferImageTest/ResizeTest.py @@ -306,5 +306,35 @@ def testPixelAspectRatio( self ) : self.assertEqual( r["out"]["dataWindow"].getValue(), r["out"]["format"].getValue().getDisplayWindow() ) + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testSimplestUpscalePerf( self ) : + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + r = GafferImage.Resize() + r["in"].setInput( imageReader["out"] ) + r["format"].setValue( GafferImage.Format( 10000, 10000 ) ) + r["filter"].setValue( 'box' ) + + GafferImageTest.processTiles( imageReader["out"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( r["out"] ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testSimpleUpscalePerf( self ) : + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + r = GafferImage.Resize() + r["in"].setInput( imageReader["out"] ) + r["format"].setValue( GafferImage.Format( 10000, 10000 ) ) + r["filter"].setValue( 'triangle' ) + + GafferImageTest.processTiles( imageReader["out"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( r["out"] ) + if __name__ == "__main__": unittest.main() From 6bf9ac4e5ff3c1231ea2bf3b7108e4f1d2dab1c6 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Fri, 3 Mar 2023 18:24:51 -0800 Subject: [PATCH 04/20] DilateTest : Add test that should make certain errors obvious --- python/GafferImageTest/DilateTest.py | 46 ++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/python/GafferImageTest/DilateTest.py b/python/GafferImageTest/DilateTest.py index 5ccd861bc9d..2bb6faf3b6e 100644 --- a/python/GafferImageTest/DilateTest.py +++ b/python/GafferImageTest/DilateTest.py @@ -90,6 +90,52 @@ def testFilter( self ) : self.assertEqual( s.sample( 112, 112 ), 1.0 ) + def testShape( self ): + constant1 = GafferImage.Constant( "constant1" ) + constant1["color"].setValue( imath.Color4f( 1, 1, 1, 1 ) ) + crop1 = GafferImage.Crop( "crop1" ) + crop1["in"].setInput( constant1["out"] ) + crop1["area"].setValue( imath.Box2i( imath.V2i( 3, 7 ), imath.V2i( 4, 8 ) ) ) + crop1["affectDisplayWindow"].setValue( False ) + + constant2 = GafferImage.Constant( "constant2" ) + constant2["color"].setValue( imath.Color4f( 2, 2, 2, 1 ) ) + crop2 = GafferImage.Crop( "crop2" ) + crop2["in"].setInput( constant2["out"] ) + crop2["area"].setValue( imath.Box2i( imath.V2i( 6, 4 ), imath.V2i( 7, 5 ) ) ) + crop2["affectDisplayWindow"].setValue( False ) + + merge = GafferImage.Merge( "merge" ) + merge["in"][0].setInput( crop1["out"] ) + merge["in"][1].setInput( crop2["out"] ) + merge["operation"].setValue( 8 ) + + dilate = GafferImage.Dilate( "dilate" ) + dilate["in"].setInput( merge["out"] ) + dilate["expandDataWindow"].setValue( True ) + + expectedResultsText = """ +0000000000 0000000000 0000000000 +0000000000 0000000000 0000000000 +0000000000 0000000000 0000222220 +0000000000 0000022200 0000222220 +0000002000 0000022200 0000222220 +0000000000 0000022200 0111222220 +0000000000 0011100000 0111222220 +0001000000 0011100000 0111110000 +0000000000 0011100000 0111110000 +0000000000 0000000000 0111110000 +""" + expectedResults = expectedResultsText.splitlines()[1:] + + for r in [ 0, 1, 2 ]: + dilate["radius"].setValue( imath.V2i( r ) ) + + s = GafferImage.Sampler( dilate['out'], "R", imath.Box2i( imath.V2i( 0 ), imath.V2i( 10 ) ) ) + for y in range( 10 ): + for x in range( 10 ): + self.assertEqual( s.sample( x, y ), int( expectedResults[ y ][ x + r * 11 ] ) ) + def testDriverChannel( self ) : rRaw = GafferImage.ImageReader() From b63523abfaf4cea2c5bd3ef495b962929de5ccae Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Wed, 17 May 2023 11:32:32 -0700 Subject: [PATCH 05/20] ResampleTest : Add perf tests for inseparable filters --- python/GafferImageTest/ResampleTest.py | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/python/GafferImageTest/ResampleTest.py b/python/GafferImageTest/ResampleTest.py index d262f33ca47..0f8f3ce39f0 100644 --- a/python/GafferImageTest/ResampleTest.py +++ b/python/GafferImageTest/ResampleTest.py @@ -301,6 +301,46 @@ def testPerfVerySmallFilter( self ) : with GafferTest.TestRunner.PerformanceScope() : GafferImageTest.processTiles( resample["out"] ) + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 ) + def testPerfInseparableLanczos( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + resize = GafferImage.Resize() + resize["in"].setInput( imageReader["out"] ) + resize["format"].setValue( GafferImage.Format( 64, 64 ) ) + + resample = GafferImage.Resample() + resample["in"].setInput( resize["out"] ) + resample["filter"].setValue( 'radial-lanczos3' ) + resample["filterScale"].setValue( imath.V2f( 20.0 ) ) + + GafferImageTest.processTiles( resize["out"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( resample["out"] ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 1 ) + def testPerfInseparableDisk( self ) : + + imageReader = GafferImage.ImageReader() + imageReader["fileName"].setValue( self.imagesPath() / 'deepMergeReference.exr' ) + + resize = GafferImage.Resize() + resize["in"].setInput( imageReader["out"] ) + resize["format"].setValue( GafferImage.Format( 1920, 1920 ) ) + + resample = GafferImage.Resample() + resample["in"].setInput( resize["out"] ) + resample["filter"].setValue( 'disk' ) + resample["filterScale"].setValue( imath.V2f( 20.0 ) ) + + GafferImageTest.processTiles( resize["out"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( resample["out"] ) + if __name__ == "__main__": unittest.main() From 3eb61f708229e9c4f25bb0ac15418a6bbca388c2 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Tue, 16 May 2023 14:53:52 -0700 Subject: [PATCH 06/20] Warp : Add support for "bilinear" filter This is a very low quality, but fast approach ( it also offer a way of profiling the performance of Sampler::sample( float, float ), which previously wasn't used anywhere critical ) --- python/GafferImageUI/WarpUI.py | 8 ++++++-- src/GafferImage/Warp.cpp | 33 ++++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/python/GafferImageUI/WarpUI.py b/python/GafferImageUI/WarpUI.py index 924890c1d11..8567c225dec 100644 --- a/python/GafferImageUI/WarpUI.py +++ b/python/GafferImageUI/WarpUI.py @@ -69,8 +69,10 @@ """ The filter used to perform the resampling. The name of any OIIO filter may be specified, but this UI - only exposes a limited range of 4 options which perform - well for warping, ordered from softest to sharpest. + only exposes a limited range of 5 options which perform + well for warping, ordered from softest to sharpest. Plus + the extra "bilinear" mode which is lower quality, but + fast. """, "plugValueWidget:type", "GafferUI.PresetsPlugValueWidget", @@ -80,6 +82,7 @@ "preset:Keys", "keys", "preset:Simon", "simon", "preset:Rifman", "rifman", + "preset:Bilinear", "bilinear", ], @@ -94,6 +97,7 @@ """, "userDefault", False, + "layout:activator", lambda plug : plug.node()["filter"].getValue() != "bilinear", ], } diff --git a/src/GafferImage/Warp.cpp b/src/GafferImage/Warp.cpp index 0503a29e34a..82c2feeb9e0 100644 --- a/src/GafferImage/Warp.cpp +++ b/src/GafferImage/Warp.cpp @@ -309,7 +309,8 @@ void Warp::hash( const Gaffer::ValuePlug *output, const Gaffer::Context *context h.append( tileOrigin ); enginePlug()->hash( h ); - bool useDerivatives = useDerivativesPlug()->getValue(); + std::string filterName = filterPlug()->getValue(); + bool useDerivatives = ( filterName != "bilinear" ) && useDerivativesPlug()->getValue(); h.append( useDerivatives ); if( useDerivatives ) { @@ -331,7 +332,7 @@ void Warp::hash( const Gaffer::ValuePlug *output, const Gaffer::Context *context // The sampleRegionsPlug() includes an overall bound for the tile which depends on the filter // support width - filterPlug()->hash( h ); + h.append( filterName ); } FlatImageProcessor::hash( output, context, h ); @@ -359,8 +360,14 @@ void Warp::compute( Gaffer::ValuePlug *output, const Gaffer::Context *context ) dataWindow = outPlug()->dataWindowPlug()->getValue(); } - const OIIO::Filter2D *filter = FilterAlgo::acquireFilter( filterPlug()->getValue() ); - const float filterWidth = filter->width(); + std::string filterName = filterPlug()->getValue(); + const OIIO::Filter2D *filter = nullptr; + float filterWidth = 1.0f; + if( filterName != "bilinear" ) + { + filter = FilterAlgo::acquireFilter( filterPlug()->getValue() ); + filterWidth = filter->width(); + } const V2i tileOrigin = context->get( ImagePlug::tileOriginContextName ); @@ -417,7 +424,7 @@ void Warp::compute( Gaffer::ValuePlug *output, const Gaffer::Context *context ) std::vector< V2f > &pixelInputDerivatives = pixelInputDerivativesData->writable(); pixelInputDerivatives.reserve( ImagePlug::tileSize() * ImagePlug::tileSize() ); - bool useDerivatives = useDerivativesPlug()->getValue(); + bool useDerivatives = filter && useDerivativesPlug()->getValue(); if( useDerivatives ) { @@ -650,7 +657,12 @@ IECore::ConstFloatVectorDataPtr Warp::computeChannelData( const std::string &cha vector &result = resultData->writable(); result.reserve( ImagePlug::tileSize() * ImagePlug::tileSize() ); - const OIIO::Filter2D *filter = FilterAlgo::acquireFilter( filterPlug()->getValue() ); + std::string filterName = filterPlug()->getValue(); + const OIIO::Filter2D *filter = nullptr; + if( filterName != "bilinear" ) + { + filter = FilterAlgo::acquireFilter( filterPlug()->getValue() ); + } const Box2i &tileInputBound = sampleRegions->member< Box2iData >( g_tileInputBoundName, true )->readable(); @@ -685,7 +697,14 @@ IECore::ConstFloatVectorDataPtr Warp::computeChannelData( const std::string &cha const V2f &input = pixelInputPositions[i]; if( input != Engine::black ) { - v = FilterAlgo::sampleBox( sampler, input, pixelInputDerivatives[i].x, pixelInputDerivatives[i].y, filter, scratchMemory ); + if( filter ) + { + v = FilterAlgo::sampleBox( sampler, input, pixelInputDerivatives[i].x, pixelInputDerivatives[i].y, filter, scratchMemory ); + } + else + { + v = sampler.sample( input.x, input.y ); + } } } result.push_back( v ); From b660cec5626f148b2909d9c63cc71816dd65d7a4 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Tue, 16 May 2023 14:55:11 -0700 Subject: [PATCH 07/20] VectorWarp : Add perf tests --- python/GafferImageTest/VectorWarpTest.py | 59 ++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/python/GafferImageTest/VectorWarpTest.py b/python/GafferImageTest/VectorWarpTest.py index 5e149b752f8..4abd3c89451 100644 --- a/python/GafferImageTest/VectorWarpTest.py +++ b/python/GafferImageTest/VectorWarpTest.py @@ -170,6 +170,65 @@ def testWarpImage( self ): self.assertImagesEqual( vectorWarp["out"], expectedReader["out"], maxDifference = 0.0005, ignoreMetadata = True ) + def runPerfTest( self, sourceRes, resultRes, filter, useDerivs ): + warpPatternReader = GafferImage.ImageReader() + warpPatternReader["fileName"].setValue( self.imagesPath() / "warpPattern.exr" ) + + resize = GafferImage.Resize() + resize["in"].setInput( warpPatternReader["out"] ) + resize['format']["displayWindow"]["min"].setValue( imath.V2i( 0 ) ) + resize['format']["displayWindow"]["max"].setValue( imath.V2i( resultRes ) ) + + xRamp = GafferImage.Ramp() + xRamp["format"].setValue( GafferImage.Format( resultRes, resultRes ) ) + xRamp["endPosition"].setValue( imath.V2f( resultRes, 0 ) ) + xRamp["ramp"]["p1"]["y"].setValue( imath.Color4f( 1, 0, 0, 1 ) ) + yRamp = GafferImage.Ramp() + yRamp["format"].setValue( GafferImage.Format( resultRes, resultRes ) ) + yRamp["endPosition"].setValue( imath.V2f( 0, resultRes ) ) + yRamp["ramp"]["p1"]["y"].setValue( imath.Color4f( 0, 1, 0, 1 ) ) + + toAbsoluteMerge = GafferImage.Merge() + toAbsoluteMerge["operation"].setValue( GafferImage.Merge.Operation.Add ) + toAbsoluteMerge["in"]["in0"].setInput( xRamp["out"] ) + toAbsoluteMerge["in"]["in1"].setInput( yRamp["out"] ) + toAbsoluteMerge["in"]["in2"].setInput( resize["out"] ) + + dotGridReader = GafferImage.ImageReader() + dotGridReader["fileName"].setValue( self.imagesPath() / "dotGrid.300.exr" ) + + sourceResize = GafferImage.Resize() + sourceResize["in"].setInput( dotGridReader["out"] ) + sourceResize['format']["displayWindow"]["min"].setValue( imath.V2i( 0 ) ) + sourceResize['format']["displayWindow"]["max"].setValue( imath.V2i( sourceRes ) ) + + vectorWarp = GafferImage.VectorWarp() + vectorWarp["in"].setInput( sourceResize["out"] ) + vectorWarp["vector"].setInput( toAbsoluteMerge["out"] ) + vectorWarp["filter"].setValue( filter ) + vectorWarp["useDerivatives"].setValue( useDerivs ) + + GafferImageTest.processTiles( toAbsoluteMerge["out"] ) + GafferImageTest.processTiles( sourceResize["out"] ) + + with GafferTest.TestRunner.PerformanceScope() : + GafferImageTest.processTiles( vectorWarp["out"] ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testDerivsPerf( self ): + self.runPerfTest( 300, 2500, "cubic", True ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testNoDerivsPerf( self ): + self.runPerfTest( 300, 2500, "cubic", False ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testBilinearPerf( self ): + self.runPerfTest( 300, 2500, "bilinear", False ) + + @GafferTest.TestRunner.PerformanceTestMethod( repeat = 5 ) + def testDownsamplePerf( self ): + self.runPerfTest( 6000, 300, "cubic", True ) if __name__ == "__main__": unittest.main() From 378da659fc9df0cd1129bfb07cf8968f5fdbac40 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Tue, 20 Jun 2023 14:45:52 -0700 Subject: [PATCH 08/20] ImageTransformTest : Pre-cache input for performance tests --- python/GafferImageTest/ImageTransformTest.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/GafferImageTest/ImageTransformTest.py b/python/GafferImageTest/ImageTransformTest.py index 69004d72aa2..e6872a97e71 100644 --- a/python/GafferImageTest/ImageTransformTest.py +++ b/python/GafferImageTest/ImageTransformTest.py @@ -356,6 +356,8 @@ def testRotationPerformance( self ) : transform["transform"]["pivot"].setValue( imath.V2f( 1500 ) ) transform["transform"]["rotate"].setValue( 2.5 ) + GafferImageTest.processTiles( checker["out"] ) + with GafferTest.TestRunner.PerformanceScope() : GafferImageTest.processTiles( transform["out"] ) @@ -369,6 +371,8 @@ def testTranslationPerformance( self ) : transform["in"].setInput( checker["out"] ) transform["transform"]["translate"].setValue( imath.V2f( 2.2 ) ) + GafferImageTest.processTiles( checker["out"] ) + with GafferTest.TestRunner.PerformanceScope() : GafferImageTest.processTiles( transform["out"] ) @@ -382,6 +386,8 @@ def testDownsizingPerformance( self ) : transform["in"].setInput( checker["out"] ) transform["transform"]["scale"].setValue( imath.V2f( 0.1 ) ) + GafferImageTest.processTiles( checker["out"] ) + with GafferTest.TestRunner.PerformanceScope() : GafferImageTest.processTiles( transform["out"] ) @@ -395,6 +401,8 @@ def testUpsizingPerformance( self ) : transform["in"].setInput( checker["out"] ) transform["transform"]["scale"].setValue( imath.V2f( 3 ) ) + GafferImageTest.processTiles( checker["out"] ) + with GafferTest.TestRunner.PerformanceScope() : GafferImageTest.processTiles( transform["out"] ) @@ -410,6 +418,8 @@ def testRotationAndScalingPerformance( self ) : transform["transform"]["rotate"].setValue( 2.5 ) transform["transform"]["scale"].setValue( imath.V2f( 0.75 ) ) + GafferImageTest.processTiles( checker["out"] ) + with GafferTest.TestRunner.PerformanceScope() : GafferImageTest.processTiles( transform["out"] ) @@ -428,6 +438,8 @@ def testConcatenationPerformance1( self ) : transform2["in"].setInput( transform1["out"] ) transform2["transform"]["translate"].setValue( imath.V2f( 10 ) ) + GafferImageTest.processTiles( checker["out"] ) + with GafferTest.TestRunner.PerformanceScope() : GafferImageTest.processTiles( transform2["out"] ) @@ -447,6 +459,8 @@ def testConcatenationPerformance2( self ) : transform2["in"].setInput( transform1["out"] ) transform2["transform"]["translate"].setValue( imath.V2f( 10 ) ) + GafferImageTest.processTiles( checker["out"] ) + with GafferTest.TestRunner.PerformanceScope() : GafferImageTest.processTiles( transform2["out"] ) From 74c01926bdf21f71e826f8a5faa4ddd97c02ce77 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Fri, 12 May 2023 18:49:34 -0700 Subject: [PATCH 09/20] ImagePlug : Make fundamental constants public constexpr Might as well make it easy for people who want to do fast things with images? --- include/GafferImage/ImagePlug.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/GafferImage/ImagePlug.h b/include/GafferImage/ImagePlug.h index 3923bbdae21..8a5862871f8 100644 --- a/include/GafferImage/ImagePlug.h +++ b/include/GafferImage/ImagePlug.h @@ -256,8 +256,8 @@ class GAFFERIMAGE_API ImagePlug : public Gaffer::ValuePlug static const IECore::FloatVectorData *blackTile(); static const IECore::FloatVectorData *whiteTile(); - static int tileSize() { return 1 << tileSizeLog2(); }; - static int tilePixels() { return tileSize() * tileSize(); }; + static constexpr int tileSize() { return 1 << tileSizeLog2(); }; + static constexpr int tilePixels() { return tileSize() * tileSize(); }; /// Returns the index of the tile containing a point /// This just means dividing by tile size ( always rounding down ) @@ -286,9 +286,9 @@ class GAFFERIMAGE_API ImagePlug : public Gaffer::ValuePlug }; //@} - private : + static constexpr int tileSizeLog2() { return 7; }; - static int tileSizeLog2() { return 7; }; + private : static void compoundObjectToCompoundData( const IECore::CompoundObject *object, IECore::CompoundData *data ); From 564c9fb950c6d93a9a9de67d2572426cfec6ad01 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Mon, 15 May 2023 18:42:43 -0700 Subject: [PATCH 10/20] Sampler : Internal optimizations --- include/GafferImage/Sampler.h | 5 +++-- include/GafferImage/Sampler.inl | 26 +++++++++++--------------- src/GafferImage/Sampler.cpp | 6 ++++++ 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/GafferImage/Sampler.h b/include/GafferImage/Sampler.h index bdb7bbd03b8..837d3d56718 100644 --- a/include/GafferImage/Sampler.h +++ b/include/GafferImage/Sampler.h @@ -106,8 +106,8 @@ class GAFFERIMAGE_API Sampler /// Cached data access /// @param p Any point within the cache that we wish to retrieve the data for. /// @param tileData Is set to the tile's channel data. - /// @param tilePixelIndex XY indices that can be used to access the colour value of point 'p' from tileData. - void cachedData( Imath::V2i p, const float *& tileData, Imath::V2i &tilePixelIndex ); + /// @param tilePixelIndex Is set to the index used to access the colour value of point 'p' from tileData. + void cachedData( Imath::V2i p, const float *& tileData, int &tilePixelIndex ); const ImagePlug *m_plug; const std::string m_channelName; @@ -117,6 +117,7 @@ class GAFFERIMAGE_API Sampler std::vector< IECore::ConstFloatVectorDataPtr > m_dataCache; std::vector< const float * > m_dataCacheRaw; Imath::Box2i m_cacheWindow; + int m_cacheOriginIndex; int m_cacheWidth; int m_boundingMode; diff --git a/include/GafferImage/Sampler.inl b/include/GafferImage/Sampler.inl index dd8bc940d30..f153a7371cd 100644 --- a/include/GafferImage/Sampler.inl +++ b/include/GafferImage/Sampler.inl @@ -65,18 +65,14 @@ inline float Sampler::sample( int x, int y ) } else { - if( BufferAlgo::empty( m_dataWindow ) ) - { - return 0.0f; - } p = BufferAlgo::clamp( p, m_dataWindow ); } } const float *tileData; - Imath::V2i tileIndex; - cachedData( p, tileData, tileIndex ); - return *(tileData + tileIndex.y * ImagePlug::tileSize() + tileIndex.x); + int tilePixelIndex; + cachedData( p, tileData, tilePixelIndex ); + return *( tileData + tilePixelIndex ); } inline float Sampler::sample( float x, float y ) @@ -94,23 +90,23 @@ inline float Sampler::sample( float x, float y ) return OIIO::bilerp( x0y0, x1y0, x0y1, x1y1, xf, yf ); } -inline void Sampler::cachedData( Imath::V2i p, const float *& tileData, Imath::V2i &tilePixelIndex ) +inline void Sampler::cachedData( Imath::V2i p, const float *& tileData, int &tilePixelIndex ) { // Get the smart pointer to the tile we want. - Imath::V2i relP = p - m_cacheWindow.min; - Imath::V2i cacheIndex = ImagePlug::tileIndex( relP ); - tilePixelIndex = relP - cacheIndex * ImagePlug::tileSize(); + constexpr int lowMask = ( 1 << ImagePlug::tileSizeLog2() ) - 1; + int cacheIndex = ( p.x >> ImagePlug::tileSizeLog2() ) + m_cacheWidth * ( p.y >> ImagePlug::tileSizeLog2() ) - m_cacheOriginIndex; + + tilePixelIndex = ( p.x & lowMask ) + ( ( p.y & lowMask ) << ImagePlug::tileSizeLog2() ); - int cacheI = cacheIndex.x + cacheIndex.y * m_cacheWidth; - const float *&cacheTileRawPtr = m_dataCacheRaw[cacheI]; + const float *&cacheTileRawPtr = m_dataCacheRaw[cacheIndex]; if ( cacheTileRawPtr == nullptr ) { // Get the origin of the tile we want. - Imath::V2i tileOrigin = ( ImagePlug::tileIndex( m_cacheWindow.min ) + cacheIndex ) * ImagePlug::tileSize(); + Imath::V2i tileOrigin( p.x & ~( ImagePlug::tileSize() - 1 ), p.y & ~( ImagePlug::tileSize() - 1 ) ); - IECore::ConstFloatVectorDataPtr &cacheTilePtr = m_dataCache[ cacheI ]; + IECore::ConstFloatVectorDataPtr &cacheTilePtr = m_dataCache[ cacheIndex ]; cacheTilePtr = m_plug->channelData( m_channelName, tileOrigin ); cacheTileRawPtr = &cacheTilePtr->readable()[0]; } diff --git a/src/GafferImage/Sampler.cpp b/src/GafferImage/Sampler.cpp index f4301711a62..c7efc09ec15 100644 --- a/src/GafferImage/Sampler.cpp +++ b/src/GafferImage/Sampler.cpp @@ -73,6 +73,10 @@ Sampler::Sampler( const GafferImage::ImagePlug *plug, const std::string &channel // all bounds checking. m_boundingMode = -1; } + else if( BufferAlgo::empty( m_dataWindow ) ) + { + m_boundingMode = Black; + } // Compute the area we need to cache in order to // be able to service calls within m_sampleWindow @@ -105,6 +109,8 @@ Sampler::Sampler( const GafferImage::ImagePlug *plug, const std::string &channel int cacheHeight = int( ceil( float( m_cacheWindow.size().y ) / ImagePlug::tileSize() ) ); m_dataCache.resize( m_cacheWidth * cacheHeight, nullptr ); m_dataCacheRaw.resize( m_cacheWidth * cacheHeight, nullptr ); + + m_cacheOriginIndex = ( m_cacheWindow.min.x >> ImagePlug::tileSizeLog2() ) + m_cacheWidth * ( m_cacheWindow.min.y >> ImagePlug::tileSizeLog2() ); } void Sampler::hash( IECore::MurmurHash &h ) const From 57cc62261a2dc188cde968e1ce71f75f7b35fc3a Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Mon, 15 May 2023 08:57:58 -0700 Subject: [PATCH 11/20] GafferImage::Sampler : Add visitPixels() --- include/GafferImage/Sampler.h | 8 ++ include/GafferImage/Sampler.inl | 185 ++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) diff --git a/include/GafferImage/Sampler.h b/include/GafferImage/Sampler.h index 837d3d56718..382483b8548 100644 --- a/include/GafferImage/Sampler.h +++ b/include/GafferImage/Sampler.h @@ -94,6 +94,14 @@ class GAFFERIMAGE_API Sampler /// 0.5, 0.5. float sample( float x, float y ); + /// Call a functor for all pixels in the region. + /// Much faster than calling sample(int,int) repeatedly for every pixel in the + /// region, up to 5 times faster in practical cases. + /// The signature of the functor must be `F( float value, int x, int y )`. + /// Each pixel is visited in order of increasing X, then increasing Y. + template + inline void visitPixels( const Imath::Box2i ®ion, F &&lambda ); + /// Appends a hash that represent all the pixel /// values within the requested sample area. void hash( IECore::MurmurHash &h ) const; diff --git a/include/GafferImage/Sampler.inl b/include/GafferImage/Sampler.inl index f153a7371cd..30adb1a5721 100644 --- a/include/GafferImage/Sampler.inl +++ b/include/GafferImage/Sampler.inl @@ -90,6 +90,191 @@ inline float Sampler::sample( float x, float y ) return OIIO::bilerp( x0y0, x1y0, x0y1, x1y1, xf, yf ); } +template +inline void Sampler::visitPixels( const Imath::Box2i ®ion, F &&visitor ) +{ + int x = region.min.x; + int y = region.min.y; + + if( region.max.x == region.min.x + 1 ) + { + // The general principle of visitPixels is to find contiguous spans in X, precompute the length + // of the span, and then process the whole span in a very tight inner loop with minimal branching + // where moving to the next pixel takes just an increment. When the region is just + // one pixel wide, however, looking for X spans is really not helping, so we have a big special + // case to instead look for vertical spans. + while( y < region.max.y ) + { + // Our goal is to prepare these 3 variables: + // valuePointer or constantValue will hold the data, and count will hold how many consecutive pixels + // we can visit without recomputing valuePointer/constantValue. + // The default is that valuePointer is null, and constantValue is 0, meaning that visited pixels + // will be treated as 0 unless we set one of the values. + + int count; + const float *valuePointer = nullptr; + float constantValue = 0.0f; + + // If we are clamping, then we will always treat x as if it is within the nearest edge of the data + // window. + int clampedX = x; + if( m_boundingMode == Clamp ) + { + clampedX = std::clamp( x, m_dataWindow.min.x, m_dataWindow.max.x - 1 ); + } + + if( clampedX < m_dataWindow.min.x || clampedX >= m_dataWindow.max.x ) + { + // If we're outside, and haven't been clamped, then just use the default constantValue of 0. + count = region.max.y - y; + } + else if( y < m_dataWindow.min.y ) + { + // Everything before the dataWindow gets the first value in the data window, or 0 if not clamped + if( m_boundingMode == Clamp ) + { + const float *tileData; + int tilePixelIndex; + cachedData( Imath::V2i( clampedX, m_dataWindow.min.y ), tileData, tilePixelIndex ); + constantValue = tileData[ tilePixelIndex ]; + } + count = std::min( region.max.y, m_dataWindow.min.y ) - y; + } + else if( y >= m_dataWindow.max.y ) + { + // Everything after the dataWindow gets the last value in the data window, or 0 if not clamped + if( m_boundingMode == Clamp ) + { + const float *tileData; + int tilePixelIndex; + cachedData( Imath::V2i( clampedX, m_dataWindow.max.y - 1 ), tileData, tilePixelIndex ); + constantValue = tileData[ tilePixelIndex ]; + } + count = region.max.y - y; + } + else + { + // We're actually in the data window ( or clamped to the side of it ). Need to actually + // set valuePointer + const float *tileData; + int tilePixelIndex; + cachedData( Imath::V2i( clampedX, y ), tileData, tilePixelIndex ); + int pixelY = tilePixelIndex >> ImagePlug::tileSizeLog2(); + count = std::min( std::min( m_dataWindow.max.y, region.max.y ) - y, ImagePlug::tileSize() - pixelY ); + valuePointer = &tileData[ tilePixelIndex ]; + } + + // Now we can do the nice tight inner loop where we call visitor repeatedly with valuePointer, or + // constantValue + if( valuePointer ) + { + for( int i = 0; i < count; i++ ) + { + visitor( valuePointer[i << ImagePlug::tileSizeLog2()], x, y + i ); + } + } + else + { + for( int i = 0; i < count; i++ ) + { + visitor( constantValue, x, y + i ); + } + } + + y += count; + } + return; + } + + // We didn't take the vertical special case above, so we're going to visit horizontal scanlines + while( y < region.max.y ) + { + // As above, our goal is to prepare these 3 variables: + // valuePointer or constantValue will hold the data, and count will hold how many consecutive pixels + // we can visit without recomputing valuePointer/constantValue. + // The default is that valuePointer is null, and constantValue is 0, meaning that visited pixels + // will be treated as 0 unless we set one of the values. + int count; + const float *valuePointer = nullptr; + float constantValue = 0.0f; + + // If we are clamping, then we will always treat y as if it is within the nearest edge of the data + // window ( reading a scanline below the data window yields the same result as reading a scanline + // of the bottom edge of the data window ) + int clampedY = y; + if( m_boundingMode == Clamp ) + { + clampedY = std::clamp( y, m_dataWindow.min.y, m_dataWindow.max.y - 1 ); + } + + if( clampedY < m_dataWindow.min.y || clampedY >= m_dataWindow.max.y ) + { + // If we're outside, and haven't been clamped, then just use the default constantValue of 0. + count = region.max.x - x; + } + else if( x < m_dataWindow.min.x ) + { + // Everything left of the dataWindow gets the first value in the data window on this row, or 0 if not clamped + if( m_boundingMode == Clamp ) + { + const float *tileData; + int tilePixelIndex; + cachedData( Imath::V2i( m_dataWindow.min.x, clampedY ), tileData, tilePixelIndex ); + constantValue = tileData[ tilePixelIndex ]; + } + count = std::min( region.max.x, m_dataWindow.min.x ) - x; + } + else if( x >= m_dataWindow.max.x ) + { + // Everything right of the dataWindow gets the last value in the data window on this row, or 0 if not clamped + if( m_boundingMode == Clamp ) + { + const float *tileData; + int tilePixelIndex; + cachedData( Imath::V2i( m_dataWindow.max.x - 1, clampedY ), tileData, tilePixelIndex ); + constantValue = tileData[ tilePixelIndex ]; + } + count = region.max.x - x; + } + else + { + // We're actually in the data window ( or clamped to the top or bottom of it ). Need to actually + // set valuePointer + const float *tileData; + int tilePixelIndex; + cachedData( Imath::V2i( x, clampedY ), tileData, tilePixelIndex ); + int tileX = tilePixelIndex & ( ImagePlug::tileSize() - 1 ); + count = std::min( std::min( m_dataWindow.max.x, region.max.x ) - x, ImagePlug::tileSize() - tileX ); + valuePointer = &tileData[ tilePixelIndex ]; + } + + // Now we can do the nice tight inner loop where we call visitor repeatedly with valuePointer, or + // constantValue + if( valuePointer ) + { + for( int i = 0; i < count; i++ ) + { + visitor( valuePointer[i], x + i, y ); + } + } + else + { + for( int i = 0; i < count; i++ ) + { + visitor( constantValue, x + i, y ); + } + } + + // Advance in the scanline, and advance to the next scanline if we've finished + x += count; + if( x == region.max.x ) + { + y++; + x = region.min.x; + } + } +} + inline void Sampler::cachedData( Imath::V2i p, const float *& tileData, int &tilePixelIndex ) { // Get the smart pointer to the tile we want. From 22e069f06bf41ae4ea3d36a2f7d2b84c321b1fe3 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Mon, 15 May 2023 16:03:14 -0700 Subject: [PATCH 12/20] GafferImageTest : Add validateVisitPixels --- SConstruct | 4 +- .../GafferImageTestModule.cpp | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/SConstruct b/SConstruct index a4aec95216e..3c3cb4b4cac 100644 --- a/SConstruct +++ b/SConstruct @@ -1078,10 +1078,10 @@ libraries = { "GafferImageTest" : { "envAppends" : { - "LIBS" : [ "Gaffer", "GafferImage", "OpenImageIO$OIIO_LIB_SUFFIX", ], + "LIBS" : [ "Gaffer", "GafferImage", "OpenImageIO$OIIO_LIB_SUFFIX" ], }, "pythonEnvAppends" : { - "LIBS" : [ "GafferImage", "GafferImageTest" ], + "LIBS" : [ "GafferImage", "GafferImageTest", "fmt" ], }, "additionalFiles" : glob.glob( "python/GafferImageTest/scripts/*" ) + glob.glob( "python/GafferImageTest/images/*" ) + diff --git a/src/GafferImageTestModule/GafferImageTestModule.cpp b/src/GafferImageTestModule/GafferImageTestModule.cpp index 2eace90ef3a..5bdd81d7acb 100644 --- a/src/GafferImageTestModule/GafferImageTestModule.cpp +++ b/src/GafferImageTestModule/GafferImageTestModule.cpp @@ -35,6 +35,7 @@ ////////////////////////////////////////////////////////////////////////// #include "boost/python.hpp" +#include "fmt/format.h" #include "GafferImageTest/ContextSanitiser.h" @@ -43,6 +44,7 @@ #include "GafferImage/ImageAlgo.h" #include "GafferImage/ImagePlug.h" #include "GafferImage/Format.h" +#include "GafferImage/Sampler.h" #include "Gaffer/Node.h" @@ -118,6 +120,43 @@ void testEditableScopeForFormat() ); } +void validateVisitPixels( GafferImage::Sampler &sampler, const Imath::Box2i ®ion ) +{ + int i = 0; + int sizeX = region.size().x; + sampler.visitPixels( region, + [®ion, &sampler, &i, sizeX ] ( float value, int x, int y ) + { + int expectedX = ( i % sizeX ) + region.min.x; + int expectedY = ( i / sizeX ) + region.min.y; + if( x != expectedX || y != expectedY ) + { + throw IECore::Exception( fmt::format ( + "visitPixels passed incorrect coordinate - expected {},{}, received {},{}", + expectedX, expectedY, x, y + ) ); + } + + float expectedValue = sampler.sample( x, y ); + if( value != expectedValue ) + { + throw IECore::Exception( fmt::format( + "visitPixels passed incorrect value for pixel {},{} - expected {} received {}", + x, y, expectedValue, value + ) ); + } + i++; + } + ); + if( i != region.size().x * region.size().y ) + { + throw IECore::Exception( fmt::format( + "visitPixels processed wrong number of pixels: visited {} in region of size {},{}", + i, region.size().x, region.size().y + ) ); + } +} + } // namespace BOOST_PYTHON_MODULE( _GafferImageTest ) @@ -129,4 +168,5 @@ BOOST_PYTHON_MODULE( _GafferImageTest ) def( "processTiles", &processTilesWrapper ); def( "connectProcessTilesToPlugDirtiedSignal", &connectProcessTilesToPlugDirtiedSignal ); def( "testEditableScopeForFormat", &testEditableScopeForFormat ); + def( "validateVisitPixels", &validateVisitPixels ); } From e0c223a0d7119122d377380ac215107256e4538a Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Mon, 15 May 2023 16:03:37 -0700 Subject: [PATCH 13/20] Sampler : Test visitPixels --- python/GafferImageTest/SamplerTest.py | 66 ++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/python/GafferImageTest/SamplerTest.py b/python/GafferImageTest/SamplerTest.py index 8106ee6378c..2be9528ec89 100644 --- a/python/GafferImageTest/SamplerTest.py +++ b/python/GafferImageTest/SamplerTest.py @@ -53,7 +53,7 @@ def testOutOfBoundsSampleModeBlack( self ) : c = GafferImage.Constant() c["color"].setValue( imath.Color4f( 1 ) ) - dw = c["out"]["dataWindow"].getValue(); + dw = c["out"]["dataWindow"].getValue() s = GafferImage.Sampler( c["out"], "R", dw, GafferImage.Sampler.BoundingMode.Black ) # Check integer sampling. @@ -97,7 +97,7 @@ def testOutOfBoundsSampleModeClamp( self ) : r = GafferImage.ImageReader() r["fileName"].setValue( self.fileName ) - dw = r["out"]["dataWindow"].getValue(); + dw = r["out"]["dataWindow"].getValue() s = GafferImage.Sampler( r["out"], "R", dw, GafferImage.Sampler.BoundingMode.Clamp ) # Get the exact values of the corner pixels. @@ -220,5 +220,67 @@ def testExceptionOnDeepData( self ) : with self.assertRaises( RuntimeError ) : sampler = GafferImage.Sampler( merge["out"], "R", imath.Box2i( imath.V2i( 0 ), imath.V2i( 200 ) ), boundingMode = GafferImage.Sampler.BoundingMode.Black ) + def testVisitPixels( self ): + + ts = GafferImage.ImagePlug.tileSize() + ramp = GafferImage.Ramp() + ramp["startPosition"].setValue( imath.V2f( 0.5 ) ) + + offset = GafferImage.Offset() + offset["in"].setInput( ramp["out"] ) + + # Test for a bunch of different images sizes + for width in [ 1, 6, ts + 6, 2 * ts + 6 ]: + for height in [ 1, 9, ts + 9, 2 * ts + 9 ]: + + # Set up a ramp where each pixel gets a unique value based on its index + ramp["format"].setValue( GafferImage.Format( width, height, 1.000 ) ) + rampScale = ( width * height - 1 ) / ( width * width + 1 ) + ramp["endPosition"].setValue( imath.V2f( 0.5 + 1 * rampScale, 0.5 + width * rampScale ) ) + ramp['ramp'].setValue( Gaffer.SplineDefinitionfColor4f( ( ( 0, imath.Color4f( 0 ) ), ( 1, imath.Color4f( width * height - 1 ) )), Gaffer.SplineDefinitionInterpolation.Linear ) ) + + center = imath.V2i( width // 2, height // 2 ) + + # Test for a bunch of different offsets, so we're hitting different ways the datawindow + # may be aligned to a tile boundary + for dataAlignment in [ imath.V2i(0), -center, imath.V2i( ts // 2 ) - center ]: + offset["offset"].setValue( dataAlignment ) + dataWindow = offset["out"].dataWindow() + + for bm in [ GafferImage.Sampler.BoundingMode.Black, GafferImage.Sampler.BoundingMode.Clamp ]: + + # Make one big sampler that can handle all our queries. Making this larger than the + # data window doesn't actually have an impact on the current implementation, but it + # means that our queries outside the datawindow are officially legal. + oversizedSampleWindow = imath.Box2i( dataWindow.min() - ts * 3, dataWindow.max() + ts * 3 ) + + sampler = GafferImage.Sampler( offset["out"], "R", oversizedSampleWindow, boundingMode = bm ) + + # Test possible ways for the sampling region to line up with the data window: corners, + # edges, center, or completely outside + for regionAlignment in [ imath.V2i( regionAlignmentX, regionAlignmentY ) + for regionAlignmentX in [ 0, width // 2, width - 1 ] + for regionAlignmentY in [ 0, height // 2, height - 1 ] + ] + [ imath.V2i( -ts ), dataWindow.max() + imath.V2i( ts ) ]: + + for regionSize in [ + imath.V2i( 1, 1 ), + imath.V2i( 5, 7 ), + imath.V2i( 7, 5 ), + imath.V2i( ts + 7, 1 ), + imath.V2i( 1, ts + 7 ), + imath.V2i( 2 * ts + 7, 1 ), + imath.V2i( 1, 2 * ts + 7 ), + imath.V2i( 2 * ts + 7, 2 ), + imath.V2i( 2, 2 * ts + 7 ), + imath.V2i( 2 * ts + 5, 2 * ts + 7 ), + ]: + + regionMin = dataAlignment + regionAlignment - regionSize / 2 + region = imath.Box2i( regionMin, regionMin + regionSize ) + + with self.subTest( dataWindow = dataWindow, region = region ): + GafferImageTest.validateVisitPixels( sampler, region ) + if __name__ == "__main__": unittest.main() From c53eed3c47f616819f7be5d14a957f7d638e2522 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Fri, 12 May 2023 17:35:31 -0700 Subject: [PATCH 14/20] GafferImage::Resample : Optimize using visitPixels --- src/GafferImage/Resample.cpp | 39 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/GafferImage/Resample.cpp b/src/GafferImage/Resample.cpp index 0cc398f2cea..2d82333db42 100644 --- a/src/GafferImage/Resample.cpp +++ b/src/GafferImage/Resample.cpp @@ -639,20 +639,20 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string iX = ( oP.x + 0.5 ) / ratio.x + offset.x; OIIO::floorfrac( iX, &iXI ); - int fX; // relative filter position float v = 0.0f; float totalW = 0.0f; - for( fX = -filterRadius.x; fX<= filterRadius.x; ++fX ) - { - const float w = *wIt++; - if( w == 0.0f ) + + sampler.visitPixels( Imath::Box2i( + Imath::V2i( iXI - filterRadius.x, oP.y ), + Imath::V2i( iXI + filterRadius.x + 1, oP.y + 1 ) + ), + [&wIt, &v, &totalW]( float cur, int x, int y ) { - continue; + const float w = *wIt++; + v += w * cur; + totalW += w; } - - v += w * sampler.sample( iXI + fX, oP.y ); - totalW += w; - } + ); if( totalW != 0.0f ) { @@ -683,21 +683,20 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string for( oP.x = tileBound.min.x; oP.x < tileBound.max.x; ++oP.x ) { - int fY; // relative filter position float v = 0.0f; float totalW = 0.0f; std::vector::const_iterator wIt = weights.begin() + ( oP.y - tileBound.min.y ) * ( filterRadius.y * 2 + 1); - for( fY = -filterRadius.y; fY<= filterRadius.y; ++fY ) - { - const float w = *wIt++; - if( w == 0.0f ) + sampler.visitPixels( Imath::Box2i( + Imath::V2i( oP.x, iYI - filterRadius.y ), + Imath::V2i( oP.x + 1, iYI + filterRadius.y + 1 ) + ), + [&wIt, &v, &totalW]( float cur, int x, int y ) { - continue; + const float w = *wIt++; + v += w * cur; + totalW += w; } - - v += w * sampler.sample( oP.x, iYI + fY ); - totalW += w; - } + ); if( totalW != 0.0f ) { From d4bbbaab1c7f2be8f9631d4e271d1f948f121c4a Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Fri, 16 Jun 2023 19:16:16 -0700 Subject: [PATCH 15/20] SConstruct : Hack around compiler perfomance bug --- SConstruct | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/SConstruct b/SConstruct index 3c3cb4b4cac..27e834d8e38 100644 --- a/SConstruct +++ b/SConstruct @@ -491,6 +491,14 @@ if env["PLATFORM"] != "win32" : # Turn off the parts of `-Wextra` that we don't like. env.Append( CXXFLAGS = [ "-Wno-cast-function-type", "-Wno-unused-parameter" ] ) + # Set this weird compiler flag that in general is expected to cause compiled code to be about + # half a percent slower, but works around this ridiculous bug: + # https://gcc.gnu.org/bugzilla//show_bug.cgi?id=51041 + # It's a 10 year old bug that sometimes causes important inner loops to get 4X slower for no + # reason ( it affects use of visitPixels depending on exact register usage patterns at the call + # site ), and there appears to be no real solution ... maybe we should be moving away from GCC? + env.Append( CXXFLAGS = [ "-fira-region=all" ] ) + env.Append( CXXFLAGS = [ "-pthread" ], SHLINKFLAGS = [ "-pthread", "-Wl,--no-undefined" ], From 81b06b295ce70d69b2827c97fcf4fce4fb65884e Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Wed, 17 May 2023 11:41:08 -0700 Subject: [PATCH 16/20] GafferImage::Resample : Optimize inseparable filters --- src/GafferImage/Resample.cpp | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/GafferImage/Resample.cpp b/src/GafferImage/Resample.cpp index 2d82333db42..ae114da3471 100644 --- a/src/GafferImage/Resample.cpp +++ b/src/GafferImage/Resample.cpp @@ -580,27 +580,24 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string // seperable case. Once that is done, we should probably also hoist the multiply by // filterCoordinateMult out of the loop. - V2i fP; // relative filter position float v = 0.0f; float totalW = 0.0f; - for( fP.y = -filterRadius.y; fP.y<= filterRadius.y; ++fP.y ) - { - for( fP.x = -filterRadius.x; fP.x<= filterRadius.x; ++fP.x ) + V2f filterOrigin = filterCoordinateMult.x * ( iPF - V2f( 0.5f ) ); + sampler.visitPixels( Imath::Box2i( + iPI - filterRadius, + iPI + filterRadius + Imath::V2i( 1 ) + ), + [&filter, &filterOrigin, &filterCoordinateMult, &iPI, &v, &totalW]( float cur, int x, int y ) { const float w = (*filter)( - filterCoordinateMult.x * (fP.x - ( iPF.x - 0.5f )), - filterCoordinateMult.y * (fP.y - ( iPF.y - 0.5f )) + filterOrigin.x + filterCoordinateMult.x * ( x - iPI.x ), + filterOrigin.y + filterCoordinateMult.y * ( y - iPI.y ) ); - if( w == 0.0f ) - { - continue; - } - - v += w * sampler.sample( iPI.x + fP.x, iPI.y + fP.y ); + v += w * cur; totalW += w; } - } + ); if( totalW != 0.0f ) { From 52d68f9854368fa37d0d001783b6e1e351f38969 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Fri, 12 May 2023 18:44:34 -0700 Subject: [PATCH 17/20] RankFilter : Optimize using visitPixels --- src/GafferImage/RankFilter.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/GafferImage/RankFilter.cpp b/src/GafferImage/RankFilter.cpp index 72b97544fd1..95aab22f643 100644 --- a/src/GafferImage/RankFilter.cpp +++ b/src/GafferImage/RankFilter.cpp @@ -237,15 +237,14 @@ void RankFilter::compute( Gaffer::ValuePlug *output, const Gaffer::Context *cont IECore::Canceller::check( context->canceller() ); // Fill array with all nearby samples - V2i o; vector::iterator pixelsIt = pixels.begin(); - for( o.y = -radius.y; o.y <= radius.y; ++o.y ) - { - for( o.x = -radius.x; o.x <= radius.x; ++o.x ) + sampler.visitPixels( + Imath::Box2i( p - radius, p + radius + Imath::V2i( 1 ) ), + [&pixelsIt] ( float v, int x, int y ) { - *pixelsIt++ = sampler.sample( p.x + o.x, p.y + o.y ); + *pixelsIt++ = v; } - } + ); switch( m_mode ) { @@ -269,6 +268,7 @@ void RankFilter::compute( Gaffer::ValuePlug *output, const Gaffer::Context *cont int closestMatch = INT_MAX; pixelsIt = pixels.begin(); + V2i o; for( o.y = -radius.y; o.y <= radius.y; ++o.y ) { for( o.x = -radius.x; o.x <= radius.x; ++o.x ) @@ -408,15 +408,14 @@ IECore::ConstFloatVectorDataPtr RankFilter::computeChannelData( const std::strin { IECore::Canceller::check( context->canceller() ); - V2i o; vector::iterator pixelsIt = pixels.begin(); - for( o.y = -radius.y; o.y <= radius.y; ++o.y ) - { - for( o.x = -radius.x; o.x <= radius.x; ++o.x ) + sampler.visitPixels( + Imath::Box2i( p - radius, p + radius + Imath::V2i( 1 ) ), + [&pixelsIt] ( float v, int x, int y ) { - *pixelsIt++ = sampler.sample( p.x + o.x, p.y + o.y ); + *pixelsIt++ = v; } - } + ); switch( m_mode ) { case MedianRank: From 885c075c6f281151ceb07a6ca1647ad27943f15e Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Tue, 16 May 2023 16:26:59 -0700 Subject: [PATCH 18/20] Sampler : Optimize common case where we are within data tile Seems to give a 15% performance improvement in common cases --- include/GafferImage/Sampler.inl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/GafferImage/Sampler.inl b/include/GafferImage/Sampler.inl index 30adb1a5721..f564163b305 100644 --- a/include/GafferImage/Sampler.inl +++ b/include/GafferImage/Sampler.inl @@ -82,6 +82,30 @@ inline float Sampler::sample( float x, float y ) int yi; float yf = OIIO::floorfrac( y - 0.5, &yi ); + constexpr int tileLowMask = ImagePlug::tileSize() - 1; + if( + ( xi & tileLowMask ) != tileLowMask && + ( yi & tileLowMask ) != tileLowMask && + xi >= m_dataWindow.min.x && xi < m_dataWindow.max.x - 1 && + yi >= m_dataWindow.min.y && yi < m_dataWindow.max.y - 1 + ) + { + const float *tileData; + int tilePixelIndex; + cachedData( Imath::V2i( xi, yi ), tileData, tilePixelIndex ); + return OIIO::bilerp( + tileData[tilePixelIndex], tileData[tilePixelIndex + 1], + tileData[tilePixelIndex + ImagePlug::tileSize()], tileData[tilePixelIndex + ImagePlug::tileSize() + 1], + xf, yf + ); + } + // Note that if we care about performance in the case of accessing outside the data window, we + // could add more special cases here. If boundingMode isn't clamped, this is trivial: check + // if fully outside bound, return 0.0 without even computing xf and yf. Clamped is trickier - + // above or below you need just xf, to the left or right you just need yf, and in the corners + // you need neither + + float x0y0 = sample( xi, yi ); float x1y0 = sample( xi + 1, yi ); float x0y1 = sample( xi, yi + 1 ); From 44f314c962a2c6aab1e8e0c75d785e24d91ede7d Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Tue, 16 May 2023 18:30:44 -0700 Subject: [PATCH 19/20] FilterAlgo::sampleBox : Optimize using visitPixels This is a 70% performance improvement when using a VectorWarp with useDerivatives on to downsample an image. --- src/GafferImage/FilterAlgo.cpp | 37 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/GafferImage/FilterAlgo.cpp b/src/GafferImage/FilterAlgo.cpp index 0602e8f23e8..33be4318151 100644 --- a/src/GafferImage/FilterAlgo.cpp +++ b/src/GafferImage/FilterAlgo.cpp @@ -408,35 +408,40 @@ float GafferImage::FilterAlgo::sampleBox( Sampler &sampler, const V2f &p, float scratchMemory[i] = filter->xfilt( ( (pixelBounds.min.x + i) + 0.5f - p.x ) * xscale ); } + Imath::Box2i visitBounds = pixelBounds; for( int y = pixelBounds.min.y; y < pixelBounds.max.y; y++ ) { + visitBounds.min.y = y; + visitBounds.max.y = y + 1; float yFilterWeight = filter->yfilt( ( y + 0.5f - p.y ) * yscale ); - for( int x = pixelBounds.min.x; x < pixelBounds.max.x; x++ ) - { - float w = scratchMemory[ x - pixelBounds.min.x ] * yFilterWeight; + sampler.visitPixels( + visitBounds, + [ &totalW, &v, &p, &pixelBounds, &scratchMemory, &yFilterWeight ] ( float value, int x, int y ) + { + float w = scratchMemory[ x - pixelBounds.min.x ] * yFilterWeight; - // \todo : I can't think of any way to keep this around cleanly for testing, since - // it's right down in this inner loop, but replacing the filter with one value for - // pixels within the bounding box, and another value for pixels actually touched - // by the filter, is a good way to check that the bounding box is correct - //w = w != 0.0f ? 1.0f : 0.1f; + // \todo : I can't think of any way to keep this around cleanly for testing, since + // it's right down in this inner loop, but replacing the filter with one value for + // pixels within the bounding box, and another value for pixels actually touched + // by the filter, is a good way to check that the bounding box is correct + //w = w != 0.0f ? 1.0f : 0.1f; - totalW += w; - v += w * sampler.sample( x, y ); - } + totalW += w; + v += w * value; + } + ); } } else { - for( int y = pixelBounds.min.y; y < pixelBounds.max.y; y++ ) - { - for( int x = pixelBounds.min.x; x < pixelBounds.max.x; x++ ) + sampler.visitPixels( pixelBounds, + [ &totalW, &v, &p, &xscale, &yscale, &filter ] ( float value, int x, int y ) { float w = (*filter)( ( x + 0.5f - p.x ) * xscale, ( y + 0.5f - p.y ) * yscale ); totalW += w; - v += w * sampler.sample( x, y ); + v += w * value; } - } + ); } if( totalW != 0.0f ) From 8eec8bdff4f95bbd8ab5023f89a3a374b5a2ec42 Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Mon, 19 Jun 2023 21:39:04 -0700 Subject: [PATCH 20/20] Prototype some scripts for graphing performance results in contrib --- contrib/scripts/graphPerformanceResults.py | 81 ++++++++++++++++++++++ contrib/scripts/runTestsForCommits.py | 46 ++++++++++++ 2 files changed, 127 insertions(+) create mode 100755 contrib/scripts/graphPerformanceResults.py create mode 100755 contrib/scripts/runTestsForCommits.py diff --git a/contrib/scripts/graphPerformanceResults.py b/contrib/scripts/graphPerformanceResults.py new file mode 100755 index 00000000000..a9ac5a5df4f --- /dev/null +++ b/contrib/scripts/graphPerformanceResults.py @@ -0,0 +1,81 @@ +#! /usr/bin/env python + +import argparse +import inspect +import subprocess +import os +import json +import matplotlib.pyplot as plt +import matplotlib.ticker + +parser = argparse.ArgumentParser( + description = inspect.cleandoc( + """ + Graph the performance results for a folder of json files. + """ ) +) + +parser.add_argument( + 'jsonFolder', + help='Folder containing json files' +) + +args = parser.parse_args() + +jsonFolder = vars( args )["jsonFolder"] + +jsonFileNames = [ i for i in os.listdir( jsonFolder ) if i.endswith( ".json" ) ] + +results = [] +for n in jsonFileNames: + f = open( "%s/%s" % ( jsonFolder, n ) ) + d = json.load( f ) + f.close() + + r = {} + for key, value in d.items(): + if "timings" in value: + r[ key.split()[1].split('.')[-1][:-1] + "." + key.split()[0]] = ( min( value["timings"] ), max( value["timings"] ) ) + + resultName = n[:-5] + resultOrder = resultName + + try: + resultOrder = int( subprocess.check_output( [ "git", "rev-list", "--count", resultName ] ) ) + resultName = subprocess.check_output( [ "git", "log", "--format=%B", "-n 1", resultName ] ).splitlines()[0].decode( "UTF-8" ) + except: + pass + + results.append( [resultOrder, resultName, r] ) + +results.sort() +results = [r[1:] for r in results ] + +ax = plt.subplot(111) + +curveNames = results[0][1].keys() +curveNames = sorted( curveNames, key = lambda k : -results[-1][1][k][0] ) +for k in curveNames: + ax.plot( [ j[1][k][0] for j in results ], label = k ) + # Use this line instead if you want to see variability + #ax.fill_between( range( len( results ) ), [ j[1][k][0] for j in results ], [ j[1][k][1] for j in results ], label = k ) + +plt.yscale( "log", base = 2 ) + +# Force labelling of every power of two +plt.yticks( [2**i for i in range( -4, 4 )] ) + +ax.xaxis.set_major_formatter( matplotlib.ticker.FuncFormatter( lambda i, pos : results[int(i)][0] ) ) +plt.xticks( rotation = 45, ha = "right" ) + +box = ax.get_position() + +# Create margin +ax.set_position([box.x0, box.y0 + box.height * 0.2, box.width * 0.8, box.height * 0.8]) + +# Put a legend to the right of the current axis +ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) + +# Currently hacking things up to work with very old matplotlib +#plt.show_all() +plt.show() diff --git a/contrib/scripts/runTestsForCommits.py b/contrib/scripts/runTestsForCommits.py new file mode 100755 index 00000000000..b435e58f8e0 --- /dev/null +++ b/contrib/scripts/runTestsForCommits.py @@ -0,0 +1,46 @@ +#! /usr/bin/env python + +import argparse +import inspect +import subprocess +import os + +parser = argparse.ArgumentParser( + description = inspect.cleandoc( + """ + Run a selection of Gaffer tests for a specified set of commits, + outputting a separate json file for the tests for each commit. + """ ) +) + +parser.add_argument( + '--commits', action='store', nargs='+', + help='Hashes of commits to build' +) + +parser.add_argument( + '--tests', action='store', nargs='+', + help='Names of tests to run' +) + +parser.add_argument( + '--outputFolder', action='store', required = True, + help='Folder to put output json files to' +) + +args = parser.parse_args() + +outputFolder = vars( args )["outputFolder"] +try: + os.makedirs( outputFolder ) +except: + pass + +currentBranch = subprocess.check_output( [ "git", "stat", "-s", "-b" ] ).splitlines()[0].split()[1] +print( currentBranch ) +for c in vars( args )["commits"]: + subprocess.check_call( [ "git", "checkout", c ] ) + subprocess.check_call( [ "scons", "-j 16", "build" ] ) + subprocess.check_call( [ "gaffer", "test" ] + vars( args )["tests"] + [ "-outputFile", "%s/%s.json" % ( outputFolder, c ) ] ) + +subprocess.check_call( [ "git", "checkout", currentBranch ] )