diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 000000000..f1062df19
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,31 @@
+# Generates HTML API documentation for the WebURL module and publishes it to the gh-pages branch on pushes to main.
+name: Generate Documentation
+
+on:
+  push:
+    branches: [ main ]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Generate Documentation
+      uses: karwa/swift-doc@linux-fixes
+      with:
+        inputs: "Sources/WebURL"
+        module-name: WebURL
+        output: "Documentation"
+        base-url: "/swift-url/"
+        format: html
+        excluded-symbols: docs-excluded-symbols
+    # Hand the generated files to the runner user before deploying (presumably the previous step writes them as another user; confirm).
+    - name: Fix permissions
+      run: 'sudo chown --recursive $USER Documentation'
+    - name: Publish documentation to GitHub Pages
+      uses: JamesIves/github-pages-deploy-action@4.1.1
+      with:
+        branch: gh-pages
+        folder: Documentation
+        single-commit: true
\ No newline at end of file
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
new file mode 100644
index 000000000..3be661cc2
--- /dev/null
+++ b/.github/workflows/linux.yml
@@ -0,0 +1,39 @@
+# Builds the package, runs the tests, and builds the benchmarks on Ubuntu 20.04 with Swift 5.3.3.
+name: Swift package tests (Linux)
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-20.04
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Download Swift 5.3.3
+      run: wget -q https://swift.org/builds/swift-5.3.3-release/ubuntu2004/swift-5.3.3-RELEASE/swift-5.3.3-RELEASE-ubuntu20.04.tar.gz
+    - name: Extract Swift 5.3.3
+      run: tar xzf swift-5.3.3-RELEASE-ubuntu20.04.tar.gz
+    - name: Add Swift toolchain to PATH
+      run: |
+        echo "$GITHUB_WORKSPACE/swift-5.3.3-RELEASE-ubuntu20.04/usr/bin" >> $GITHUB_PATH
+    - name: Build
+      run: swift build -v
+    - name: Run tests
+      run: SWIFT_URL_REPORT_PATH=/tmp/swift-url-report/ swift test --enable-test-discovery -v
+    - name: Build benchmarks
+      run: |
+        cd Benchmarks
+        swift build -v
+    # Runs even when an earlier step failed, so reports from failing test runs are still uploaded.
+    - name: Upload report files
+      uses: actions/upload-artifact@v2
+      if: always()
+      with:
+        name: test-reports
+        path: /tmp/swift-url-report/*
+        if-no-files-found: warn
\ No newline at end of file
diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
new file mode 100644
index 000000000..4a7041598
--- /dev/null
+++ b/.github/workflows/macos.yml
@@ -0,0 +1,32 @@
+# Builds the package, runs the tests, and builds the benchmarks on macOS using the Swift toolchain already on the runner.
+name: Swift package tests (macOS)
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+
+    runs-on: macos-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Build
+      run: swift build -v
+    - name: Run tests
+      run: SWIFT_URL_REPORT_PATH=/private/var/tmp/swift-url-report/ swift test -v
+    - name: Build benchmarks
+      run: |
+        cd Benchmarks
+        swift build -v
+    # Runs even when an earlier step failed, so reports from failing test runs are still uploaded.
+    - name: Upload report files
+      uses: actions/upload-artifact@v2
+      if: always()
+      with:
+        name: test-reports
+        path: /private/var/tmp/swift-url-report/*
+        if-no-files-found: warn
\ No newline at end of file
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
new file mode 100644
index 000000000..86e07e3c1
--- /dev/null
+++ b/.github/workflows/windows.yml
@@ -0,0 +1,51 @@
+# Builds the package and runs the tests on Windows with a Swift development snapshot toolchain.
+name: Swift package tests (Windows)
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: seanmiddleditch/gha-setup-vsdevenv@master
+
+      - name: Install Swift DEVELOPMENT-SNAPSHOT-2021-03-25
+        run: |
+          Install-Binary -Url "https://swift.org/builds/development/windows10/swift-DEVELOPMENT-SNAPSHOT-2021-03-25-a/swift-DEVELOPMENT-SNAPSHOT-2021-03-25-a-windows10.exe" -Name "installer.exe" -ArgumentList ("-q")
+      - name: Set Environment Variables
+        run: |
+          echo "SDKROOT=C:\Library\Developer\Platforms\Windows.platform\Developer\SDKs\Windows.sdk" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+          echo "DEVELOPER_DIR=C:\Library\Developer" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+      - name: Adjust Paths
+        run: |
+          echo "C:\Library\Developer\Toolchains\unknown-Asserts-development.xctoolchain\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\Library\Swift-development\bin;C:\Library\icu-67\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+      # Copies the module maps and API notes from the Swift SDK into the Universal CRT and Visual C++ include directories.
+      - name: Install Supporting Files
+        run: |
+          Copy-Item "$env:SDKROOT\usr\share\ucrt.modulemap" -destination "$env:UniversalCRTSdkDir\Include\$env:UCRTVersion\ucrt\module.modulemap"
+          Copy-Item "$env:SDKROOT\usr\share\visualc.modulemap" -destination "$env:VCToolsInstallDir\include\module.modulemap"
+          Copy-Item "$env:SDKROOT\usr\share\visualc.apinotes" -destination "$env:VCToolsInstallDir\include\visualc.apinotes"
+          Copy-Item "$env:SDKROOT\usr\share\winsdk.modulemap" -destination "$env:UniversalCRTSdkDir\Include\$env:UCRTVersion\um\module.modulemap"
+      - name: Check installation
+        run: swift --version
+
+      - name: Build
+        run: swift build -v
+      - name: Set test report destination
+        run: echo "SWIFT_URL_REPORT_PATH=C:\tmp\swift-url-report" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+      - name: Run tests
+        run: swift test --enable-test-discovery -v
+      # Runs even when an earlier step failed, so reports from failing test runs are still uploaded.
+      - name: Upload report files
+        uses: actions/upload-artifact@v2
+        if: always()
+        with:
+          name: test-reports
+          path: C:\tmp\swift-url-report\*
+          if-no-files-found: warn
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..4c741c059
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+.DS_Store
+/.build
+/.swiftpm
+/Packages
+/*.xcodeproj
+xcuserdata/
+Package.resolved
\ No newline at end of file
diff --git a/.swift-format b/.swift-format
new file mode 100644
index 000000000..a7d363ae6
--- /dev/null
+++ b/.swift-format
@@ -0,0 +1,54 @@
+{
+  "lineLength" : 120,
+  "maximumBlankLines" : 2,
+
+  "blankLineBetweenMembers" : {
+    "ignoreSingleLineProperties" : true
+  },
+  "indentation" : {
+    "spaces" : 2
+  },
+  "indentConditionalCompilationBlocks" : true,
+  "lineBreakBeforeControlFlowKeywords" : false,
+  "lineBreakBeforeEachArgument" : false,
+  "respectsExistingLineBreaks" : true,
+  "rules" : {
+    "AllPublicDeclarationsHaveDocumentation" : true,
+    "AlwaysUseLowerCamelCase" : true,
+    "AmbiguousTrailingClosureOverload" : true,
+    "BeginDocumentationCommentWithOneLineSummary" : true,
+    "BlankLineBetweenMembers" : true,
+    "CaseIndentLevelEqualsSwitch" : true,
+    "DoNotUseSemicolons" : true,
+    "DontRepeatTypeInStaticProperties" : true,
+    "FullyIndirectEnum" : true,
+    "GroupNumericLiterals" : true,
+    "IdentifiersMustBeASCII" : true,
+    "MultiLineTrailingCommas" : true,
+    "NeverForceUnwrap" : true,
+    "NeverUseForceTry" : true,
+    "NeverUseImplicitlyUnwrappedOptionals" : true,
+    "NoAccessLevelOnExtensionDeclaration" : true,
+    "NoBlockComments" : true,
+    "NoCasesWithOnlyFallthrough" : true,
+    "NoEmptyTrailingClosureParentheses" : true,
+    "NoLabelsInCasePatterns" : true,
+    "NoLeadingUnderscores" : true,
+    "NoParensAroundConditions" : true,
+    "NoVoidReturnOnFunctionSignature" : true,
+    "OneCasePerLine" : true,
+    "OneVariableDeclarationPerLine" : true,
+    "OnlyOneTrailingClosureArgument" : true,
+    "OrderedImports" : true,
+    "ReturnVoidInsteadOfEmptyTuple" : true,
+    "UseEnumForNamespacing" : true,
+    "UseLetInEveryBoundCaseVariable" : true,
+    "UseShorthandTypeNames" : true,
+    "UseSingleLinePropertyGetter" : true,
+    "UseSynthesizedInitializer" : true,
+    "UseTripleSlashForDocumentationComments" : true,
+    "ValidateDocumentationComments" : true
+  },
+  "tabWidth" : 8,
+  "version" : 1
+}
diff --git a/Benchmarks/.gitignore b/Benchmarks/.gitignore
new file mode 100644
index 000000000..4c741c059
--- /dev/null
+++ b/Benchmarks/.gitignore
@@ -0,0 +1,7 @@
+.DS_Store
+/.build
+/.swiftpm
+/Packages
+/*.xcodeproj
+xcuserdata/
+Package.resolved
\ No newline at end of file
diff --git a/Benchmarks/Package.swift b/Benchmarks/Package.swift
new file mode 100644
index 000000000..f4f6e761e
--- /dev/null
+++ b/Benchmarks/Package.swift
@@ -0,0 +1,38 @@
+// swift-tools-version:5.3
+// The swift-tools-version declares the minimum version of Swift required to build this package.
+
+// Copyright The swift-url Contributors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import PackageDescription
+
+let package = Package(
+  name: "swift-url-benchmark",
+  products: [
+    .executable(name: "WebURLBenchmark", targets: ["WebURLBenchmark"])
+  ],
+  dependencies: [
+    .package(name: "Benchmark", url: "https://github.com/google/swift-benchmark", from: "0.1.0"),
+    .package(name: "swift-url", path: ".."),
+  ],
+  targets: [
+    .target(
+      name: "WebURLBenchmark",
+      dependencies: [
+        .product(name: "WebURL", package: "swift-url"),
+        .product(name: "Benchmark", package: "Benchmark")
+      ]
+    )
+  ]
+)
diff --git a/Benchmarks/Sources/WebURLBenchmark/Constructor+SpecialNonFile.swift b/Benchmarks/Sources/WebURLBenchmark/Constructor+SpecialNonFile.swift
new file mode 100644
index 000000000..799712aed
--- /dev/null
+++ b/Benchmarks/Sources/WebURLBenchmark/Constructor+SpecialNonFile.swift
@@ -0,0 +1,320 @@
+// Copyright The swift-url Contributors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +import Benchmark +import WebURL + +/// Benchmarks the `WebURL.init(String)` constructor for URLs with special schemes (except file). +/// The same performance trends should apply across http, https, ftp, ws, wss, schemes. +/// +let constructor_specialNonFile = BenchmarkSuite(name: "Constructor.SpecialNonFile") { suite in + + // Simple http(s) URLs which have the same basic structure: + // + // - A couple of path components of varying lengths, no '.' or '..' components. + // - A query parameter with a couple of key-value pairs. + // - Nothing needs percent-encoding, path does not need simplifying. + // - Less than 255 characters. + // - Essentially, the average URL you might find on a webpage like reddit or Wikipedia. + + let average_strings = [ + #"http://example.com/foo/bar/baz?a=b&c=d&e=f"#, + #"http://foobar.net/bar?baz=qux&search=nothing#top"#, + #"http://localhost/one/two?coffee"#, + #"http://127.0.0.1:8080/one/two?coffee"#, + #"http://[::1]:8080/one/two?coffee"#, + + #"https://www.reddit.com/r/mildlyinteresting/comments/lwhnig/locals_in_puerto_rico_painted_this_mural_they/gphk84q?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lvbc3u/i_found_a_mushroom_that_looks_like_a_fried_egg/gpc49is?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lwm6zn/my_friend_drunkenly_bought_sunglasses_for_their/gpiaigr?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lwtlsi/this_tree_that_grew_into_an_old_gate/gpj3g0e?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lwpcvh/this_redfleshed_apple/gpinqsj?utm_source=share&utm_medium=web2x&context=3"#, + 
#"https://www.reddit.com/r/mildlyinteresting/comments/lrct3m/this_mini_evolution_i_saw_in_london/gokxoqv?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lw2um3/this_rock_that_looks_like_a_strawberry/gpf7sfb?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lwcdhf/4_layers_of_flooring_in_this_house_im_remodeling/gpglqx9?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lwo5qh/terracotta_piggy_from_poliochni_greece_23002500_bc/gpig723?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lw8b67/this_set_of_stair_cases_that_you_cant_access_one/gpftyox?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lwhcrk/shhh_hes_sleeping/gphd357?utm_source=share&utm_medium=web2x&context=3"#, + #"https://www.reddit.com/r/mildlyinteresting/comments/lvns4s/this_imported_salmon_so_tightly_wrapped_in/gpcw2uu?utm_source=share&utm_medium=web2x&context=3"#, + ] + suite.benchmark("AverageURLs") { + for string in average_strings { + blackHole(WebURL(string)) + } + } + + // As above, with a few tabs and newlines thrown in. + + let average_filter_strings = [ + // Each have 3 tabs + 1 newline. + "htt\tp://exa\nmpl\te.com/foo/bar/\tbaz?a=b&c=d&e=f", + "http\t://fooba\nr.net\t/bar?baz=q\tux&search=nothing#top", + "http:/\t/localho\ns\tt/one/two?co\tffee", + "http://12\t7.0.0.1\n:80\t80/one/two\t?coffee", + "http://[::\t1]:80\n80\t/one/two?coff\tee", + // Each have 5 tabs + 3 newlines. 
+ "ht\ttps://ww\tw.reddit.com/r/mildlyinteresting/com\tments/lwhnig\n/lo\tcals_in_puerto_rico_painted_this_mural_they/gphk84q\n?utm_source\t=share&utm_medium\n=web2x&context=3", + "https\t\t://ww\nw.reddit.com/r\t/mildlyinteresting/com\nments/lvbc3u/i_found_a_mushroom_that_looks_like_a_fried_egg/gpc49is?\tutm_source=share&utm_medium\t=web2x&cont\next=3", + "http\ns://www.r\teddit.com/r/mildlyinte\nresting\t/comments/lwm6zn/my_friend_drunkenly_bought_sunglasses_\tfor_their/gpiaigr?\tutm_source=share&\tutm_medium\n=web2x&context=3", + "https:/\t/www\n.reddit.com/r/mil\tdlyinterestin\tg/\ncomments/lwtlsi/this_tree_that_grew_into_an_old_gate\t/gpj3g0e?utm_source=share&utm_medium=web2x&\ncon\ttext=3", + "https\t://www.reddit.com/r/mildlyinterest\ting/comments\t/lwpcvh/t\nhis_redfleshed_apple\t/gpinqsj?utm_sou\trce=share&utm_medium\n\n=web2x&context=3", + "\nhtt\tp\ts:/\t/www.reddit.com/r/mildlyinteresting/com\tments/lrct3m/\tthis_mini_evolution_i_saw_in_london/gokxoqv?utm_source\n=share&utm_medium=web2x&context=3\n", + "htt\nps://\twww.reddit.com/r/mi\tldlyintere\nsting/\tcomments/lw2um3/this_rock_that_looks_like_a_strawberry\t\n/gpf7sfb?utm_source=share&utm_medium=we\tb2x&context=3", + "https:/\t/www\n.reddit.com/r/mildlyinterestin\tg/\tcomments/\tlwcdhf/4_layers_of_flooring_in_this_house_im_remodelin\ng/gpglqx9?utm_source=share&\nutm_medium=web2x&\tcontext=3", + "https:\n//w\tww.reddit.com/r/mildlyint\teres\nting/\tcomments/lwo5qh/terracotta_piggy_from_poliochni_greece_23002500_bc\t/gpig723?utm_source\t=share&utm_medium=web\n2x&context=3", + "\t\t\t\nhttps://www.reddit.com/r/mildlyinteresting/comments/lw8b67/this_set_of_stair_cases_that_you_cant_access_one/gpftyox?utm_source=share&utm_medium=web2x&context=3\t\t\n\n", + "https://ww\nw.redd\tit.c\nom/r/\tmildlyinteresting/comment\ts/lwhcrk/shhh_hes_sleeping\t/gphd357?utm_source=\tshare&utm_medium=web2x&context=\n3", + 
"http\ts://ww\n\tw.reddit.com/r/mildlyinteres\tti\nng/comments/lvns4s\t/this_imported_salmon_so_tightly_wrapped_in/gpcw2uu?\tutm_source=share&utm_medium=web2x&\ncontext=3", + ] + suite.benchmark("AverageURLs filtered") { + for string in average_filter_strings { + blackHole(WebURL(string)) + } + } + + // An HTTP URL with an IPv4 address. + + let ipv4_strings = [ + #"http://0xbadf00d/"#, + #"http://127.0.0.1/"#, + #"http://10.9.9.8/"#, + #"http://217.234.090/"#, + #"http://0xbe.0xfc9409"#, + #"http://0xc239994e"#, + #"http://0346.0212.0x2e.0242"#, + #"http://0323.0xf3.0x37.0x1f"#, + #"http://773488775"#, + #"http://0xe1.0245.237.217"#, + #"http://0123.0x70646e"#, + + #"http://0437125212"#, + #"http://032.2148585"#, + #"http://031032371445"#, + #"http://0x48d25db9"#, + #"http://0377.5601714"#, + #"http://0171.0250.153.57"#, + #"http://86.0217.0x7dea"#, + #"http://0xd0.0111.230.04"#, + #"http://0xde.0x3e.0111.0xba"#, + #"http://155.0xc8.54099"#, + + #"http://0x7d.0x86b0be"#, + #"http://034.232.0260.0x4f"#, + #"http://0x38.0351.0301.180"#, + #"http://0115.102.0x34e"#, + #"http://250.0x158115"#, + #"http://0x34.0304.072342"#, + #"http://10.0x28.0376.0x10"#, + #"http://012215540245"#, + #"http://0xe8.12776487"#, + #"http://0120.163.15898"#, + #"http://052.0xaa.0113352"#, + ] + suite.benchmark("IPv4 host") { + for string in ipv4_strings { + blackHole(WebURL(string)) + } + } + + // As above, with a few tabs and newlines thrown in. 
+ + let ipv4_filter_strings = [ + "http://0\nxba\t\tdf0\t0d\n/", + "http://1\n27.\t\t0.0\t.1\n/", + "http://1\n0.9\t\t.9.\t8/\n", + "http://2\n17.\t\t234\t.0\n90/", + "http://0\nxbe\t\t.0x\tfc\n9409", + "http://0\nxc2\t\t399\t94\ne", + "http://0\n346\t\t.02\t12\n.0x2e.0242", + "http://0\n323\t\t.0x\tf3\n.0x37.0x1f", + "http://7\n734\t\t887\t75\n", + "http://0\nxe1\t\t.02\t45\n.237.217", + "http://0\n123\t\t.0x\t70\n646e", + + "http://04\n371\t\n2521\t2", + "http://03\n2.2\t\n1485\t85", + "http://03\n103\t\n2371\t445", + "http://0x\n48d\t\n25db\t9", + "http://03\n77.\t\n5601\t714", + "http://01\n71.\t\n0250\t.153.57", + "http://86\n.02\t\n17.0\tx7dea", + "http://0x\nd0.\t\n0111\t.230.04", + "http://0x\nde.\t\n0x3e\t.0111.0xba", + "http://15\n5.0\t\nxc8.\t54099", + + "http://\n0x\t7d\t.0x8\n6b0\nbe", + "http://\n03\t4.\t232.\n026\n0.0x4f", + "http://\n0x\t38\t.035\n1.0\n301.180", + "http://\n01\t15\t.102\n.0x\n34e", + "http://\n25\t0.\t0x15\n811\n5", + "http://\n0x\t34\t.030\n4.0\n72342", + "http://\n10\t.0\tx28.\n037\n6.0x10", + "http://\n01\t22\t1554\n024\n5", + "http://\n0x\te8\t.127\n764\n87", + "http://\n01\t20\t.163\n.15\n898", + "http://\n05\t2.\t0xaa\n.01\n13352", + ] + suite.benchmark("IPv4 host filtered") { + for string in ipv4_filter_strings { + blackHole(WebURL(string)) + } + } + + // An HTTP URL with an IPv6 address. 
+ + let ipv6_strings = [ + #"http://[7225:7eb:d838:cc21:c3a4:dba8:1fad:1f46]"#, + #"http://[0:0:0:0:0:0:78b9:301c]"#, + #"http://[::21.37.66.27]"#, + #"http://[0:0:0:0:0:0:355a:62a8]"#, + #"http://[d979:0:0:0:0:0:0:0]"#, + #"http://[::48.79.54.144]"#, + #"http://[ed8d:4670:6d0a:ee7f:78b:eb09:904d:b44]"#, + #"http://[5a3c:bd64::1bcf:d69f:4b8]"#, + #"http://[0:0:0:0:0:0:dfa7:5ce3]"#, + #"http://[::75.33.222.220]"#, + + #"http://[::155.147.186.251]"#, + #"http://[::48.161.242.105]"#, + #"http://[0:0:0:0:0:0:a523:d264]"#, + #"http://[b6ea:cd3e:ca43:6fe3:aceb::]"#, + #"http://[::476c:c763]"#, + #"http://[977:2aa0:6bf5:1507:77ba:dfe1:2976:77ca]"#, + #"http://[::167.126.187.247]"#, + #"http://[::53.197.134.182]"#, + #"http://[6880:1845:26e0:6df1:f7e6:9e4b:7b7:7bc4]"#, + #"http://[1f09:bebc:131f:3de7:8bfb:3192:9f6a:fc64]"#, + #"http://[::9bc8:da85]"#, + + #"http://[3ba8:7206:a9ab:83b1:e38e:7bc5:e83d:af51]"#, + #"http://[f821:b719:3fc6:5bd1:b000:d00c:1edb:75e8]"#, + #"http://[93fc:aedd:a15:50fb:dc62::]"#, + #"http://[3285:c199:3e58:6c80:d1:70be:f65a:19fd]"#, + #"http://[b631:b446:5572:4548:f13d:979e:18a4:34b5]"#, + #"http://[0:0:0:0:0:0:a2ba:91e0]"#, + #"http://[cd58:be56:ede0:d2c3:2d5:0:0:7712]"#, + #"http://[::15.185.11.7]"#, + #"http://[472b:2877:0:0:0:0:236e:e76b]"#, + #"http://[::8462:4e04]"#, + #"http://[985e:e239:3599:6ad8:0:0:1326:b995]"#, + ] + suite.benchmark("IPv6 host") { + for string in ipv6_strings { + blackHole(WebURL(string)) + } + } + + // As above, with a few tabs and newlines thrown in. 
+ + let ipv6_filter_strings = [ + "ht\ntp://[\t7225\t:7eb\n:\td838:cc21:c3a4:dba8:1fad:1f46]", + "ht\ntp://[\t0:0:\t0:0:\n0\t:0:78b9:301c]", + "ht\ntp://[\t::21\t.37.\n6\t6.27]", + "ht\ntp://[\t0:0:\t0:0:\n0\t:0:355a:62a8]", + "ht\ntp://[\td979\t:0:0\n:\t0:0:0:0:0]", + "ht\ntp://[\t::48\t.79.\n5\t4.144]", + "ht\ntp://[\ted8d\t:467\n0\t:6d0a:ee7f:78b:eb09:904d:b44]", + "ht\ntp://[\t5a3c\t:bd6\n4\t::1bcf:d69f:4b8]", + "ht\ntp://[\t0:0:\t0:0:\n0\t:0:dfa7:5ce3]", + "ht\ntp://[\t::75\t.33.\n2\t22.220]", + + "http:/\t/\t[:\n:155\n.147.186.251]", + "http:/\t/\t[:\n:48.\n161.242.105]", + "http:/\t/\t[0\n:0:0\n:0:0:0:a523:d264]", + "http:/\t/\t[b\n6ea:\ncd3e:ca43:6fe3:aceb::]", + "http:/\t/\t[:\n:476\nc:c763]", + "http:/\t/\t[9\n77:2\naa0:6bf5:1507:77ba:dfe1:2976:77ca]", + "http:/\t/\t[:\n:167\n.126.187.247]", + "http:/\t/\t[:\n:53.\n197.134.182]", + "http:/\t/\t[6\n880:\n1845:26e0:6df1:f7e6:9e4b:7b7:7bc4]", + "http:/\t/\t[1\nf09:\nbebc:131f:3de7:8bfb:3192:9f6a:fc64]", + "http:/\t/\t[:\n:9bc\n8:da85]", + + "http://[3\tba\n\n8:72\n\t06:a9ab:83b1:e38e:7bc5:e83d:af51]", + "http://[f\t82\n\n1:b7\n\t19:3fc6:5bd1:b000:d00c:1edb:75e8]", + "http://[9\t3f\n\nc:ae\n\tdd:a15:50fb:dc62::]", + "http://[3\t28\n\n5:c1\n\t99:3e58:6c80:d1:70be:f65a:19fd]", + "http://[b\t63\n\n1:b4\n\t46:5572:4548:f13d:979e:18a4:34b5]", + "http://[0\t:0\n\n:0:0\n\t:0:0:a2ba:91e0]", + "http://[c\td5\n\n8:be\n\t56:ede0:d2c3:2d5:0:0:7712]", + "http://[:\t:1\n\n5.18\n\t5.11.7]", + "http://[4\t72\n\nb:28\n\t77:0:0:0:0:236e:e76b]", + "http://[:\t:8\n\n462:\n\t4e04]", + "http://[9\t85\n\ne:e2\n\t39:3599:6ad8:0:0:1326:b995]", + ] + suite.benchmark("IPv6 host filtered") { + for string in ipv6_filter_strings { + blackHole(WebURL(string)) + } + } + + // Components requiring percent-encoding. 
+ + let percent_encoding_strings = [ + #"http://example.com/🦆/🦆/goose/🦆/🦆/goose/🦆/🦆/goose/🦆/🦆/goose/🦆/🦆/goose/🦆/🦆/goose/🦆/🦆/goose/🦆/🦆/goose/🦆/🦆/goose/🦆/🦆/goose/"#, + #"http://example.com/🦆/🦆/../../🦆/🦆/../../🦆/🦆/../../🦆/🦆/../../🦆/🦆/../../🦆/🦆/../../🦆/🦆/../../🦆/🦆/../../🦆/🦆/../../🦆/🦆/../../"#, + #"http://example.com?🦆=1️⃣&🐶=2️⃣&🦁=3️⃣&now=break&🐧=4️⃣&🦕=5️⃣&🦆=1️⃣&🐶=2️⃣&🦁=3️⃣&now=break&🐧=4️⃣&🦕=5️⃣&🦆=1️⃣&🐶=2️⃣&🦁=3️⃣&now=break&🐧=4️⃣&🦕=5️⃣&🛑"#, + #"http://example.com#🦆=1️⃣&🐶=2️⃣&🦁=3️⃣&now=break&🐧=4️⃣&🦕=5️⃣&🦆=1️⃣&🐶=2️⃣&🦁=3️⃣&now=break&🐧=4️⃣&🦕=5️⃣&🦆=1️⃣&🐶=2️⃣&🦁=3️⃣&now=break&🐧=4️⃣&🦕=5️⃣&🛑"#, + ] + suite.benchmark("Percent-encoding components") { + for string in percent_encoding_strings { + blackHole(WebURL(string)) + } + } + + // Hostnames requiring percent-decoding. + + let percent_encoded_hostname_strings = [ + #"http://ex%61mple.com"#, + #"http://loc%61lhost"#, + #"http://%74%68%69%73%69%73%61%76%65%72%79%6C%6F%6E%67%65%6E%63%6F%64%65%64%68%6F%73%74%6E%61%6D%65%61%63%74%75%61%6C%6C%79%74%6F%6F%6C%6F%6E%67%74%6F%72%65%61%6C%6C%79%62%65%75%73%61%62%6C%65%62%75%74%77%68%61%74%65%76%65%72%77%65%73%74%69%6C%6C%6E%65%65%64%74%6F%64%65%63%6F%64%65%69%74%2E%63%6F%6D"#, + // Percent-encoded IPv4 addresses. + #"http://%31%30%2E%30%2E%30%2E%31"#, + #"http://%30%78%62%61%64%66%30%30%64"#, + #"http://%30%78%64%30%2E%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%31%31%31%2E%32%33%30%2E%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%30%34"#, + ] + suite.benchmark("Percent-encoded hostnames") { + for string in percent_encoded_hostname_strings { + blackHole(WebURL(string)) + } + } + + // HTTP URLs with very long paths. + + suite.benchmark("Long paths") { + // Small (<255 chars). 
+ blackHole(WebURL(#"http://example.com/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/"#)) + blackHole(WebURL(#"http://example.com////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////"#)) + blackHole(WebURL(#"http://example.com//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//"#)) + // Large. + blackHole(WebURL(#"http://example.com/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x
/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/"#)) + blackHole(WebURL(#"http://example.com/////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////x//////////////////////////////"#)) + 
blackHole(WebURL(#"http://example.com//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//"#)) + } + + suite.benchmark("Complex paths 1") { + 
blackHole(WebURL(#"http://example.com/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../.
./../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../"#)) + + } + suite.benchmark("Complex paths 2") { + blackHole(WebURL(#"http://example.com//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../..
/../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../..../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../..//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//a//b//c//d//e//f//g//h//i//j//k//l//m//n//o//p//q//r//s//t//u//v//w//x//y//z//../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../.
./../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../../"#)) + } + + suite.benchmark("Long query 1") { + blackHole(WebURL(#"http://chart.apis.google.com/chart?chs=500x500&chma=0,0,100,100&cht=p&chco=FF0000%2CFFFF00%7CFF8000%2C00FF00%7C00FF00%2C0000FF&chd=t%3A122%2C42%2C17%2C10%2C8%2C7%2C7%2C7%2C7%2C6%2C6%2C6%2C6%2C5%2C5&chl=122%7C42%7C17%7C10%7C8%7C7%7C7%7C7%7C7%7C6%7C6%7C6%7C6%7C5%7C5&chdl=android%7Cjava%7Cstack-trace%7Cbroadcastreceiver%7Candroid-ndk%7Cuser-agent%7Candroid-webview%7Cwebview%7Cbackground%7Cmultithreading%7Candroid-source%7Csms%7Cadb%7Csollections%7Cactivity|Chart"#)) + } + suite.benchmark("Long query 2") { + blackHole(WebURL(#"https://opentimestamps.org/info.html?ots=004f70656e54696d657374616d7073000050726f6f6600bf89e2e884e892940108b1674191a88ec5cdd733e4240a81803105dc412d6c6708d53ab94fc248f4f553f0103af65c768ff047f5459d5b00be93ca2308fff01055ec009d3160b3b47df85addd4a4be4f08f02078db964aa8198a7b11c4c8f6cc40b66a8f7a11be3ecc762c10166a8dad6c07a208f10459f6e401f008ea8c7f8cade256c5ff0083dfe30d2ef90c8e2c2b68747470733a2f2f626f622e6274632e63616c656e6461722e6f70656e74696d657374616d70732e6f726708f0206dc515c8e66c1f185e5618bbc613cd0d8e9bea75ac3a3da047c53be2c6e1f1c608f020c712209464c9e1ed3a4b504750e7283d5a1be1f407ac496a391f2bcc969b580808f120d9d4e70f4f7bec078c3dda08272eace468fbb81e75fbc7ae1e20bc7e8db1425908f020a6669aa9fb0dc6155bb709913b6bae1bf1109c2251a9b6aa4fd848098b05210d08f120d8341043f84b2899e4606f4303aec21127340387c760df2e3b1088546969e0ae08f120e1638308abd4f0b2c844abc80afa2ba15681b566a9ef764c318d671b5630257308f0205323029f6bbb2e38e2581ccc03c2c4b4c0ca5d38b202d2215045832e99924c1908f0209e69d42f395e0cbc5ff7ced095d5bc8de5559dbd3b531574feb7771016efd83b08f0208d6948f8407d5ec643a1298
48fc358bde10ff21b2cf512a031ffaf7374081b0f08f020bdd67a080b1e917fc579aca2b60c1f6e18c5ed1e1752c2b77faf7b64426b0dee08f12030adcd755ce5d5e5c5ddf3a72ce3cde847950de074313b38c4d3845f102d387208f020f33e65c3425fbac1e453930b0d447b776ba56a9ae86e777f7c78d6a973f1ac2408f120a66fd1d059c783875bb08cf3ad0a27d836ab1cdc019e8ea238b72379250d6b5a08f159010000000103e11d4dafc11a60859d0bd33b55f6fd3bdf3f7d659b04fd2057a7d77d2071230000000000fdffffff0252d59200000000001600140db84d3cb80e3fe685834583d6216d0736bc12660000000000000000226a20f004218307000808f120dabddf003dc5a38416cba755881afbbc7d181bbbe74833a84d3fae9b0a4c6de60808f120742f880216b42f611fdf4b1421b9875a8c29002c4398104ba83a74a81dc0b84e0808f12035a5f19dcc35fdd3da008fc7c8ce6a00e8410a99f334a8f924574f8db973a8460808f020e968af2ee61b31a0b54e0c5515c2636b7f78d7e853f10cc1c46a0f25bd53ad1c0808f0204063f5a7601f4a1b114b780df5ca991296b4e77093c86d2e56c185ee131b63860808f020c44ede80587fd3e75cf083a0f870bd8ce92b5348972adccfd8ed7b929aabc8db0808f120d01b19249a54f0ba87b5d6f784bf8183a660d1e325fc58fd853008f1cbf723f50808f1201eac00c289371d96816dd1f38a69f9e6bb619a5738b0ce6858f785dd2ae1c1e50808f12049c1b3f01c3b9a295a4eae854fb8f56d8a2b08daee4ffceb808f035ded1cb4580808f0203089de16209b3cd42abdb18ee74324f1f22e61c05a1d8c52b503a06d781a4c540808f0207668971306f71e72a2096f41881704655d34936e407d3620b0f2428f6923ae870808000588960d73d7190103a2861efff01086707b19020de86aff1e49cb646ec5f808f10459f6e400f008275c0b61c93b4bdaff0083dfe30d2ef90c8e2e2d68747470733a2f2f616c6963652e6274632e63616c656e6461722e6f70656e74696d657374616d70732e6f726708f02089a1ff4781938b7d98e83c06d91c5b6a9017c1e82eb3bcfc68c8df2d96b0c92f08f0200cfaf8ff9bafd231b8b0a97c721b76211bea1383d172f3d2166784dbba307dca08f1203c9c6341f5847addbc077f9261a75faa68722995152655251e742baa7b83f45c08f120f5c40fb9e9a4bb66e9c8fe91b281cad0239a94804c54f97f75a12f57e0d8e92808f1209e2770b5be815578c07c07de9bc6dc10a1f578b55488cc97ffd95a1ce8d4dd8208f120524f5b1247ba14a8df609d8ee22d42efbcc7bde0727ccc95834ce46bb13d1b5708f120553a84fb1efe8a1699e8737f40d352e4defc6d231760416d6
1c5f0dcb19cfb6808f020d84f0c44052863ea0403dc57206b3e9c8595835a936de11ce24e3c8d6bb78a2708f1205abf406a4ab122b27f5f9a8234c5ff4f6d90789af6950424e8864cca53803c0208f120dd8b42e44fe5b8ae63e44f7612cf7a6ab384ca8543c16b097b5f25af7edb857f08f02049c23e0e98e7633798e242449aa76ac3fee6012e46897ad1a7eb08958b1565ed08f020f801e537dcf9761214a0e276bb2f9907bd834e0cafe9caeaee340180dfa3f7c208f1200088e5f5842df10e5869a363cc949715c5fbf86a4e4aa8b621efad6d1c2196df08f1590100000001c350bc30975e5c941cf3508e5302068e70bed189dca215752961c76c2524532a0000000000fdffffff02edf3510000000000160014e200dde45eb0529aebe86e16060fb9b109008b560000000000000000226a20f004238307000808f0209a7685c29057df8d5c1486942f794d24c1a3bd73497014644e232d4e60036b5d0808f0206d0a19463805ea0468537d4f4521edf91295bae46621ea67451dd3b56add29df0808f020e2183c0092ca5ead1dcbb9dde8a5e68adb80be25b28fc208914df7b3e47660f10808f020248dff2abbae8fa5f7ce87fabd6c941397cc1979c2b2161d1d0c51263468acb00808f1205c7fc70574a919eaa37257d5521ee82b1ba57c747d2a35bc5bbd8a1d370b4efd0808f120d8b61afb9b980a58a02845c3abb607eb72d3a47528f1a96d37da977ced48a6450808f020430119ebb726a3b9530d31f1110a3c67c0e0c4bdcf0b74d9d9dcaa91d0fcefe20808f0201d715d78659aac3fccea8a6a8cbcb6759dbcd851d1d919c72600d382b0bdae800808f120850d5c19809ade49c051cc27e308703087a7f2772aab43f4050ff260945468050808f1208a1c6db3604fef1d1be2d0404a65a2c1880fb0cd9621d18bc59fa8425543c90c0808f120a06b0bb4efae407b8dab28473a0783bb89e7663efbb8980326e7aaaf765e2ccd0808f020feff03e7f6ca0420fa92b1b8114c1961b76490ee88b09e4029838942379f35b00808000588960d73d7190103a4861ef010c4103e3a25d6ecc7243ae7802466616108f10459f6e400f008edafd20c9974932bff0083dfe30d2ef90c8e292868747470733a2f2f66696e6e65792e63616c656e6461722e657465726e69747977616c6c2e636f6d08f120856b81e3ec879b497a5e87c1638008bd9b4169c899fed24cd12ed3220b6bf74a08f120fa82b0ec2fa550365e1a3982924365ab6a38ed78e3efab1a41a8bad668e303ca08f12079cd28c92d707944dded2f768ee0d9d24e0c6621f443f7c1138e597f40c3f84f08f1205c92984825b9e4de3391e6f5f61f62dcd93a96087a54fd31de8231328fcea47508f020059ef220b8cde3d
698e41c03ccce68cd933fdda037c4c42df486aa8b16c1823808f120527ac415d63c3568aa21bb9c3129cc8cbe0f1f60a034274b6d72fb46fa10e92d08f02063202c8a7fe2ccc746c36a671cc82d6c57c6ccd7e6a85ebd7983443eaeecff1908f1204da26285d0eab660de7a197a15227795d75da4ebf411ee588ea6167af42770e508f02030554533f33ba4185ccb2323a7edcdfbd43c7caf4984493c8b0cfa3408eac83a08f020efa635035eae5bfab2a01dd231cdcb40ceec233d4698a514398164ec09271bbe08f1205def3ea6346e21555aa3ef0a8ec59c6d94d1c16d7ea0f86082a4a7747fdd755808f02036f9858cc9571331faa13ec8d851df70a14b188e7b2bcba549a80b902866956a08f120ba24ea4c9d11a709038368225839a0e6af758a2d2a18d794996179368cb0d82d08f1ae010100000001c15f1985fa21e340ea5f12cad6528dc299acc41a3957566831e15f9e5710c2a80000000048473044022014cc04a47a7d45cc0ddf7b774f04877325ca4ad0e8b3e8c9fe1f4e8aca6ba4c2022051dc0ed24afe109ceaefdcfebf25abb8515ee77425e43d816a561efab19d162501fdffffff022530090000000000232103306be92d7bf2d8d57ac10d7773e69a5833e7f9495dc4bba78973144050211497ac0000000000000000226a20f004228307000808f020aaf914437aa8e53900a5c4226233effd210b170fa692aacc260ff51a9f8a05650808f02008d28698dbe824dfc16fe1809e59244e7a22f623719668a3dab2f42f2ea8fdcc0808f1200e7d1b9a340fae0b8d3ce59884e6ffc124be390f13f6f210cd97b8fbb612562e0808f020da2121dce81cd9f9e15434eefc46763c21ad26ebd2d7907698b7a6c013b376950808f0203236406de3c8f5723d6c6eeb43e92011c96e4d34d90200611053e88980ec49490808f0201057896f05e6887bcd21ba9004a09471a8efc56c34893281fee95d294357b4780808f0200002f00c6f81d8795c24575afc446a92b53c19d19b1e6c13d4348d6fd578e4df0808f12008ce44a1e21afe273be45ed6dfeb5430f763782dca2199222ae82aff2e082a460808f120701b4e3b2777b0ee82e627c4e6d180a39c32f3ac4e8fcc3fd3a7d0b6719580aa0808f0207085ed4a447f28d94dc89c4e2b7b0682ae78fd72c5d7eb95f3f55fbc93a0ff9b0808f120e9a20490f3b5ace95f2c2a56c07921c895bee371e87a7fd24e1789e2ef7d2da80808f020e7b5d791d1b118e9dd07a7c181b20dcf37ab5a9bb14970eebd04ffe03c9819430808000588960d73d7190103a3861e"#)) + } +} diff --git a/Benchmarks/Sources/WebURLBenchmark/URLEncoded.swift 
b/Benchmarks/Sources/WebURLBenchmark/URLEncoded.swift new file mode 100644 index 000000000..75a62dbab --- /dev/null +++ b/Benchmarks/Sources/WebURLBenchmark/URLEncoded.swift @@ -0,0 +1,52 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Benchmark +import WebURL + +/// Benchmarks the `StringProtocol.urlEncoded` and `StringProtocol.urlDecoded` properties. +/// +let urlEncoded_Decoded = BenchmarkSuite(name: "URLEncoded") { suite in + + let urlEncoded_strings = [ + String(decoding: 0 ..< 128, as: UTF8.self), // Every ASCII character + #"This 🦆 is 🐧 some 🦖 m!x€d%20cºnt£nt"#, + #""" + A very long string with the occassional 🦖 emoji thrown in for good measure! + A very long string with the occassional 🦖 emoji thrown in for good measure! + A very long string with the occassional 🦖 emoji thrown in for good measure! + A very long string with the occassional 🦖 emoji thrown in for good measure! + A very long string with the occassional 🦖 emoji thrown in for good measure! + A very long string with the occassional 🦖 emoji thrown in for good measure! + A very long string with the occassional 🦖 emoji thrown in for good measure! + A very long string with the occassional 🦖 emoji thrown in for good measure! 
+ """# + ] + suite.benchmark("String.urlEncoded") { + for string in urlEncoded_strings { + blackHole(string.urlComponentEncoded) + } + } + + let urlDecoded_strings = [ + #"%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22%23%24%25%26'()*%2B%2C-.%2F0123456789%3A%3B%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F"#, // Every ASCII character + #"This%20%F0%9F%A6%86%20is%20%F0%9F%90%A7%20some%20%F0%9F%A6%96%20m!x%E2%82%ACd%2520c%C2%BAnt%C2%A3nt"#, + #"A%20very%20long%20string%20with%20the%20occassional%20%F0%9F%A6%96%20emoji%20thrown%20in%20for%20good%20measure!%0AA%20very%20long%20string%20with%20the%20occassional%20%F0%9F%A6%96%20emoji%20thrown%20in%20for%20good%20measure!%0AA%20very%20long%20string%20with%20the%20occassional%20%F0%9F%A6%96%20emoji%20thrown%20in%20for%20good%20measure!%0AA%20very%20long%20string%20with%20the%20occassional%20%F0%9F%A6%96%20emoji%20thrown%20in%20for%20good%20measure!%0AA%20very%20long%20string%20with%20the%20occassional%20%F0%9F%A6%96%20emoji%20thrown%20in%20for%20good%20measure!%0AA%20very%20long%20string%20with%20the%20occassional%20%F0%9F%A6%96%20emoji%20thrown%20in%20for%20good%20measure!%0AA%20very%20long%20string%20with%20the%20occassional%20%F0%9F%A6%96%20emoji%20thrown%20in%20for%20good%20measure!%0AA%20very%20long%20string%20with%20the%20occassional%20%F0%9F%A6%96%20emoji%20thrown%20in%20for%20good%20measure!"# + ] + suite.benchmark("String.urlDecoded") { + for string in urlDecoded_strings { + blackHole(string.percentDecoded) + } + } +} diff --git a/Benchmarks/Sources/WebURLBenchmark/main.swift b/Benchmarks/Sources/WebURLBenchmark/main.swift new file mode 100644 index 000000000..9a97947da --- /dev/null +++ b/Benchmarks/Sources/WebURLBenchmark/main.swift @@ -0,0 +1,34 @@ +// Copyright The swift-url Contributors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Benchmark + +// https://github.com/google/swift-benchmark/issues/69 +@inline(__always) +internal func blackHole<T>(_ x: T) { + @_optimize(none) + func assumePointeeIsRead(_ x: UnsafeRawPointer) {} + withUnsafePointer(to: x) { assumePointeeIsRead($0) } +} + +// Benchmark plan: +// - Non-special versions of SpecialNonFile tests. +// - Cannot-be-a-base URLs +// - file: URLs +// - setters + +Benchmark.main([ + constructor_specialNonFile, + urlEncoded_Decoded +]) diff --git a/Benchmarks/compare.py b/Benchmarks/compare.py new file mode 100644 index 000000000..7b6c9509b --- /dev/null +++ b/Benchmarks/compare.py @@ -0,0 +1,255 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import sys +import json +from pprint import pprint +from collections import defaultdict +import argparse +import re + + +def require(cond, msg): + """Fails with a message if condition is not true.""" + + if not cond: raise Exception(msg) + + +def validate(file_name, parsed): + """Validates that given json object is a valid benchmarks result.""" + + require("benchmarks" in parsed, + "{}: missing key 'benchmarks'.".format(file_name)) + require(len(parsed["benchmarks"]) > 0, + "{}: must have at least one benchmark.".format(file_name)) + + for i, benchmark in enumerate(parsed["benchmarks"]): + require("name" in benchmark, + "{}: benchmark #{}: missing key 'name'.".format(file_name, i)) + + for k, v in benchmark.items(): + if k != "name": + is_num = isinstance(v, int) or isinstance(v, float) + template = "{}: benchmark #{}: values must be numbers." + require(is_num, template.format(file_name, i)) + + +def parse_and_validate(args): + """Parse command-line args, parse given json files and validate their contents.""" + + runs = [] + + for file_name in args.file_names: + with open(file_name) as f: + parsed = None + try: + parsed = json.load(f) + except Exception as err: + raise Exception("failed to parse json: {}".format(err)) + validate(file_name, parsed) + runs.append((file_name, parsed)) + + return runs + + +def benchmark_predicate(args): + """Returns a predicate used to filter benchmark columns based on cli args.""" + + include = lambda x: True + + if args.filter: + regex = re.compile(args.filter) + prev_include = include + include = lambda x: regex.search(x) is not None and prev_include(x) + + if args.filter_not: + regex = re.compile(args.filter_not) + prev_include = include + include = lambda x: regex.search(x) is None and prev_include(x) + + return include + + +def collect_values(args, runs): + """Collect benchmark values for the comparison, excluding filtered out columns.""" + + baseline_name, baseline = runs[0] + + include_benchmark = benchmark_predicate(args) 
+ include_column = lambda x: args.columns is None or x in args.columns + + confs = [] + values = {} + + for benchmark in baseline["benchmarks"]: + benchmark_name = benchmark["name"] + if not include_benchmark(benchmark_name): + continue + for column in benchmark.keys(): + if column == "name": + continue + if not include_column(column): + continue + conf = (benchmark_name, column) + confs.append(conf) + values[conf] = {} + + for conf in confs: + bench_name, column = conf + for (file_name, run) in runs: + for bench in run["benchmarks"]: + if bench["name"] == bench_name: + values[conf][file_name] = bench[column] + + return (confs, values) + + +def geomean(values): + """Compute geometric mean for the given sequence of values.""" + + product = 1.0 + for value in values: + product *= value + return product**(1.0/len(values)) + + +def to_table(confs, args, values): + """Compute a table of relative results across all input files.""" + + baseline_file_name = args.baseline + rows = [] + + # Header row. + header = [] + header.append("benchmark") + header.append("column") + for (n, file_name) in enumerate(args.file_names): + name = file_name.replace(".json", "") + header.append(name) + if n != 0: + header.append("%") + rows.append(header) + + # Body rows. + relative_values = defaultdict(lambda: defaultdict(list)) + for conf in confs: + bench_name, column = conf + row = [] + row.append(bench_name) + row.append(column) + for n, file_name in enumerate(args.file_names): + base_value = values[conf][baseline_file_name] + value = values[conf][file_name] + row.append("{:.2f}".format(value)) + if n != 0: + relative = value/base_value + relative_values[column][file_name].append(relative) + relative_percentage = (1 - relative ) * 100 + row.append("{:.2f}".format(relative_percentage)) + rows.append(row) + + # Compute totals for each columsn as a geomean of all relative results. 
+ cols = [] + geomean_values = defaultdict(dict) + for (_, col) in confs: + if col not in cols: + cols.append(col) + for n, file_name in enumerate(args.file_names): + if n != 0: + vs = relative_values[col][file_name] + geomean_values[col][file_name] = geomean(vs) + + for col in cols: + row = [] + row.append("") + row.append(col) + for n, file_name in enumerate(args.file_names): + row.append("") + if n != 0: + value = geomean_values[col][file_name] + percentage = (1 - value) * 100 + row.append("{:.2f}".format(percentage)) + rows.append(row) + + return rows + + +def pad(base, fill, count, right = False): + """Pad base string with given fill until count, on either left or right.""" + + while len(base) < count: + if right: + base += fill + else: + base = fill + base + return base + + +def print_table(table): + """Pretty print results table as aligned human-readable text.""" + + # Collect width of each max column. + widths = defaultdict(lambda: 0) + for row in table: + for ncol, col in enumerate(row): + widths[ncol] = max(widths[ncol], len(str(col))) + + # Print results as an aligned text to stdout. 
+ totals = False + for nrow, row in enumerate(table): + if row[0] == '' and not totals: + print("-" * (sum(widths.values()) + len(widths) - 1)) + totals = True + line = [] + for ncol, col in enumerate(row): + right = ncol == 0 or ncol == 1 + line.append(pad(str(col), " ", widths[ncol], right = right)) + print(" ".join(line)) + if nrow == 0: + print("-" * (sum(widths.values()) + len(widths) - 1)) + + +def parse_args(): + """Parse command-line flags into a configuration object, and return it.""" + + parser = argparse.ArgumentParser(description="Compare multiple swift-benchmark json files.") + parser.add_argument("baseline", help="Baseline json file to compare against.") + parser.add_argument("candidate", nargs="+", + help="Candidate json files to compare against baseline.") + parser.add_argument("--filter", help="Only show benchmarks that match the regular expression.") + parser.add_argument("--filter-not", help="Exclude benchmarks whose names match the regular expression.") + parser.add_argument("--columns", help="A comma-separated list of columns to show.") + + args = parser.parse_args() + args.file_names = [args.baseline] + args.file_names.extend(args.candidate) + if args.columns is not None: + args.columns = set(args.columns.split(",")) + + return args + + +def main(): + """Command-line entry-point.""" + + args = parse_args() + runs = parse_and_validate(args) + confs, values = collect_values(args, runs) + table = to_table(confs, args, values) + print_table(table) + + +if __name__ == "__main__": + main() diff --git a/Benchmarks/results/.gitignore b/Benchmarks/results/.gitignore new file mode 100644 index 000000000..94a2dd146 --- /dev/null +++ b/Benchmarks/results/.gitignore @@ -0,0 +1 @@ +*.json \ No newline at end of file diff --git a/Benchmarks/results/put_results_here_so_you_dont_commit_them b/Benchmarks/results/put_results_here_so_you_dont_commit_them new file mode 100644 index 000000000..e69de29bb diff --git a/GettingStarted.md b/GettingStarted.md new file 
mode 100644 index 000000000..65b90e0a3 --- /dev/null +++ b/GettingStarted.md @@ -0,0 +1,224 @@ +# WebURL Quickstart Guide + +WebURL is a new URL type for Swift which is compatible with the WHATWG's URL Living Standard. +To get started using WebURL, first add the package as a dependency (see the README for more information). + +Next, import the `WebURL` package: + +```swift +import WebURL +``` + +To parse a URL from a `String`, use the initializer: + +```swift +let url = WebURL("https://github.com/karwa/swift-url/")! +``` + +Note that this initializer expects an _absolute_ URL string - i.e. something which begins with a scheme (`"http:"`, `"file:"`, `"myapp:"`, etc). + +`WebURL` objects conform to many protocols from the standard library you may be familiar with: + - `Equatable` and `Hashable`, so they may be used as keys in a `Dictionary` or as members of a `Set`, + - `Comparable`, so they may be sorted, + - `Codable`, so they may be serialized/deserialized from JSON or other formats, and + - `LosslessStringConvertible`, as `WebURL` abides by the URL Standard's requirement that + converting a URL to/from a `String` must never change how the URL is interpreted. + +## Basic Components + +Once you have constructed a `WebURL` object, you can inspect its components, such as its `scheme`, `hostname` or `path`. Additionally, the entire URL string (its "serialization") is available via the `serialized` property: + +```swift +url.scheme // "https" +url.hostname // "github.com" +url.path // "/karwa/swift-url/" + +url.serialized // "https://github.com/karwa/swift-url/" +``` + +Components are returned as they appear in the URL string, including any percent-encoding. The `WebURL` package includes a number of extensions to standard library types and protocols, +to help you add and remove percent-encoding from strings. 
To remove percent-encoding, use the `percentDecoded` property, which is made available to all `String`s: + +```swift +let url = WebURL("https://github.com/karwa/swift%2Durl/")! +url.path // "/karwa/swift%2Durl/" +url.path.percentDecoded // "/karwa/swift-url/" +``` + +## Relative URLs + +You can also create a URL by resolving a string relative to an existing, absolute URL (the "base URL"). +The result of this is another absolute URL, pointing to the same location as an HTML `<a>` tag on the base URL's page: + +```swift +let base = WebURL("https://github.com/karwa/swift-url/")! + +base.resolve("pulls/39")! // "https://github.com/karwa/swift-url/pulls/39" +base.resolve("/apple/swift/")! // "https://github.com/apple/swift/" +base.resolve("..?tab=repositories")! // "https://github.com/karwa/?tab=repositories" +base.resolve("https://swift.org/")! // "https://swift.org/" +``` + +This is not limited to http(s) URLs; it works for every URL, including "file" URLs: + +```swift +let appData = WebURL("file:///tmp/")!.resolve("my_app/data/")! +// appData = "file:///tmp/my_app/data/" +let mapFile = appData.resolve("../other_data/map.json")! +// mapFile = "file:///tmp/my_app/other_data/map.json" +``` + +## Modifying URLs + +`WebURL` does not need an intermediate type like `URLComponents`. Instead, components may be set directly. + +Modifications are efficient, and occur in-place on the URL's existing storage object as capacity and value semantics allow. + +```swift +var url = WebURL("http://github.com/karwa/swift-url/")! + +// Upgrade to https: +url.scheme = "https" +url.serialized // "https://github.com/karwa/swift-url/" + +// Change the path: +url.path = "/apple/swift/" +url.serialized // "https://github.com/apple/swift/" +``` + +When you modify a component, the value you set will automatically be percent-encoded if it contains any illegal characters. +This applies to the `username`, `password`, `path`, `query`, and `fragment` fields. 
+Notably, it does not apply to the `scheme` or `hostname` - attempting to set an invalid `scheme` or `hostname` will fail. + +```swift +var url = WebURL("https://example.com/my_files/secrets.txt")! + +url.username = "my username" +url.password = "🤫" +url.serialized // "https://my%20username:%F0%9F%A4%AB@example.com/my_files/secrets.txt" + +url.hostname = "👾" // Fails, does not modify. +url.serialized // (unchanged) +``` + +In general, the setters are very permissive. However, if you do wish to detect and respond to failures to modify a component, +use the corresponding throwing setter method instead. The thrown `Error`s contain specific information about why the operation failed, +so it's easier for you to debug logic errors in your application. + +## Path Components + +You can access a URL's path components through the `pathComponents` property. +This returns an object which conforms to Swift's `Collection` protocol, so you can use it in `for` loops and +lots of other code directly, yet it efficiently shares storage with the URL it came from. + +The components returned by this view are automatically percent-decoded from their representation in the URL string. + +```swift +let url = WebURL("file:///Users/karl/My%20Files/data.txt")! + +for component in url.pathComponents { + ... // component = "Users", "karl", "My Files", "data.txt". +} + +if url.pathComponents.last!.hasSuffix(".txt") { + ... +} +``` + +Additionally, this view allows you to _modify_ a URL's path components. +Any inserted components will be automatically percent-encoded in the URL string. + +```swift +var url = WebURL("file:///swift-url/Sources/WebURL/WebURL.swift")! 
+ +url.pathComponents.removeLast() +// url = "file:///swift-url/Sources/WebURL" + +url.pathComponents.append("My Folder") +// url = "file:///swift-url/Sources/WebURL/My%20Folder" + +url.pathComponents.removeLast(3) + +url.pathComponents += ["Tests", "WebURLTests", "WebURLTests.swift"] +// url = "file:///swift-url/Tests/WebURLTests/WebURLTests.swift" +``` + +Paths which end in a "/" (also called "directory paths"), are represented by an empty component at the end of the path. +However, if you append to a directory path, `WebURL` will automatically remove that empty component for you. +If you need to create a directory path, append an empty component, or use the `ensureDirectoryPath()` method. + +```swift +var url = WebURL("https://api.example.com/v1/")! + +for component in url.pathComponents { + ... // component = "v1", "". +} + +url.pathComponents += ["users", "karl"] +// url = "https://api.example.com/v1/users/karl" +// components = "v1", "users", "karl". + +url.pathComponents.ensureDirectoryPath() +// url = "https://api.example.com/v1/users/karl/" +// components = "v1", "users", "karl", "". +``` + +## Form-Encoded Query Items + +You can also access the key-value pairs in a URL's query string using the `formParams` property. +As with `pathComponents`, this returns an object which shares storage with the URL it came from. + +You can use Swift's "dynamic member" feature to access query parameters as though they were properties. +For example, in the query string `"from=EUR&to=USD"`, accessing `url.formParams.from` will return `"EUR"`. +For parameters whose names cannot be used in Swift identifiers, the `get` method will also return the corresponding value for a key. + +Additionally, all of the query's key-value pairs are available as a Swift `Sequence` via the `allKeyValuePairs` property. 
+ +This view assumes that the query string's contents are encoded using `application/x-www-form-urlencoded` ("form encoding"), +and all of the keys and values returned by this view are automatically decoded from form-encoding. + +```swift +let url = WebURL("https://example.com/currency/convert?amount=20&from=EUR&to=USD")! + +url.formParams.amount // "20" +url.formParams.from // "EUR" +url.formParams.get("to") // "USD" + +for (key, value) in url.formParams.allKeyValuePairs { + ... // ("amount", "20"), ("from", "EUR"), ("to", "USD"). +} +``` + +And again, as with `pathComponents`, you can modify a URL's query string using `formParams`. +To set a parameter, assign a new value to its property or use the `set` method. Setting a key to `nil` will remove it from the query. + +Also, any modification will re-encode the entire query string so that it is consistently encoded as `application/x-www-form-urlencoded`, +if it is not already. + +```swift +var url = WebURL("https://example.com/currency/convert?amount=20&from=EUR&to=USD")! + +url.formParams.amount // "20" +url.formParams.to // "USD" + +url.formParams.amount = "56" +url.formParams.to = "Pound Sterling" +// url = "https://example.com/currency/convert?amount=56&from=EUR&to=Pound+Sterling" + +url.formParams.format = "json" +// url = "https://example.com/currency/convert?amount=56&from=EUR&to=Pound+Sterling&format=json" +``` + +## Further Reading + +And that's your overview! We've covered creating, reading, and manipulating URLs using `WebURL`. Hopefully you agree that it makes +great use of the expressivity of Swift, and are excited to use `WebURL` for: + +- URLs based on the latest industry standard. +- Better-defined behaviour, and better alignment with how modern web browsers behave. +- Speed and memory efficiency, as well as +- APIs designed for Swift. + +There's even more that we didn't cover, like `Host` objects, IP Addresses, _lazy_ percent encoding/decoding, `Origin`s, the `JSModel`, +or our super-powered `UTF8View`. 
If you'd like to continue reading about the APIs available in the `WebURL` package, +see the [official documentation](https://karwa.github.io/swift-url/), or just go try it out for yourself! \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..2177e46be --- /dev/null +++ b/LICENSE @@ -0,0 +1,178 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + \ No newline at end of file diff --git a/NOTICE b/NOTICE new file mode 100644 index 000000000..e42d4caab --- /dev/null +++ b/NOTICE @@ -0,0 +1,2 @@ +swift-url (WebURL) +Copyright Karl Wagner, and the swift-url Contributors. diff --git a/Package.swift b/Package.swift new file mode 100644 index 000000000..7af380299 --- /dev/null +++ b/Package.swift @@ -0,0 +1,38 @@ +// swift-tools-version:5.3 + +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import PackageDescription + +let package = Package( + name: "swift-url", + products: [ + .library(name: "WebURL", targets: ["WebURL"]), + .library(name: "WebURLTestSupport", targets: ["WebURLTestSupport"]), + ], + dependencies: [ + // Swift-checkit for testing protocol conformances. + .package(name: "Checkit", url: "https://github.com/karwa/swift-checkit.git", from: "0.0.2"), + ], + targets: [ + .target(name: "WebURL"), + .target(name: "WebURLTestSupport", dependencies: ["WebURL"]), + .testTarget( + name: "WebURLTests", + dependencies: ["WebURL", "WebURLTestSupport", "Checkit"], + resources: [.copy("Resources")] + ), + ] +) diff --git a/README.md b/README.md new file mode 100644 index 000000000..a2e9d80c6 --- /dev/null +++ b/README.md @@ -0,0 +1,271 @@ +# WebURL + +This package contains a new URL type for Swift, written in Swift. + +- The [Getting Started](GettingStarted.md) guide contains an overview of how to use the `WebURL` type. +- The [Full documentation](https://karwa.github.io/swift-url/) contains detailed information about the API. + +You may be interested in: + +- This prototype port of [async-http-client](https://github.com/karwa/async-http-client), which allows you to perform http(s) + requests using `WebURL`, and shows how easy it can be to adopt in your library. + +This URL type is compatible with the [URL Living Standard](https://url.spec.whatwg.org/) developed by the WHATWG, meaning it more closely +matches how modern web browsers behave.
It has a fast and efficient implementation, and an API designed to meet the needs of Swift developers. + +## Using WebURL in your project + +To use `WebURL` in a SwiftPM project, add the following line to the dependencies in your Package.swift file: + +```swift +.package(url: "https://github.com/karwa/swift-url", from: "0.1.0"), +``` + +## Project Goals + +
1. For parsing to match the URL Living Standard. +
+ + The URL parser included in this project is derived from the reference parser implementation described by the standard, and should + be fully compatible with it. The programmatic API for reading and manipulating URL components (via the `WebURL` type) may contain + minor deviations from the JavaScript API defined in the standard in order to suit the expectations of Swift developers, + (e.g. the `query` property does not contain the leading "?" as it does in JavaScript, setters are stricter about invalid inputs, etc). + The full JavaScript API is available via the `JSModel` type, which is implemented entirely in terms of the Swift API. + + The list of differences between the `WebURL` API and the JavaScript `URL` class are documented [here](https://karwa.github.io/swift-url/WebURL_JSModel/). + + Conformance to the standard is tested via the common [Web Platform Tests](https://github.com/web-platform-tests/wpt/tree/master/url) + used by browser developers to validate their implementations. Currently this consists of close to 600 parser tests, and about 200 tests + for setting individual properties. The project also contains additional test databases which are validated against the JSDOM reference + implementation, with the intention to upstream them in the future. + + Conformance to a modern URL standard is the "killer feature" of this project, and other than the documented differences in APIs, + any mismatch between this parser and the standard is, categorically, a bug (please report them if you see them!). Foundation's `URL` type + conforms to RFC-1738, from 1994, and `URLComponents` conforms to a different standard, RFC-3986 from 2005. The 1994 standard contains many issues + which subsequent standards have defined or fixed; this project allows Swift to match the behaviour of modern web browsers. +
+ +
2. To be safe, fast, and memory-efficient. +
+ + Swift is designed to be a safe language, free of undefined behaviour and memory-safety issues. The APIs exposed by this library use + a combination of static and runtime checks to ensure memory-safety, and use of unsafe pointers internally is kept to a minimum. + + Performance is also very important to this project, but communicating comparisons is tricky. The obvious comparison would be against our existing + URL type, `Foundation.URL`; but as mentioned above, it conforms to an entirely different standard. The new standard's parser is very permissive + and components are normalized and percent-encoded during parsing in order to cast as wide a compatibility net as possible and harmonize their representation. + So in some sense comparing `WebURL` and `Foundation.URL` is apples-to-oranges, but it can be argued that parsing time is an important metric for developers, + regardless. + + Despite the extra processing, the "AverageURLs" benchmark in this repository demonstrates performance that is slightly faster than Foundation, + on an Intel Mac (Ivy Bridge). Depending on their size and structure, improvements for other URLs can range from 15% (IPv6 addresses) + to 66% (very long query strings), while using less memory. Additionally, common operations such as hashing and testing for equality can be more + than twice as fast as `Foundation.URL`. We'll also be exploring some ideas which could further increase parsing performance. + + On lower-end systems, such as a Raspberry Pi 4 8GB running 64-bit Ubuntu and the [swift-arm64 community toolchain (5.4)](https://github.com/futurejones/swift-arm64), + the same benchmarks can demonstrate even greater improvements; "AverageURLs" going from about 1.85s using Foundation, to only 62.67ms with `WebURL`. + + As with all benchmark numbers, YMMV. + + Additionally: + + - The API supports efficient in-place mutation, so `URLComponents` is no longer needed in order to modify a component's value.
+ - The API offers views of the URL's path components and query parameters which share the URL's storage, allowing fast and efficient iteration + and inspection. + - These views _also_ support in-place mutation, so when appending a path-component or setting a query parameter, + the operation should be as fast, if not faster, than the equivalent string manipulation. + + _(Note that benchmarking and optimizing the setters is still a work-in-progress.)_ +
+ +
3. To leverage Swift's language features in order to provide a clean, convenient, and powerful API. +
+ + This library makes extensive use of generics; almost every API which accepts a `String`, from the parser to the component setters, + also has variants which accept user-defined `Collection`s of UTF-8 code-units. This can be valuable in performance-sensitive scenarios, + such as when parsing large numbers of URLs from data files or network packets. + + It also makes extensive use of wrappers which share a URL's storage, for example to provide a `Collection` interface to a URL's path components. + These wrappers also showcase the power of `_modify` accessors, allowing for a clean API with namespaced operations, which retain the ability to modify + a URL in-place: + + ```swift + var url = WebURL("file:///usr/foo")! + url.pathComponents.removeLast() + url.pathComponents += ["lib", "swift"] + print(url) // file:///usr/lib/swift + ``` + + The view of a URL's form-encoded query parameters also supports `@dynamicMemberLookup` for concise get- and set- operations: + + ```swift + var url = WebURL("http://example.com/currency/convert?amount=20&from=EUR&to=USD")! + print(url.formParams.amount) // "20" + url.formParams.to = "GBP" + print(url) // http://example.com/currency/convert?amount=20&from=EUR&to=GBP + ``` + + Setters that can fail also have throwing sister methods, which provide rich error information about why a particular operation did not succeed. + These error descriptions do not capture any part of the URL, so they do not contain any privacy-sensitive data. + + Take a look at the [Getting Started](GettingStarted.md) guide for a tour of this package's core API.
+
+ +## Roadmap + +The implementation is extensively tested, but the interfaces have not had time to stabilise. + +While the package is in its pre-1.0 state, it may be necessary to make source-breaking changes. +I'll do my best to keep these to a minimum, and any such changes will be accompanied by clear documentation explaining how to update your code. + +I'd love to see this library adopted by as many libraries and applications as possible, so if there's anything I can add to make that easier, +please file a GitHub issue or write a post on the Swift forums. + +Aside from stabilising the API, the other priorities for v1.0 are: + +1. file URL <-> file path conversion + + Having a port of `async-http-client` is a good start for handling http(s) requests, but file URLs also require attention. + + It would be great to add a way to create a file path from a file URL and vice-versa. This should be relatively straightforward; + we can look to cross-platform browsers for a good idea of how to handle this. Windows is the trickiest case (UNC paths, etc), + but since Microsoft Edge is now using Chromium, we can look to [their implementation](https://chromium.googlesource.com/chromium/src/net/+/master/base/filename_util.cc) + for guidance. It's also worth checking to see if WebKit or Firefox do anything different. + +2. Converting to/from `Foundation.URL`. + + This is a delicate area and needs careful consideration of the use-cases we need to support. Broadly speaking, there are 2 ways to approach it: + + - Re-parsing the URL string. + + This is what [WebKit does](https://github.com/WebKit/WebKit/blob/99f5741f2fe785981f20fb1fee5869a2863d16d6/Source/WTF/wtf/cocoa/URLCocoa.mm#L79). + The benefit is that it is straightforward to implement. The drawbacks are that Foundation refuses to accept a lot of URLs which the modern standards consider valid, + so support could be limited. 
In at least one case that I know of, differences between the parsers have led to exploitable security vulnerabilities + (when conversion changes the URL's origin, which is why WebKit's conversion routine now includes a specific same-origin check). + + Something like this, with appropriate checks on the re-parsed result, may be acceptable as an MVP, but ideally we'd want something more robust with better support + for non-http(s) URLs in non-browser contexts. + + - Re-writing the URL string based on `Foundation.URL`'s _components_. + + This should ensure that the resulting URL contains semantically equivalent values for its username, password, hostname, path, query, etc., with the conversion + procedure adding percent-encoding as necessary to smooth over differences in allowed characters (e.g. Foundation appears to refuse "{" or "}" in hostnames or + query strings, while newer standards allow them, so we'd need to percent-encode those). + + The `WebURL` parser has been designed with half an eye on this; in theory we should be able to construct a `ScannedRangesAndFlags` over Foundation's URL string, + using the range information from Foundation's parser, and `URLWriter` will take care of percent-encoding the components, simplifying the path, and assembling the + components in to a URL string. That said, URLs are rarely so simple, and this process will need a _very thorough_ examination and database of tests. + + Even after this is done, my intuition is that it would be unwise for developers to assume seamless conversions between `Foundation.URL` and `WebURL`. + It should be okay to do it once at an API boundary - e.g. for an HTTP library built using `WebURL` to accept requests using `Foundation.URL` - + but such libraries should convert to one URL type as soon as possible, and use that single type to provide all information used to make the request.
+ + As an example of the issues that may arise: if the conversion process adds percent-encoding, performing multiple conversions such as `WebURL -> URL -> WebURL`, + or `URL -> WebURL -> URL`, will result in an object with the same type, but a different URL string (including a different hash value, and comparing as `!=` to the starting URL). + That would be a problem for developers who expect a response's `.url` property to be the same as the URL they made the request with. That's why it's better to stick to + a single type conversion; when a developer sees that the response's `.url` property has a different type, there is more of a signal that the content may have changed slightly. + +3. Benchmarking and optimizing setters, including modifications via `pathComponents` and `formParams` views. + +Post-1.0: + +4. Non-form-encoded query parameters. + + Like the `formParams` view, this would interpret the `query` string as a string of key-value pairs, but _without_ assuming that the query should be form-encoded. + Such an API [was pitched](https://github.com/whatwg/url/issues/491) for inclusion in the URL standard, but is not included since the key-value pair format was + only ever codified by the form-encoding standard; its use for non-form-encoded content is just a popular convention. + + That said, it would likely be valuable to add library-level support to make this convention easier to work with. + +5. Relative URLs. + + Have repeatedly [been pitched](https://github.com/whatwg/url/issues/531) for inclusion in the standard. Support can be emulated to some extent by + using the `thismessage:` scheme reserved by IANA for this purpose, but it is still a little cumbersome, and is common enough outside of browser contexts to + warrant its own type and independent test-suite. Implementation may be as simple as wrapping a `WebURL` with the `thismessage:` scheme, or as complex as the + Saturn V rocket; it is really quite hard to tell, because URLs. + +6. 
IDNA + + By far the biggest thing. See the FAQ for details. + +## Sponsorship + +I'm creating this library because I think that Swift is a great language, and it deserves a high-quality, modern library for handling URLs. +It has taken a lot of time to get things to this stage, and there is an exciting roadmap ahead. + +It demands a lot of careful study, a lot of patience, and a lot of motivation to bring something like this together. So if you +(or the company you work for) benefit from this project, do consider donating to show your support and encourage future development. +Maybe it saves you some time on your server instances, or saves you time chasing down weird bugs in your URL code. + +In any case, thank you for stopping by and checking it out. + +## FAQ + +### What is the WHATWG URL Living Standard? + +It may be surprising to learn that there isn't a single way to interpret URLs. There have been several attempts to create such a thing, +beginning with the IETF documents [RFC-1738](https://www.ietf.org/rfc/rfc1738.txt) in 1994, and the revised version +[RFC-3986](https://www.ietf.org/rfc/rfc3986.txt) in 2005. + +Unfortunately, it's rare to find an application or URL library which completely abides by those specifications, and the specifications +themselves contain ambiguities which lead to divergent behaviour across implementations. Some of these issues were summarised +in a draft [working document](https://tools.ietf.org/html/draft-ruby-url-problem-01) by Sam Ruby and Larry Masinter. As the web +continued to develop, the WHATWG and W3C required a new definition of "URL" which matched how browsers _actually_ behaved. +That effort eventually became the WHATWG's URL Living Standard.
+ +The WHATWG is an industry association led by the major browser developers (currently, the steering committee consists of +representatives from Apple, Google, Mozilla, and Microsoft), and there is high-level approval for their browsers to align with the +standards developed by that group. The standards developed by the WHATWG are "living standards": + +> Despite the continuous maintenance, or maybe we should say as part of the continuing maintenance, a significant effort is placed on +getting the standard and the implementations to converge — the parts of the standard that are mature and stable are not changed +willy nilly. Maintenance means that the days where the standard are brought down from the mountain and remain forever locked, +even if it turns out that all the browsers do something else, or even if it turns out that the standard left some detail out and the browsers +all disagree on how to implement it, are gone. Instead, we now make sure to update the standard to be detailed enough that all the +implementations (not just browsers, of course) can do the same thing. Instead of ignoring what the browsers do, we fix the standard +to match what the browsers do. Instead of leaving the standard ambiguous, we fix the the standard to define how things work. + +From [WHATWG.org FAQ: What does "Living Standard" mean?](https://whatwg.org/faq#living-standard) + +While the WHATWG has [encountered criticism](https://daniel.haxx.se/blog/2016/05/11/my-url-isnt-your-url/) for being overly concerned with +browsers over all other users of URLs (a criticism which, to be fair, is not _entirely_ without merit), I've personally found the process to +be remarkably open, with development occurring in the open on GitHub, and the opportunity for anybody to file issues or submit improvements via pull-requests. 
+While their immediate priority is, of course, to unify browser behaviour, it's still the industry's best starting point to fix the issues +previous standards have faced and develop a modern interpretation of URLs. Not only that, but it seems to me that any future URL standards will have to +consider consistency with web browsers to have any realistic chance of describing how other applications should interpret them. + +### Does this library support IDNA? + +Not yet. +It is important to note that IDNA is _not (just) Punycode_ (a lot of people seem to mistake the two). + +Actually supporting IDNA involves 2 main steps (well, actually more, but for this discussion we can pretend it's only 2): + +1. Unicode normalization + + IDNA also requires a unique flavour of unicode normalization and case-folding, [defined by the Unicode Consortium](https://unicode.org/reports/tr46/). + Part of this is just NFC normalization, but there are additional, domain-specific mapping tables as well (literally, specific to networking domains). + The latest version of that mapping table can be found [here](https://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt). + +2. Punycode + + It is the result of this normalization procedure which is encoded to ASCII via Punycode. In this respect, Punycode acts just like percent-encoding: + it takes a Unicode string in, and outputs an ASCII string which can be used to recover the original content. + + Why not just use percent-encoding? Because percent-encoding is seriously inefficient; it turns every non-ASCII byte from the input in to 3 bytes from the output. + DNS imposes limits on the maximum length of domain names (253 bytes total, 63 bytes per label), so a more space-efficient encoding was needed. + Only the hostname uses IDNA, because it is the only part affected by this DNS restriction. 
+ +That Unicode normalization step is really crucial - unfortunately, Swift's standard library doesn't expose its Unicode algorithms or data tables at the moment, +meaning the only viable way to implement this would be to ship our own copy of ICU or introduce a platform dependency on the system's version of ICU. + +As it stands, this URL library doesn't contain _any_ direct dependencies on system libraries, and I would dearly like to keep it that way. At the same time, it has +long been recognised that the Swift standard library needs to provide access to these algorithms. So once this project has settled down a bit, my plan is to turn my attention +towards the Swift standard library - at least to implement NFC and case-folding, and possibly even to expose IDNA as a standard-library feature, if the core team are amenable to that. +I suspect that will largely be influenced by how well we can ensure that code which doesn't use IDNA doesn't pay the cost of loading those data tables. We'll see how it goes. + +For the time being, we detect non-ASCII and Punycode domains, and the parser fails if it encounters them. +Of those ~600 URL constructor tests in the WPT repository, we only fail 10, and all of them are because we refused to parse an address that would have required IDNA. + +We will support it eventually, but it's just not practical at this very moment. diff --git a/Sources/WebURL/IPAddress.swift b/Sources/WebURL/IPAddress.swift new file mode 100644 index 000000000..eac4d6406 --- /dev/null +++ b/Sources/WebURL/IPAddress.swift @@ -0,0 +1,1013 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


// --------------------------------------------
// MARK: - Parser Callbacks
// --------------------------------------------
// Almost no users of this type care about the specific errors
// that occur during parsing - but there are use-cases, and it's
// helpful for testing. It's important that regular, release-builds
// of the IP address parsers optimize out and around error reporting,
// so the callback needs to take advantage of generic specialization.
// --------------------------------------------


/// A receiver for the validation errors reported by the IP address parsers.
///
/// The requirements are `mutating` so that conforming types may record the errors they are given.
///
@usableFromInline
internal protocol IPAddressParserCallback {

  /// Reports a validation error encountered while parsing an IPv6 address.
  mutating func validationError(ipv6 error: IPv6Address.ParserError)

  /// Reports a validation error encountered while parsing an IPv4 address.
  mutating func validationError(ipv4 error: IPv4Address.ParserError)
}

/// An `IPAddressParserCallback` which discards every error it is given.
///
/// Both requirements are empty and `@inline(__always)`, so that specialized code can drop error reporting entirely.
///
@usableFromInline
internal struct IgnoreIPAddressParserErrors: IPAddressParserCallback {

  @inlinable
  internal init() {}

  @inlinable @inline(__always)
  internal func validationError(ipv6 error: IPv6Address.ParserError) {}

  @inlinable @inline(__always)
  internal func validationError(ipv4 error: IPv4Address.ParserError) {}
}


// --------------------------------------------
// MARK: - Byte Order
// --------------------------------------------


/// The way in which octets are arranged to form a multi-byte integer.
///
/// Applications should prefer to work with individual octets wherever possible, as octets have a consistent numeric interpretation and binary representation
/// across machines of different endianness.
///
/// When combining octets in to larger (e.g. 16- or 32-bit) integers, we have to consider that machines have a choice in how their octets are arranged in memory.
/// For instance, the first piece of the IPv6 address `2608::3:5` consists of 2 octets, `0x26` and `0x08`; if we created a 16-bit integer with those same octets
/// arranged in that order, a big-endian machine would read this as having numeric value 9736 (for the purposes of integer-level operations, such as addition),
/// whereas a little-endian machine would consider the same octets to contain the numeric value 2086.
///
/// Hence there are 2 ways to combine octets in to larger integers:
///
/// 1. With the same octets in the same order in memory. We call this the `binary` interpretation, although it is more-commonly known as
///   "network" byte order, or big-endian. As noted above, integers derived from the same address using the `binary` interpretation
///   may have different numeric values on different machines.
///
/// 2. By rearranging octets to give a consistent numeric value. We call this the `numeric` interpretation, although it is more-commonly known as
///   "host" byte-order. For instance, when reading the first 16-bit piece of the above address on a little-endian machine,
///   the octets `0x26 0x08` will be reordered to `0x08 0x26`, so that the numeric value (9736) is the same as the hexadecimal number `0x2608`.
///   Assigning a group of octets using the `numeric` integer 9736 will similarly reorder the octets, so that they appear as the octet sequence `0x26 0x08`
///   in the address.
///
public enum OctetArrangement {

  /// Offers consistent numeric values across machines of different endianness, by adjusting the binary representation when reading or writing multi-byte integers.
  /// Also known as host byte order (i.e. the integers that you read and write are expected to be in host byte order).
  ///
  case numeric

  /// Offers consistent binary representations across machines of different endianness, although each machine may interpret those bits as a different numeric value.
  /// Also known as network byte order (i.e. the integers that you read and write are expected to be in network byte order).
  ///
  case binary

  /// A synonym for `.numeric`.
  @inlinable public static var hostOrder: Self { return .numeric }

  /// A synonym for `.binary`.
  @inlinable public static var networkOrder: Self { return .binary }
}


// --------------------------------------------
// MARK: - IPv6
// --------------------------------------------


/// A 128-bit numerical identifier assigned to a device on an
/// [Internet Protocol, version 6](https://tools.ietf.org/html/rfc2460) network.
///
public struct IPv6Address {

  /// The 16 octets of an address, in the order in which they appear in the address.
  public typealias Octets = (
    UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8,
    UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8
  )

  /// The octets of this address.
  public var octets: Octets

  /// Creates an address with the given octets.
  ///
  /// - parameters:
  ///   - octets: The octets of the address. If omitted, every octet is zero.
  ///
  @inlinable
  public init(octets: Octets = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) {
    self.octets = octets
  }

  /// The 8 16-bit integer pieces of an address, in the order in which they appear in the address.
  public typealias Pieces = (UInt16, UInt16, UInt16, UInt16, UInt16, UInt16, UInt16, UInt16)

  /// Creates an address from the given 16-bit integer pieces.
  ///
  /// - seealso: `OctetArrangement`
  /// - parameters:
  ///   - pieces:           The integer pieces of the address.
  ///   - octetArrangement: How the octets in each integer of `pieces` are arranged:
  ///                       `.numeric` (host byte order) or `.binary` (network byte order).
  ///
  @inlinable
  public init(pieces: Pieces, _ octetArrangement: OctetArrangement) {
    self.init()
    self[pieces: octetArrangement] = pieces
  }

  /// The address, expressed as 16-bit integer pieces.
  ///
  /// - seealso: `OctetArrangement`
  /// - parameters:
  ///   - octetArrangement: How the octets in each integer of `pieces` are arranged:
  ///                       `.numeric` (host byte order) or `.binary` (network byte order).
  ///
  @inlinable
  public subscript(pieces octetArrangement: OctetArrangement) -> Pieces {
    get {
      // Copy the octets in to a `Pieces` value rather than using `load(as:)`. `load` requires the source address
      // to be aligned for `UInt16`, but a tuple of `UInt8`s is only guaranteed 1-byte alignment.
      // Both tuples occupy 16 bytes, so the copy fills `networkOrder` entirely.
      var networkOrder: Pieces = (0, 0, 0, 0, 0, 0, 0, 0)
      withUnsafeBytes(of: octets) { src in
        withUnsafeMutableBytes(of: &networkOrder) { dst in
          dst.copyBytes(from: src)
        }
      }
      switch octetArrangement {
      case .binary:
        return networkOrder
      case .numeric:
        // Each piece holds big-endian (network order) octets; convert them to the host's numeric value.
        return (
          UInt16(bigEndian: networkOrder.0), UInt16(bigEndian: networkOrder.1), UInt16(bigEndian: networkOrder.2),
          UInt16(bigEndian: networkOrder.3), UInt16(bigEndian: networkOrder.4), UInt16(bigEndian: networkOrder.5),
          UInt16(bigEndian: networkOrder.6), UInt16(bigEndian: networkOrder.7)
        )
      }
    }
    set {
      switch octetArrangement {
      case .binary:
        // The pieces already have the address' octet order; copy them over the octets verbatim.
        withUnsafeBytes(of: newValue) { src in
          withUnsafeMutableBytes(of: &octets) { dst in
            dst.copyBytes(from: src)
          }
        }
      case .numeric:
        // Rearrange each piece to big-endian (network order), then store via the `.binary` path.
        self[pieces: .binary] = (
          newValue.0.bigEndian, newValue.1.bigEndian, newValue.2.bigEndian,
          newValue.3.bigEndian, newValue.4.bigEndian, newValue.5.bigEndian,
          newValue.6.bigEndian, newValue.7.bigEndian
        )
      }
    }
  }
}

// Standard protocols.
+ +extension IPv6Address: Equatable, Hashable, LosslessStringConvertible { + + @inlinable + public static func == (lhs: Self, rhs: Self) -> Bool { + return withUnsafeBytes(of: lhs.octets) { lhsBytes in + return withUnsafeBytes(of: rhs.octets) { rhsBytes in + return lhsBytes.elementsEqual(rhsBytes) + } + } + } + + @inlinable + public func hash(into hasher: inout Hasher) { + withUnsafeBytes(of: octets) { hasher.combine(bytes: $0) } + } + + @inlinable + public var description: String { + return serialized + } +} + +extension IPv6Address: Codable { + + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + let string = try container.decode(String.self) + guard let parsedValue = IPv6Address(string) else { + throw DecodingError.dataCorruptedError( + in: container, + debugDescription: "Invalid IPv6 Address" + ) + } + self = parsedValue + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + try container.encode(self.serialized) + } +} + +// Parsing. + +extension IPv6Address { + + /// Parses an IPv6 address from a String. + /// + /// Accepted formats are documented in [Section 2.2][rfc4291] ("Text Representation of Addresses") of + /// IP Version 6 Addressing Architecture (RFC 4291). + /// + /// [rfc4291]: https://tools.ietf.org/html/rfc4291#section-2.2 + /// + /// - parameters: + /// - description: The string to parse. + /// + @inlinable @inline(__always) + public init?(_ description: S) where S: StringProtocol { + self.init(utf8: description.utf8) + } + + /// Parses an IPv6 address from a collection of UTF-8 code-units. + /// + /// Accepted formats are documented in [Section 2.2][rfc4291] ("Text Representation of Addresses") of + /// IP Version 6 Addressing Architecture (RFC 4291). + /// + /// [rfc4291]: https://tools.ietf.org/html/rfc4291#section-2.2 + /// + /// - parameters: + /// - utf8: The string to parse, as a collection of UTF-8 code-units. 
+ /// + @inlinable @inline(__always) + public init?(utf8: UTF8Bytes) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + var callback = IgnoreIPAddressParserErrors() + let _parsed = + utf8.withContiguousStorageIfAvailable { + IPv6Address.parse(utf8: $0.withoutTrappingOnIndexOverflow, callback: &callback) + } ?? IPv6Address.parse(utf8: utf8, callback: &callback) + guard let parsed = _parsed else { + return nil + } + self = parsed + } +} + +extension IPv6Address { + + @inlinable + internal static func parse( + utf8: UTF8Bytes, callback: inout Callback + ) -> Self? where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8, Callback: IPAddressParserCallback { + + guard utf8.isEmpty == false else { + callback.validationError(ipv6: .emptyInput) + return nil + } + + var _parsedPieces: IPv6Address.Pieces = (0, 0, 0, 0, 0, 0, 0, 0) + return withUnsafeMutableBufferPointerToElements(tuple: &_parsedPieces) { parsedPieces -> Self? in + var pieceIndex = 0 + var expandFrom = -1 // The index of the piece after the compressed range. -1 means no compression. + var idx = utf8.startIndex + + if utf8[idx] == ASCII.colon.codePoint { + idx = utf8.index(after: idx) + guard idx < utf8.endIndex, utf8[idx] == ASCII.colon.codePoint else { + callback.validationError(ipv6: .unexpectedLeadingColon) + return nil + } + idx = utf8.index(after: idx) + pieceIndex &+= 1 + expandFrom = pieceIndex + } + + parseloop: while idx < utf8.endIndex { + guard pieceIndex != 8 else { + callback.validationError(ipv6: .tooManyPieces) + return nil + } + guard utf8[idx] != ASCII.colon.codePoint else { + guard expandFrom == -1 else { + callback.validationError(ipv6: .multipleCompressedPieces) + return nil + } + idx = utf8.index(after: idx) + pieceIndex &+= 1 + expandFrom = pieceIndex + continue parseloop + } + // Parse a hex-numeric value. + // Byte-swap if necessary so the octets represent the same numeric value in network byte order. 
+ let pieceStartIndex = idx + var value: UInt16 = 0 + var length: UInt8 = 0 + while length < 4, idx < utf8.endIndex, let numberValue = ASCII(utf8[idx])?.hexNumberValue { + value <<= 4 + value &+= UInt16(numberValue) + length &+= 1 + idx = utf8.index(after: idx) + } + value = value.bigEndian + // After the numeric value. + // - endIndex signifies the final piece. + // - ':' signifies the end of the piece, start of the next piece. + // - '.' signifies that number we just parsed is part of an embedded IPv4 address. Rewind and parse it as IPv4. + guard idx < utf8.endIndex else { + parsedPieces[pieceIndex] = value + pieceIndex &+= 1 + break parseloop + } + guard utf8[idx] != ASCII.colon.codePoint else { + parsedPieces[pieceIndex] = value + pieceIndex &+= 1 + idx = utf8.index(after: idx) + guard idx < utf8.endIndex else { + callback.validationError(ipv6: .unexpectedTrailingColon) + return nil + } + continue parseloop + } + guard utf8[idx] != ASCII.period.codePoint else { + guard length != 0 else { + callback.validationError(ipv6: .unexpectedPeriod) + return nil + } + guard !(pieceIndex > 6) else { + callback.validationError(ipv6: .invalidPositionForIPv4Address) + return nil + } + let addressRange = Range(uncheckedBounds: (pieceStartIndex, utf8.endIndex)) + guard let embeddedIPv4Address = IPv4Address(dottedDecimalUTF8: utf8[addressRange]) else { + callback.validationError(ipv6: .invalidIPv4Address) + return nil + } + withUnsafeBytes(of: embeddedIPv4Address.octets) { octetBuffer in + // After binding memory, the local variable `value` is poisoned and must not be used again. + // However, we may still copy its values through the bound pointer and abandon the local variable. 
+ let uint16s = octetBuffer.bindMemory(to: UInt16.self) + parsedPieces.baseAddress.unsafelyUnwrapped.advanced(by: pieceIndex) + .assign(from: uint16s.baseAddress.unsafelyUnwrapped, count: 2) + } + pieceIndex &+= 2 + + break parseloop + } + callback.validationError(ipv6: .unexpectedCharacter) + return nil + } + + if expandFrom != -1 { + // Shift the pieces from 'expandFrom' out towards the end, by swapping with the zeroes already there. + var swaps = pieceIndex &- expandFrom + pieceIndex = 7 + while pieceIndex != 0, swaps > 0 { + let destinationPiece = expandFrom &+ swaps &- 1 + // Manual swap leads to suprisingly better codegen than 'swapAt': https://github.com/apple/swift/pull/36864 + let tmp = parsedPieces[pieceIndex] + parsedPieces[pieceIndex] = parsedPieces[destinationPiece] + parsedPieces[destinationPiece] = tmp + pieceIndex &-= 1 + swaps &-= 1 + } + } else { + guard pieceIndex == 8 else { + callback.validationError(ipv6: .notEnoughPieces) + return nil + } + } + + // Parsing successful. + // Rather than returning a success/failure flag and creating an address from our original `resultTuple`, + // load a new tuple via the pointer and return the constructed IP address. + // Doing so saves a huge amount of byte-shuffling due to outlining. + return IPv6Address( + octets: UnsafeRawPointer(parsedPieces.baseAddress.unsafelyUnwrapped).load(as: IPv6Address.Octets.self) + ) + } + } + + @usableFromInline + internal struct ParserError { + + @usableFromInline + internal let errorCode: UInt8 + + @inlinable + internal init(errorCode: UInt8) { + self.errorCode = errorCode + } + + /// Empty input. + @inlinable internal static var emptyInput: Self { Self(errorCode: 1) } + /// Unexpected lone ':' at start of address. + @inlinable internal static var unexpectedLeadingColon: Self { Self(errorCode: 2) } + /// Unexpected lone ':' at end of address. + @inlinable internal static var unexpectedTrailingColon: Self { Self(errorCode: 3) } + /// Unexpected '.' in address segment. 
+ @inlinable internal static var unexpectedPeriod: Self { Self(errorCode: 4) } + /// Unexpected character after address segment. + @inlinable internal static var unexpectedCharacter: Self { Self(errorCode: 5) } + /// Too many pieces in address. + @inlinable internal static var tooManyPieces: Self { Self(errorCode: 6) } + /// Not enough pieces in address. + @inlinable internal static var notEnoughPieces: Self { Self(errorCode: 7) } + /// Multiple compressed pieces in address. + @inlinable internal static var multipleCompressedPieces: Self { Self(errorCode: 8) } + /// Invalid position for embedded IPv4 address + @inlinable internal static var invalidPositionForIPv4Address: Self { Self(errorCode: 9) } + /// Embedded IPv4 address is invalid. + @inlinable internal static var invalidIPv4Address: Self { Self(errorCode: 10) } + } +} + +// Serialization. + +extension IPv6Address { + + /// The canonical textual representation of this address, as defined by [RFC 5952](https://tools.ietf.org/html/rfc5952). + /// + public var serialized: String { + var direct = serializedDirect + return withUnsafeBytes(of: &direct.buffer) { + String(decoding: $0.prefix(Int(direct.count)), as: UTF8.self) + } + } + + @usableFromInline + internal var serializedDirect: (buffer: (UInt64, UInt64, UInt64, UInt64, UInt64), count: UInt8) { + + // Maximum length of an IPv6 address = 39 bytes. + // Note that this differs from libc's INET6_ADDRSTRLEN which is 46 because inet_ntop writes + // embedded IPv4 addresses in dotted-decimal notation, but RFC 5952 doesn't require that: + // https://tools.ietf.org/html/rfc5952#section-5 + var _stringBuffer: (UInt64, UInt64, UInt64, UInt64, UInt64) = (0, 0, 0, 0, 0) + let count = withUnsafeMutableBytes(of: &_stringBuffer) { stringBuffer -> Int in + withUnsafeBufferPointerToElements(tuple: self[pieces: .numeric]) { piecesBuffer -> Int in + // Look for ranges of consecutive zeroes. 
+ let compressedPieces: Range + let compressedRangeResult = piecesBuffer._longestSubrange(equalTo: 0) + if compressedRangeResult.length > 1 { + compressedPieces = compressedRangeResult.subrange + } else { + compressedPieces = -1 ..< -1 + } + + var stringIndex = 0 + var pieceIndex = 0 + while pieceIndex < 8 { + // Skip compressed pieces. + if pieceIndex == compressedPieces.lowerBound { + stringBuffer[stringIndex] = ASCII.colon.codePoint + stringIndex &+= 1 + if pieceIndex == 0 { + stringBuffer[stringIndex] = ASCII.colon.codePoint + stringIndex &+= 1 + } + pieceIndex = compressedPieces.upperBound + continue + } + // Print the piece and, if not the last piece, the separator. + let bytesWritten = ASCII.writeHexString( + for: piecesBuffer[pieceIndex], + to: stringBuffer.baseAddress.unsafelyUnwrapped + stringIndex + ) + stringIndex &+= Int(bytesWritten) + if pieceIndex != 7 { + stringBuffer[stringIndex] = ASCII.colon.codePoint + stringIndex &+= 1 + } + pieceIndex &+= 1 + } + return stringIndex + } + } + assert((0...39).contains(count)) + return (_stringBuffer, UInt8(truncatingIfNeeded: count)) + } +} + + +// -------------------------------------------- +// MARK: - IPv4 +// -------------------------------------------- + + +/// A 32-bit numerical identifier assigned to a device on an +/// [Internet Protocol, version 4](https://tools.ietf.org/html/rfc791) network. +/// +public struct IPv4Address { + + public typealias Octets = (UInt8, UInt8, UInt8, UInt8) + + /// The octets of this address. + public var octets: Octets + + /// Creates an address with the given octets. + public init(octets: Octets = (0, 0, 0, 0)) { + self.octets = octets + } + + /// Creates an address with the given 32-bit integer value. + /// + /// - seealso: `OctetArrangement` + /// - parameters: + /// - value: The integer value of the address. + /// - octetArrangement: How the octets of `value` are arranged: + ///
    + ///
  • If `numeric`, the integer is assumed to be in "host byte order", and its octets will be rearranged if necessary.
  • + ///
  • If `binary`, the integer is assumed to be in "network byte order", and its octets will be stored in the order + /// they are in.
  • + ///
+ /// + public init(value: UInt32, _ octetArrangement: OctetArrangement) { + self.init() + self[value: octetArrangement] = value + } + + /// The address, expressed as a 32-bit integer value. + /// + /// - seealso: `OctetArrangement` + /// - parameters: + /// - octetArrangement: How the octets of `value` are arranged: + ///
    + ///
  • If `numeric`, the integer is assumed to be in "host byte order", and its octets will be rearranged if necessary.
  • + ///
  • If `binary`, the integer is assumed to be in "network byte order", and its octets will be stored in the order + /// they are in.
  • + ///
+ /// + public subscript(value octetArrangement: OctetArrangement) -> UInt32 { + get { + let networkOrder = withUnsafeBytes(of: octets) { $0.load(as: UInt32.self) } + switch octetArrangement { + case .binary: + return networkOrder + case .numeric: + return UInt32(bigEndian: networkOrder) + } + } + set { + switch octetArrangement { + case .binary: + withUnsafeBytes(of: newValue) { src in + withUnsafeMutableBytes(of: &octets) { dst in + dst.copyBytes(from: src) + } + } + case .numeric: + self[value: .binary] = newValue.bigEndian + } + } + } +} + +// Standard protocols. + +extension IPv4Address: Equatable, Hashable, LosslessStringConvertible { + + @inlinable + public static func == (lhs: Self, rhs: Self) -> Bool { + return withUnsafeBytes(of: lhs.octets) { lhsBytes in + return withUnsafeBytes(of: rhs.octets) { rhsBytes in + return lhsBytes.elementsEqual(rhsBytes) + } + } + } + + @inlinable + public func hash(into hasher: inout Hasher) { + withUnsafeBytes(of: octets) { hasher.combine(bytes: $0) } + } + + public var description: String { + return serialized + } +} + +extension IPv4Address: Codable { + + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + let string = try container.decode(String.self) + guard let parsedValue = Self(string) else { + throw DecodingError.dataCorruptedError( + in: container, + debugDescription: "Invalid IPv4 Address" + ) + } + self = parsedValue + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + try container.encode(self.serialized) + } +} + +// Parsing. + +extension IPv4Address { + + /// Parses an IPv4 address from a String. + /// + /// The following formats are recognized: + /// + /// - _a.b.c.d_, where each numeric part defines the value of the address' octet at that position. 
+ /// - _a.b.c_, where _a_ and _b_ define the address' first 2 octets, and _c_ is interpreted as a 16-bit integer whose most and least significant bytes define + /// the address' 3rd and 4th octets respectively. + /// - _a.b_, where _a_ defines the address' first octet, and _b_ is interpreted as a 24-bit integer whose bytes define the remaining octets from most to least + /// significant. + /// - _a_, where _a_ is interpreted as a 32-bit integer whose bytes define the octets of the address in order from most to least significant. + /// + /// The numeric parts may be written in decimal, octal (prefixed with a `0`), or hexadecimal (prefixed with `0x`, case-insensitive). + /// Additionally, a single trailing '.' is permitted (e.g. `a.b.c.d.`). + /// + /// Examples: + /// ``` + /// IPv4Address("0x7f.0.0.1")!.octets == (0x7f, 0x00, 0x00, 0x01) == "127.0.0.1" + /// IPv4Address("10.1.0x12.")!.octets == (0x0a, 0x01, 0x00, 0x12) == "10.1.0.18" + /// IPv4Address("0300.0xa80032")!.octets == (0xc0, 0xa8, 0x00, 0x32) == "192.168.0.50" + /// IPv4Address("0x8Badf00d")!.octets == (0x8b, 0xad, 0xf0, 0x0d) == "139.173.240.13" + /// ``` + /// + /// - parameters: + /// - description: The string to parse. + /// + @inlinable @inline(__always) + public init?(_ description: Source) where Source: StringProtocol { + self.init(utf8: description.utf8) + } + + /// Parses an IPv4 address from the given collection of UTF-8 code-units. + /// + /// The following formats are recognized: + /// + /// - _a.b.c.d_, where each numeric part defines the value of the address' octet at that position. + /// - _a.b.c_, where _a_ and _b_ define the address' first 2 octets, and _c_ is interpreted as a 16-bit integer whose most and least significant bytes define + /// the address' 3rd and 4th octets respectively. + /// - _a.b_, where _a_ defines the address' first octet, and _b_ is interpreted as a 24-bit integer whose bytes define the remaining octets from most to least + /// significant. 
+ /// - _a_, where _a_ is interpreted as a 32-bit integer whose bytes define the octets of the address in order from most to least significant. + /// + /// The numeric parts may be written in decimal, octal (prefixed with a `0`), or hexadecimal (prefixed with `0x`, case-insensitive). + /// Additionally, a single trailing '.' is permitted (e.g. `a.b.c.d.`). + /// + /// Examples: + /// ``` + /// IPv4Address("0x7f.0.0.1".utf8)!.octets == (0x7f, 0x00, 0x00, 0x01) == "127.0.0.1" + /// IPv4Address("10.1.0x12.".utf8)!.octets == (0x0a, 0x01, 0x00, 0x12) == "10.1.0.18" + /// IPv4Address("0300.0xa80032".utf8)!.octets == (0xc0, 0xa8, 0x00, 0x32) == "192.168.0.50" + /// IPv4Address("0x8Badf00d".utf8)!.octets == (0x8b, 0xad, 0xf0, 0x0d) == "139.173.240.13" + /// ``` + /// + /// - parameters: + /// - utf8: The string to parse, as a collection of UTF-8 code-units. + /// + @inlinable @inline(__always) + public init?(utf8: UTF8Bytes) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + guard case .success(let parsed) = IPv4Address.parse(utf8: utf8) else { + return nil + } + self = parsed + } +} + +extension IPv4Address { + + /// A tri-state result which captures whether an IPv4 address failed to parse because it was invalid, + /// or whether it failed because the given string does not look like an IP address. + /// + public enum ParserResult { + + /// The string was successfully parsed as an IPv4 address. + /// + case success(IPv4Address) + + /// The string was recognized as probably being an IPv4 address, but was invalid and could not be parsed (e.g. because the value would overflow). + /// + case failure + + /// The string cannot be recognized as an IPv4 address. 
This is not the same as being an invalid IP address - for example, the string "9999999999.com" fails + /// to parse because the non-numeric characters "com" mean it isn't even an IP address string, whereas the string "9999999999" _is_ a properly-formatted + /// IP address string, but fails to parse because the value would overflow. + /// + /// When parsing "9999999999.com" as a hostname, it should be treated as a domain or opaque hostname rather than an invalid IP address. + /// The string "9999999999" should be treated as an invalid IP address. + /// + case notAnIPAddress + } + + /// Parses an IPv4 address from a buffer of UTF-8 codeunits, returning a tri-state `ParserResult` which is useful for parsing content which _might_ be + /// an IPv4 address. + /// + /// The following formats are recognized: + /// + /// - _a.b.c.d_, where each numeric part defines the value of the address' octet at that position. + /// - _a.b.c_, where _a_ and _b_ define the address' first 2 octets, and _c_ is interpreted as a 16-bit integer whose most and least significant bytes define + /// the address' 3rd and 4th octets respectively. + /// - _a.b_, where _a_ defines the address' first octet, and _b_ is interpreted as a 24-bit integer whose bytes define the remaining octets from most to least + /// significant. + /// - _a_, where _a_ is interpreted as a 32-bit integer whose bytes define the octets of the address in order from most to least significant. + /// + /// The numeric parts may be written in decimal, octal (prefixed with a `0`), or hexadecimal (prefixed with `0x`, case-insensitive). + /// Additionally, a single trailing '.' is permitted (e.g. `a.b.c.d.`).
+ /// + /// Examples: + /// ``` + /// IPv4Address("0x7f.0.0.1")!.octets == (0x7f, 0x00, 0x00, 0x01) == "127.0.0.1" + /// IPv4Address("10.1.0x12.")!.octets == (0x0a, 0x01, 0x00, 0x12) == "10.1.0.18" + /// IPv4Address("0300.0xa80032")!.octets == (0xc0, 0xa8, 0x00, 0x32) == "192.168.0.50" + /// IPv4Address("0x8Badf00d")!.octets == (0x8b, 0xad, 0xf0, 0x0d) == "139.173.240.13" + /// ``` + /// + /// - parameters: + /// - utf8: The string to parse, as a collection of UTF-8 code-units. + /// - returns: A tri-state result which captures whether the string should even be interpreted as an IPv4 address. + /// See `ParserResult` for more information. + /// + @inlinable + public static func parse( + utf8: UTF8Bytes + ) -> ParserResult where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + var callback = IgnoreIPAddressParserErrors() + return utf8.withContiguousStorageIfAvailable { + parse(utf8: $0.withoutTrappingOnIndexOverflow, callback: &callback) + } ?? parse(utf8: utf8, callback: &callback) + } + + @inlinable + internal static func parse( + utf8: UTF8Bytes, callback: inout Callback + ) -> ParserResult where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8, Callback: IPAddressParserCallback { + + guard utf8.isEmpty == false else { + callback.validationError(ipv4: .emptyInput) + return .failure + } + + var _parsedPieces: (UInt32, UInt32, UInt32, UInt32) = (0, 0, 0, 0) + return withUnsafeMutableBufferPointerToElements(tuple: &_parsedPieces) { parsedPieces -> ParserResult in + var pieceIndex = -1 + var idx = utf8.startIndex + + // We need to track and continue processing numeric digits even if a piece overflows, + // because the standard works in terms of mathematical integers, not fixed-size binary integers. + // A piece overflow in a well-formatted IP-address string should return a `.failure`, + // but in a non-IP-address string, it should be ignored in favour of a `.notAnIPAddress` result. 
+ // For example, the string "10000000000.com" should return `.notAnIPAddress` due to the `.com`, + // not a `.failure` due to overflow. + var pieceDidOverflow = false + + while idx < utf8.endIndex { + var value: UInt32 = 0 + var radix: UInt32 = 10 + + guard let firstCharInPiece = ASCII(utf8[idx]), ASCII.ranges.digits.contains(firstCharInPiece) else { + callback.validationError(ipv4: .pieceBeginsWithInvalidCharacter) + return .notAnIPAddress + } + if firstCharInPiece == ASCII.n0 { + idx = utf8.index(after: idx) + if idx < utf8.endIndex { + switch utf8[idx] { + case ASCII.x.codePoint, ASCII.X.codePoint: + radix = 16 + idx = utf8.index(after: idx) + default: + radix = 8 + } + } + } + while idx < utf8.endIndex, let numericValue = ASCII(utf8[idx])?.hexNumberValue { + guard numericValue < radix else { + callback.validationError(ipv4: .pieceContainsInvalidCharacterForRadix) + return .notAnIPAddress + } + var (overflowM, overflowA) = (false, false) + (value, overflowM) = value.multipliedReportingOverflow(by: radix) + (value, overflowA) = value.addingReportingOverflow(UInt32(numericValue)) + if overflowM || overflowA { + pieceDidOverflow = true + } + idx = utf8.index(after: idx) + } + // Set value for piece. + // Note that we do not flip to network byte order as we still want to process these numerically. + guard pieceIndex < 3 else { + callback.validationError(ipv4: .tooManyPieces) + return .notAnIPAddress + } + pieceIndex &+= 1 + parsedPieces[pieceIndex] = value + // Allow one trailing '.' after the piece, even if it's the last piece. 
+ guard idx < utf8.endIndex, utf8[idx] == ASCII.period.codePoint else { + break + } + idx = utf8.index(after: idx) + } + + guard idx == utf8.endIndex else { + callback.validationError(ipv4: .unexpectedTrailingCharacter) + return .notAnIPAddress + } + guard pieceDidOverflow == false else { + callback.validationError(ipv4: .pieceOverflows) + return .failure + } + + var numericAddress: UInt32 = 0 + // swift-format-ignore + switch pieceIndex { + case 0: // 'a' - 32-bits. + numericAddress = parsedPieces[0] + case 1: // 'a.b' - 8-bits/24-bits. + var hasInvalid = parsedPieces[0] & ~0x0000_00FF + hasInvalid |= parsedPieces[1] & ~0x00FF_FFFF + guard hasInvalid == 0 else { + callback.validationError(ipv4: .addressOverflows) + return .failure + } + numericAddress = (parsedPieces[0] << 24) | parsedPieces[1] + case 2: // 'a.b.c' - 8-bits/8-bits/16-bits. + var hasInvalid = parsedPieces[0] & ~0x0000_00FF + hasInvalid |= parsedPieces[1] & ~0x0000_00FF + hasInvalid |= parsedPieces[2] & ~0x0000_FFFF + guard hasInvalid == 0 else { + callback.validationError(ipv4: .addressOverflows) + return .failure + } + numericAddress = (parsedPieces[0] << 24) | (parsedPieces[1] << 16) | parsedPieces[2] + case 3: // 'a.b.c.d' - 8-bits/8-bits/8-bits/8-bits. + var hasInvalid = parsedPieces[0] & ~0x0000_00FF + hasInvalid |= parsedPieces[1] & ~0x0000_00FF + hasInvalid |= parsedPieces[2] & ~0x0000_00FF + hasInvalid |= parsedPieces[3] & ~0x0000_00FF + guard hasInvalid == 0 else { + callback.validationError(ipv4: .addressOverflows) + return .failure + } + numericAddress = (parsedPieces[0] << 24) | (parsedPieces[1] << 16) | (parsedPieces[2] << 8) | parsedPieces[3] + default: + fatalError("Internal error. pieceIndex not in 0...3") + } + // Parsing successful. 
+ return .success(IPv4Address(value: numericAddress, .numeric)) + } + } + + @usableFromInline + internal struct ParserError { + + @usableFromInline + internal let errorCode: UInt8 + + @inlinable + internal init(errorCode: UInt8) { + self.errorCode = errorCode + } + + /// Empty input. + @inlinable internal static var emptyInput: Self { Self(errorCode: 1) } + /// Piece begins with invalid character. + @inlinable internal static var pieceBeginsWithInvalidCharacter: Self { Self(errorCode: 2) } + /// Piece contains invalid character for radix. + @inlinable internal static var pieceContainsInvalidCharacterForRadix: Self { Self(errorCode: 3) } + /// Unexpected character at end of address. + @inlinable internal static var unexpectedTrailingCharacter: Self { Self(errorCode: 4) } + /// Invalid IPv4 address segment. Unexpected character. + @inlinable internal static var invalidCharacter: Self { Self(errorCode: 5) } + /// Piece overflows. + @inlinable internal static var pieceOverflows: Self { Self(errorCode: 6) } + /// Address overflows + @inlinable internal static var addressOverflows: Self { Self(errorCode: 7) } + /// Too many pieces in address. + @inlinable internal static var tooManyPieces: Self { Self(errorCode: 8) } + /// Incorrect number of pieces in address. + @inlinable internal static var notEnoughPieces: Self { Self(errorCode: 9) } + } +} + +extension IPv4Address { + + /// Parses an IPv4 address from a String. + /// + /// This simplified parser only recognises the 4-piece decimal notation ("a.b.c.d"), also known as dotted-decimal notation. + /// + /// - parameters: + /// - string: The string to parse. + /// + @inlinable + public init?(dottedDecimal string: S) where S: StringProtocol { + let _parsed = + string.utf8.withContiguousStorageIfAvailable { + IPv4Address(dottedDecimalUTF8: $0.withoutTrappingOnIndexOverflow) + } ?? 
IPv4Address(dottedDecimalUTF8: string.utf8) + guard let parsed = _parsed else { return nil } + self = parsed + } + + /// Parses an IPv4 address from a buffer of UTF-8 codeunits. + /// + /// This simplified parser only recognises the 4-piece decimal notation ("a.b.c.d"), also known as dotted-decimal notation. + /// + /// - parameters: + /// - utf8: The string to parse, as a collection of UTF-8 code-units. + /// + @inlinable + public init?(dottedDecimalUTF8 utf8: UTF8Bytes) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + var numericAddress = UInt32(0) + var idx = utf8.startIndex + var numbersSeen = 0 + while idx < utf8.endIndex { + if numbersSeen != 0 { + guard ASCII(utf8[idx]) == .period else { + return nil // invalid character. + } + guard numbersSeen < 4 else { + return nil // too many pieces. + } + idx = utf8.index(after: idx) + } + var ipv4Piece = -1 // -1 means "no digits parsed". + while idx < utf8.endIndex, let digit = ASCII(utf8[idx])?.decimalNumberValue { + switch ipv4Piece { + case -1: + ipv4Piece = Int(digit) + case 0: + return nil // leading 0 - unsupported radix. + default: + ipv4Piece &*= 10 + ipv4Piece &+= Int(digit) + } + guard ipv4Piece < 256 else { + return nil // piece overflow. + } + idx = utf8.index(after: idx) + } + guard ipv4Piece > -1 else { + return nil // piece does not begin with a decimal digit. + } + numericAddress &<<= 8 + numericAddress &+= UInt32(ipv4Piece) + numbersSeen &+= 1 + } + guard numbersSeen == 4 else { + return nil // not enough pieces. + } + self = IPv4Address(value: numericAddress, .numeric) + } +} + +// Serialization. + +extension IPv4Address { + + /// The textual representation of this address, in dotted decimal notation, as defined by [RFC 4001](https://tools.ietf.org/html/rfc4001#page-7). 
+ /// + public var serialized: String { + var direct = serializedDirect + return withUnsafeBytes(of: &direct.buffer) { + String(decoding: $0.prefix(Int(direct.count)), as: UTF8.self) + } + } + + @usableFromInline + internal var serializedDirect: (buffer: (UInt64, UInt64), count: UInt8) { + + // The maximum length of an IPv4 address in decimal notation ("XXX.XXX.XXX.XXX") is 15 bytes. + // We write one-too-many separators and chop it off at the end, so 16 bytes are needed. + var _stringBuffer: (UInt64, UInt64) = (0, 0) + let count = withUnsafeMutableBytes(of: &_stringBuffer) { stringBuffer -> Int in + return withUnsafeBytes(of: octets) { octetBytes -> Int in + var stringBufferIdx = stringBuffer.startIndex + for i in 0..<4 { + let bytesWritten = ASCII.writeDecimalString( + for: octetBytes[i], + to: stringBuffer.baseAddress.unsafelyUnwrapped + stringBufferIdx + ) + stringBufferIdx &+= Int(bytesWritten) + stringBuffer[stringBufferIdx] = ASCII.period.codePoint + stringBufferIdx &+= 1 + } + return stringBufferIdx &- 1 + } + } + assert((0...15).contains(count)) + return (_stringBuffer, UInt8(truncatingIfNeeded: count)) + } +} diff --git a/Sources/WebURL/Parser/Parser+Host.swift b/Sources/WebURL/Parser/Parser+Host.swift new file mode 100644 index 000000000..0bc572aec --- /dev/null +++ b/Sources/WebURL/Parser/Parser+Host.swift @@ -0,0 +1,282 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +/// A description of a host that has been parsed from a string. +/// +/// - seealso: `ParsedURLString` +/// +@usableFromInline +internal enum ParsedHost { + case asciiDomain(_ASCIIDomainInfo) + case ipv4Address(IPv4Address) + case ipv6Address(IPv6Address) + case opaque(_OpaqueHostnameInfo) + case empty +} + + +// -------------------------------------------- +// MARK: - Parsing +// -------------------------------------------- + + +extension ParsedHost { + + /// Parses the given hostname to determine what kind of host it is, and whether or not it is valid. + /// The created `ParsedHost` object may then be used to write a normalized/encoded version of the hostname. + /// + @inlinable + internal init?( + _ hostname: UTF8Bytes, schemeKind: WebURL.SchemeKind, callback: inout Callback + ) where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8, Callback: URLParserCallback { + + guard hostname.isEmpty == false else { + self = .empty + return + } + + var ipv6Slice = hostname[...] + if ipv6Slice.removeFirst() == ASCII.leftSquareBracket.codePoint { + guard ipv6Slice.popLast() == ASCII.rightSquareBracket.codePoint else { + callback.validationError(.unclosedIPv6Address) + return nil + } + guard let result = IPv6Address(utf8: ipv6Slice) else { + callback.validationError(.invalidIPv6Address) + return nil + } + self = .ipv6Address(result) + return + } + + guard schemeKind.isSpecial else { + guard let hostnameInfo = ParsedHost._tryParseOpaqueHostname(hostname, callback: &callback) else { + return nil + } + self = .opaque(hostnameInfo) + return + } + + let domain = hostname.lazy.percentDecodedUTF8 + + // TODO: [idna] + // + // > 6. Let asciiDomain be the result of running domain to ASCII on domain. + // > 7. If asciiDomain is failure, validation error, return failure. + // + // We don't have IDNA, so we need to reject: + // - domains with non-ASCII characters + // - domains which are ASCII but have IDNA-encoded "labels" (dot-separated components). 
+ // These require validation which we can't do yet. + // + // Which should leave us with pure ASCII domains, which don't depend on Unicode at all. + // For these, IDNA normalization/encoding is just lowercasing. At least that's something we can do... + + let (_, _asciiDomainInfo) = ParsedHost._tryParseASCIIDomain(domain, callback: &callback) + guard let asciiDomainInfo = _asciiDomainInfo else { + return nil + } + let asciiDomain = domain + + switch IPv4Address.parse(utf8: asciiDomain) { + case .success(let address): + self = .ipv4Address(address) + return + case .failure: + callback.validationError(.invalidIPv4Address) + return nil + case .notAnIPAddress: + break + } + + if schemeKind == .file, ASCII.Lowercased(asciiDomain).elementsEqual("localhost".utf8) { + self = .empty + return + } + self = .asciiDomain(asciiDomainInfo) + } + + /// Parses the given opaque hostname, returning an `_OpaqueHostnameInfo` if the hostname is valid. + /// + @inlinable + internal static func _tryParseOpaqueHostname( + _ hostname: UTF8Bytes, callback: inout Callback + ) -> _OpaqueHostnameInfo? where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8, Callback: URLParserCallback { + + guard hostname.isEmpty == false else { + return nil + } + var hostnameInfo = _OpaqueHostnameInfo(needsPercentEncoding: false, encodedCount: 0) + for byte in hostname { + hostnameInfo.encodedCount += 1 + guard let asciiChar = ASCII(byte) else { + hostnameInfo.needsPercentEncoding = true + hostnameInfo.encodedCount += 2 + continue // Non-ASCII codepoints checked by 'validateURLCodePointsAndPercentEncoding', are not fatal. 
+ } + if asciiChar.isForbiddenHostCodePoint, asciiChar != .percentSign { + callback.validationError(.hostForbiddenCodePoint) + return nil + } + if PercentEncodeSet.C0Control.shouldPercentEncode(ascii: asciiChar.codePoint) { + hostnameInfo.needsPercentEncoding = true + hostnameInfo.encodedCount += 2 + } + } + validateURLCodePointsAndPercentEncoding(utf8: hostname, callback: &callback) + return hostnameInfo + } + + /// Parses the given domain, returning an `_ASCIIDomainInfo` if the domain is a valid, ASCII domain. + /// + /// If the returned value's `domainInfo` is `nil`, parsing failed. When the domain contained non-ASCII code points, or + /// some of its labels were IDNA-encoded, the returned value's `mayBeValidIDNA` flag is `true` and the host-parser + /// should try to parse the domain using IDNA. When it contained a forbidden host code point, the flag is `false`. + /// + @inlinable + internal static func _tryParseASCIIDomain( + _ domain: LazilyPercentDecodedUTF8WithoutSubstitutions, callback: inout Callback + ) -> (mayBeValidIDNA: Bool, domainInfo: _ASCIIDomainInfo?) 
+ where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8, Callback: URLParserCallback { + + var domainInfo = _ASCIIDomainInfo(decodedCount: 0, needsDecodeOrLowercasing: false) + + guard hasIDNAPrefix(utf8: domain) == false else { + callback.validationError(.domainToASCIIFailure) + return (true, nil) + } + var i = domain.startIndex + while i < domain.endIndex { + guard let char = ASCII(domain[i]) else { + callback.validationError(.domainToASCIIFailure) + return (true, nil) + } + if char.isForbiddenHostCodePoint { + callback.validationError(.hostForbiddenCodePoint) + return (false, nil) + } + domainInfo.needsDecodeOrLowercasing = domainInfo.needsDecodeOrLowercasing || i.isDecoded || char.isUppercaseAlpha + domainInfo.decodedCount &+= 1 + domain.formIndex(after: &i) + if char == .period { + guard hasIDNAPrefix(utf8: domain[i...]) == false else { + callback.validationError(.domainToASCIIFailure) + return (true, nil) + } + } + } + return (false, domainInfo) + } +} + +@usableFromInline +internal struct _ASCIIDomainInfo { + + @usableFromInline + internal var decodedCount: Int + + @usableFromInline + internal var needsDecodeOrLowercasing: Bool + + @inlinable + internal init(decodedCount: Int, needsDecodeOrLowercasing: Bool) { + self.decodedCount = decodedCount + self.needsDecodeOrLowercasing = needsDecodeOrLowercasing + } +} + +@usableFromInline +internal struct _OpaqueHostnameInfo { + + @usableFromInline + internal var needsPercentEncoding: Bool + + @usableFromInline + internal var encodedCount: Int + + @inlinable + internal init(needsPercentEncoding: Bool, encodedCount: Int) { + self.needsPercentEncoding = needsPercentEncoding + self.encodedCount = encodedCount + } +} + + +// -------------------------------------------- +// MARK: - Writing +// -------------------------------------------- + + +extension ParsedHost { + + /// Writes a normalized hostname using the given `Writer` instance. + /// `bytes` must be the same collection this `ParsedHost` was created for. 
+ /// + @inlinable + internal func write( + bytes: UTF8Bytes, using writer: inout Writer + ) where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8, Writer: HostnameWriter { + + switch self { + case .empty: + writer.writeHostname(lengthIfKnown: 0) { $0(EmptyCollection()) } + + case .asciiDomain(let domainInfo): + if domainInfo.needsDecodeOrLowercasing { + writer.writeHostname(lengthIfKnown: domainInfo.decodedCount) { + $0(ASCII.Lowercased(bytes.lazy.percentDecodedUTF8)) + } + } else { + writer.writeHostname(lengthIfKnown: domainInfo.decodedCount) { + $0(bytes) + } + } + + case .opaque(let hostnameInfo): + if hostnameInfo.needsPercentEncoding { + writer.writeHostname(lengthIfKnown: hostnameInfo.encodedCount) { writePiece in + _ = bytes.lazy.percentEncodedGroups(as: \.c0Control).write(to: writePiece) + } + } else { + writer.writeHostname(lengthIfKnown: hostnameInfo.encodedCount) { writePiece in + writePiece(bytes) + } + } + + // For IPv4/v6 addresses, it's actually faster not to use 'lengthIfKnown' because it means + // hoisting '.serializedDirect' outside of 'writeHostname', and for some reason that's a lot slower. 
+ case .ipv4Address(let addr): + writer.writeHostname(lengthIfKnown: nil) { (writePiece: (UnsafeRawBufferPointer) -> Void) in + var serialized = addr.serializedDirect + withUnsafeBytes(of: &serialized.buffer) { bufferBytes in + writePiece(UnsafeRawBufferPointer(start: bufferBytes.baseAddress, count: Int(serialized.count))) + } + } + + case .ipv6Address(let addr): + writer.writeHostname(lengthIfKnown: nil) { (writePiece: (UnsafeRawBufferPointer) -> Void) in + var serialized = addr.serializedDirect + withUnsafeBytes(of: &serialized.buffer) { bufferBytes in + var bracket = ASCII.leftSquareBracket.codePoint + withUnsafeMutableBytes(of: &bracket) { bracketPtr in + writePiece(UnsafeRawBufferPointer(bracketPtr)) + writePiece(UnsafeRawBufferPointer(start: bufferBytes.baseAddress, count: Int(serialized.count))) + bracketPtr.storeBytes(of: ASCII.rightSquareBracket.codePoint, as: UInt8.self) + writePiece(UnsafeRawBufferPointer(bracketPtr)) + } + } + } + } + } +} diff --git a/Sources/WebURL/Parser/Parser+Path.swift b/Sources/WebURL/Parser/Parser+Path.swift new file mode 100644 index 000000000..9446466e6 --- /dev/null +++ b/Sources/WebURL/Parser/Parser+Path.swift @@ -0,0 +1,971 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// An object which parses a path string. +/// +/// This protocol is an implementation detail. 
It defines a group of callbacks which are invoked by the `walkPathComponents` method +/// and should not be called directly. Conforming types implement these callbacks, as well as another method/initializer which invokes `walkPathComponents` to +/// compute any information the type requires about the path. +/// +/// Conforming types should be aware that components are visited in reverse order. Given a path "a/b/c", the components visited would be "c", "b", and finally "a". +/// All of the visited path components are present in the simplified path string, with all pushing/popping handled internally by the `walkPathComponents` method. +/// +/// Visited path components may originate from 2 sources: +/// +/// - They may be slices of some string given as an input. In order to be written as a normalized path string, they must +/// be percent-encoded and adjustments must be made to Windows drive letters. +/// - They may be slices of an existing, normalized path string, coming from a URL object given as the "base URL". In this case, things are a bit easier - +/// These components require no further processing before they are incorporated in to a normalized path string, and are known to exist in contiguous storage. +/// +@usableFromInline +internal protocol _PathParser { + associatedtype InputString: BidirectionalCollection where InputString.Element == UInt8 + + /// A callback which is invoked when the parser yields a path component originating from the input string. + /// These components may not be contiguously stored and require percent-encoding before writing. + /// + /// - parameters: + /// - pathComponent: The path component yielded by the parser. + /// - isWindowsDriveLetter: If `true`, the component is a Windows drive letter in a `file:` URL. + /// It should be normalized when written (by writing the first byte followed by the ASCII character `:`). 
+ /// + mutating func visitInputPathComponent(_ pathComponent: InputString.SubSequence, isWindowsDriveLetter: Bool) + + /// A callback which is invoked when the parser yields a path component originating from the base URL's path. + /// These components are known to be contiguously stored, properly percent-encoded, and any Windows drive letters will already have been normalized. + /// They need no further processing, and may be written to the result as-is. + /// + /// - parameters: + /// - pathComponent: The path component yielded by the parser. + /// + mutating func visitBasePathComponent(_ pathComponent: WebURL.UTF8View.SubSequence) + + /// A callback which is invoked when the parser yields a number of consecutive empty path components. + /// Note that this does not imply that path components yielded via other callbacks are non-empty. + /// + /// This method exists as an optimisation, since empty components have no content to percent-encode/transform. + /// + mutating func visitEmptyPathComponents(_ n: Int) + + /// A callback which is invoked when the parser yields a path sigil ("/.") in order to disambiguate a path with leading slashes from an authority. + /// Conformers should ensure that a "/." is prepended to the path string if it is written to a URL without an authority sigil. + /// + /// If a sigil is yielded, it is always the very start of the path and the parser will not yield any components after this. + /// + mutating func visitPathSigil() + + /// An optional callback which is invoked when the parser encounters a non-fatal URL syntax oddity. + /// + /// The default implementation does nothing. + /// + mutating func visitValidationError(_ error: ValidationError) +} + +extension _PathParser { + + /// A callback which is invoked when the parser yields an empty path component. + /// Note that this does not imply that path components yielded via other callbacks are non-empty. 
+ /// + /// This method exists as an optimisation, since empty components have no content to percent-encode/transform. + /// + @inlinable + internal mutating func visitEmptyPathComponent() { + visitEmptyPathComponents(1) + } + + @inlinable + internal mutating func visitValidationError(_ error: ValidationError) { + // No required action. + } +} + + +// -------------------------------------------- +// MARK: - Parsing +// -------------------------------------------- + + +@usableFromInline +internal enum _DeferredPathComponent { + case potentialWindowsDrive(Source, UInt) + case empties(Int) + + @inlinable + internal var isPotentialWindowsDrive: Bool { + if case .potentialWindowsDrive = self { return true } + return false + } + + /// Whether this component contains enough deferred empties to require a path sigil, were it to be flushed as the start of a path string. + /// + @inlinable + internal func needsPathSigilWhenFlushing(_ state: _PathParserState) -> Bool { + if state.didYieldComponent { + if case .empties(let count) = self { + return count != 0 + } + } else { + if case .empties(let count) = self { + return count > 1 + } + } + return false + } +} + +@usableFromInline +internal struct _PathParserState { + + @usableFromInline + internal var popcount: UInt + + @usableFromInline + internal var didYieldComponent: Bool + + @inlinable + internal init() { + self.popcount = 0 + self.didYieldComponent = false + } +} + +extension _PathParser { + + // Note: When making these local functions inside 'walkPathComponents', the compiler fails to prove they + // don't escape and introduces heap allocations which dominate the performance of the entire parser. + + /// Defers an empty path component. If there are already empty components deferred, it will be added to them. + /// + /// Asserts that no potential Windows drive letters have been deferred. 
+ /// + @inlinable + internal mutating func _deferEmptyAssertNotWindowsDrive( + _ deferred: inout _DeferredPathComponent?, _ state: inout _PathParserState + ) { + + assert(state.popcount == 0, "This component has been popped. Cannot defer empty.") + guard case .empties(let count) = deferred else { + assert(deferred == nil, "Windows drive already deferred! Cannot defer empty.") + deferred = .empties(1) + return + } + deferred = .empties(count &+ 1) + } + + /// Flushes deferred empty components, if there are any, and asserts that no potential Windows drive letters have been deferred. + /// + @inlinable + internal mutating func _flushEmptiesAssertNotWindowsDrive( + _ deferred: inout _DeferredPathComponent?, _ state: inout _PathParserState + ) { + + guard case .empties(let count) = deferred else { + assert(deferred == nil, "Windows drive deferred! Cannot flush empties") + return + } + assert(count != 0, "0 is not a valid number of deferred empties") + visitEmptyPathComponents(count) + state.didYieldComponent = true + deferred = .none + } + + /// Flushes the deferred component(s). + /// + /// - For potential Windows drives, this means we have confirmed that the component is *not* a Windows drive. + /// If the `popcount` on the RHS of the candidate was 0, the candidate is yielded as a regular component. + /// Otherwise, the candidate is popped (like any other component would have been), and any remaining popcount from its RHS + /// is merged with the parser's current `popcount`. + /// + /// - Empty components are yielded. 
+ /// + @inlinable + internal mutating func _flushAndMergePopcount( + _ deferred: inout _DeferredPathComponent?, + _ state: inout _PathParserState + ) { + guard case .potentialWindowsDrive(let componentContent, var storedPopcount) = deferred else { + _flushEmptiesAssertNotWindowsDrive(&deferred, &state) + return + } + if storedPopcount == 0 { + visitInputPathComponent(componentContent, isWindowsDriveLetter: false) + state.didYieldComponent = true + } else { + storedPopcount -= 1 + } + state.popcount += storedPopcount + deferred = .none + } + + /// Flushes the deferred component(s) appropriately for the start of the path string. + /// + /// - For potential Windows drives, this means we have confirmed that the component *is* a Windows drive. It will be yielded with the appropriate flag. + /// + /// - Empty components are yielded, and if a path sigil is required, that is yielded, too. + /// + @inlinable + internal mutating func _flushForFinalization( + _ deferred: inout _DeferredPathComponent?, _ state: inout _PathParserState + ) { + switch deferred { + case .potentialWindowsDrive(let firstComponent, _): + visitInputPathComponent(firstComponent, isWindowsDriveLetter: true) + return + // If we haven't yielded anything yet, make sure we at least write an empty path. + case .empties(let count): + if state.didYieldComponent == false { + assert(count != 0) + } + case .none: + if state.didYieldComponent == false { + // This should be impossible to reach. A non-empty path string which doesn't yield anything would + // need to pop, and would need to at least end in a "..", but the parser ensures that such paths end in a "/". + // Handle it to make *absolutely* sure we don't accidentally create a URL with 'nil' path from non-empty input. + assertionFailure("Finalizing a path without yielding or deferring anything?!") + deferred = .empties(1) + } + } + let needsPathSigil = deferred?.needsPathSigilWhenFlushing(state) ?? 
false + _flushEmptiesAssertNotWindowsDrive(&deferred, &state) + if needsPathSigil { + visitPathSigil() + } + } + + /// Parses the given path string, optionally relative to the path of a base URL object, and yields the simplified list of path components via callbacks + /// implemented on this `_PathParser`. The path components are yielded in *reverse order*. + /// + /// To construct the simplified path string, start with an empty string. For each path component yielded by the parser, + /// prepend `"/"` (ASCII forward slash) followed by the path component's contents, to that string. Note that path components from the input string may require + /// additional adjustments such as percent-encoding or drive letter normalization as described in the documentation for `visitInputPathComponent`. + /// + /// For example, consider the input `"a/b/../c/"`, which normalizes to the path string `"/a/c/"`. + /// This method yields the components `["", "c", "a"]`, and path construction by prepending proceeds as follows: `"/" -> "/c/" -> "/a/c/"`. + /// + /// - Note: + /// If the input string is empty, and the scheme **is not** special, no callbacks will be called (the path is `nil`). + /// If the input string is empty, and the scheme **is** special, the result is an implicit root path (`/`). + /// If the input string is not empty, this function will always yield a non-empty path. + /// + /// - parameters: + /// - input: The path string to parse, as a collection of UTF-8 code-units. + /// - schemeKind: The scheme of the URL which the path will be part of. + /// - baseURL: The URL whose path serves as the "base" for the input string, if it is a relative path. + /// Note that there are some edge-cases related to Windows drive letters which require the base URL be provided (if present), + /// even for absolute paths. 
+ /// - absolutePathsCopyWindowsDriveFromBase: A flag set by the URL parser to enable special behaviours for absolute paths in path-only file + /// URLs, forcing them to be relative to the base URL's Windows drive, even if the given path contains + /// its own Windows drive. For example, the path-only URL "file:/hello" parsed against the base URL + /// "file:///C:/Windows" results in "file:///C:/hello", but the non-path-only + /// URL "file:///hello" results in "file:///hello" when parsed against the same base URL. + /// In both cases the path parser only sees the string "/hello" as its input, so the value of this flag must + /// be determined by the URL parser. + /// + @inlinable @inline(never) + internal mutating func walkPathComponents( + pathString input: InputString, + schemeKind: WebURL.SchemeKind, + baseURL: WebURL?, + absolutePathsCopyWindowsDriveFromBase: Bool + ) { + + guard input.isEmpty == false else { + // Special URLs have an implicit path, non-special URLs may have an empty path. + if schemeKind.isSpecial { + visitEmptyPathComponent() + } + return + } + + let isFileScheme = (schemeKind == .file) + + // Determine if this is an absolute or relative path, as denoted by a leading path separator ("usr/lib" vs "/usr/lib"). + + let input_firstComponentStart: InputString.Index + let isInputAbsolute = PathComponentParser.isPathSeparator(input[input.startIndex], scheme: schemeKind) + if isInputAbsolute { + input_firstComponentStart = input.index(after: input.startIndex) + } else { + input_firstComponentStart = input.startIndex + } + + guard input_firstComponentStart < input.endIndex else { + // The input string is a single separator, i.e. an absolute root path. + // We can fast-path this: it results in a single "/", or the drive root if baseURL has a Windows drive letter. 
+ assert(isInputAbsolute) + visitEmptyPathComponent() + if isFileScheme, let base = baseURL { + let _baseDrive = PathComponentParser._normalizedWindowsDrive( + in: base.utf8.path, firstCmptLength: base.storage.structure.firstPathComponentLength + ) + if let baseDrive = _baseDrive { + visitBasePathComponent(baseDrive) + } + } + return + } + + // Consume path components from the end. + // + // Consuming in reverse means that we can avoid tracking the shortened path in a dynamically-sized Array. + // Instead, we maintain an integer `popcount`, which tells us how many components towards the front ultimately get + // popped and removed from the traversal. For instance, consider the path "/p1/p2/../p3/p4/p5/../../p6": + // - When we see p6, the popcount is 0. p6 is yielded. + // - When we see p5 and p4, the popcount is 2 and 1 respectively. They are not yielded. + // - When we see p3, the popcount is 0 again, and it is yielded. p2 is not yielded, p1 is. + // Hence the final path is p1/p3/p6 - only, we discovered that information in reverse order (["p6", "p3", "p1"]). + // + // The downside is that it's harder for us to tell when something is at the start of the final path + // or whether other components may appear before it. For this reason, some components which have different + // behaviour get deferred until we can make that determination. + // + // Deferred Components + // =================== + // + // - Potential Windows drive letters (file URLs only). The parser in the URL standard visits components in-order, + // "shortening" (popping) when it sees a "..", unless the pop would remove the first component, and that component + // is a Windows drive letter. That means: + // + // 1. Windows drives cannot be popped ("C:/../../../foo" becomes "C:/foo") + // 2. Arbitrary stuff can appear before the drive, as long as it gets popped-out later. + // When parsing "abc/../C:", at some point "C:" will land at path[0], at which point nothing can pop it out. 
+ // + // So when we see a potential Windows drive letter, we stash the component and popcount at that point, and + // continue parsing everything left of the component. For a string like "abc/../C:", nothing on the left + // actually yields a component, so the candidate "C:" is confirmed as a drive letter. If something does yield + // ("abc/C:"), we know the candidate isn't really a drive, so we consider if it should have been popped and + // merge the stashed popcount from the RHS with the one from the LHS. + // + // - Empty components. These were originally deferred because an older version of the standard required + // empty components at the start of file paths to be removed (changed in https://github.com/whatwg/url/pull/544). + // Currently, they are used to detect if the last yielded component was empty, and the resulting path string + // would start with 2 slashes (e.g. "//p2"). https://github.com/whatwg/url/pull/505 introduced what we call + // a "path sigil" for such paths, and tracking empty components is handy for that. + // + // Other special components + // ======================== + // + // - Single dot ('.') components get skipped. + // - If at the end of the path, they force a trailing slash/empty component. + // - They cannot be independently popped; e.g. "/a/b/./../" -> "/a/", not "/a/b/". + // + // - Double dot ('..') components pop previous components from the path. + // - For file URLs, they do not pop beyond a Windows drive letter. + // - If at the end of the path, they force a trailing slash/empty component. + // (even if they have popped all other components, the result is an empty path, not a nil path) + + var remainingInput = input[...] + var state = _PathParserState() + var deferredComponent: _DeferredPathComponent? 
= .none + + repeat { + let separatorIndex = remainingInput.lastIndex { PathComponentParser.isPathSeparator($0, scheme: schemeKind) } + let pathComponent: InputString.SubSequence + if let separatorIdx = separatorIndex { + pathComponent = remainingInput[remainingInput.index(after: separatorIdx)...] + if ASCII(input[separatorIdx]) == .backslash { + visitValidationError(.unexpectedReverseSolidus) + } + } else { + pathComponent = remainingInput + } + + switch pathComponent { + case _ where PathComponentParser.isDoubleDotPathSegment(pathComponent): + state.popcount &+= 1 + fallthrough + + case _ where PathComponentParser.isSingleDotPathSegment(pathComponent): + // Don't defer this as it would have no effect due to the popcount increment. + // Since this must be at the end of the path, 'didYieldComponent' is sufficient for calculating path sigil. + if pathComponent.endIndex == input.endIndex { + visitEmptyPathComponent() + state.didYieldComponent = true + } + + case _ where isFileScheme && PathComponentParser.isWindowsDriveLetter(pathComponent): + _flushAndMergePopcount(&deferredComponent, &state) + deferredComponent = .potentialWindowsDrive(pathComponent, state.popcount) + state.popcount = 0 + + case _ where state.popcount > 0: + state.popcount -= 1 + + case _ where deferredComponent?.isPotentialWindowsDrive == true: + _flushAndMergePopcount(&deferredComponent, &state) + continue // Re-check this component with the new popcount. + + default: + if pathComponent.isEmpty { + _deferEmptyAssertNotWindowsDrive(&deferredComponent, &state) + } else { + _flushEmptiesAssertNotWindowsDrive(&deferredComponent, &state) + visitInputPathComponent(pathComponent, isWindowsDriveLetter: false) + state.didYieldComponent = true + } + } + + remainingInput = remainingInput[..<(separatorIndex ?? 
remainingInput.startIndex)] + } while !remainingInput.isEmpty + + assert( + deferredComponent != nil || state.didYieldComponent, + "Since the input path was not empty, we must have either deferred or yielded something from it." + ) + + let _basePath = baseURL?.utf8.path + var baseDrive: WebURL.UTF8View.SubSequence? + + if isFileScheme { + if let basePath = _basePath { + let baseURLFirstCmptLength = baseURL.unsafelyUnwrapped.storage.structure.firstPathComponentLength + baseDrive = PathComponentParser._normalizedWindowsDrive(in: basePath, firstCmptLength: baseURLFirstCmptLength) + } + // If the first written component of the input string is a Windows drive letter, the path is never relative - + // even if it normally would be. [URL Standard: "file" state, "file slash" state] + if case .potentialWindowsDrive(let firstComponent, _) = deferredComponent, + firstComponent.startIndex == input_firstComponentStart + { + visitInputPathComponent(firstComponent, isWindowsDriveLetter: true) + return + } + // If the Windows drive is not the first written component of the input string, and the path is absolute, + // we still prefer to use the drive from the base URL. [URL Standard: "file slash" state] + if isInputAbsolute, absolutePathsCopyWindowsDriveFromBase, let baseDrive = baseDrive { + _flushAndMergePopcount(&deferredComponent, &state) + visitBasePathComponent(baseDrive) + return + } + } + + guard isInputAbsolute == false, let basePath = _basePath, basePath.startIndex < basePath.endIndex else { + _flushForFinalization(&deferredComponent, &state) + return // Absolute paths, and relative paths with no base URL, are finished now. + } + assert(basePath.first == ASCII.forwardSlash.codePoint, "Normalized non-empty base paths must start with a /") + + // Drop the last base path component (unless it is a Windows drive, in which case flush and we're done). 
+ if let baseDrive = baseDrive, basePath.count == 3 { + _flushAndMergePopcount(&deferredComponent, &state) + visitBasePathComponent(baseDrive) + return + } + var remainingBasePath = basePath[..<(basePath.lastIndex(of: ASCII.forwardSlash.codePoint) ?? basePath.startIndex)] + + while let separatorIndex = remainingBasePath.lastIndex(of: ASCII.forwardSlash.codePoint) { + let pathComponent = remainingBasePath[ + Range(uncheckedBounds: (remainingBasePath.index(after: separatorIndex), remainingBasePath.endIndex)) + ] + + assert(PathComponentParser.isDoubleDotPathSegment(pathComponent) == false) + assert(PathComponentParser.isSingleDotPathSegment(pathComponent) == false) + + switch pathComponent { + // If we reached the base path's Windows drive letter, we can flush everything and end. + case _ where separatorIndex == basePath.startIndex && baseDrive != nil: + _flushAndMergePopcount(&deferredComponent, &state) + visitBasePathComponent(baseDrive!) + return + + case _ where state.popcount != 0: + state.popcount -= 1 + + case _ where deferredComponent?.isPotentialWindowsDrive == true: + _flushAndMergePopcount(&deferredComponent, &state) + continue // Re-check this component with the new popcount. + + default: + if pathComponent.isEmpty { + _deferEmptyAssertNotWindowsDrive(&deferredComponent, &state) + } else { + _flushEmptiesAssertNotWindowsDrive(&deferredComponent, &state) + visitBasePathComponent(pathComponent) + state.didYieldComponent = true + } + } + + remainingBasePath = remainingBasePath[Range(uncheckedBounds: (remainingBasePath.startIndex, separatorIndex))] + } + + assert(remainingBasePath.isEmpty, "Normalized non-empty base paths must start with a /") + _flushForFinalization(&deferredComponent, &state) + // Finally done! + } +} + + +// -------------------------------------------- +// MARK: - Parsers +// -------------------------------------------- + + +/// A summary of statistics about the lexically-normalized, percent-encoded path string. 
+/// +@usableFromInline +internal struct PathMetrics { + + /// The precise length of the path string, in bytes. + @usableFromInline + internal private(set) var requiredCapacity: Int + + /// The length of the first path component, in bytes, including its leading "/". + @usableFromInline + internal private(set) var firstComponentLength: Int + + /// The number of components in the path. + @usableFromInline + internal private(set) var numberOfComponents: Int + + /// Whether or not the path must be prefixed with a path sigil when it is written to a URL which does not have an authority sigil. + @usableFromInline + internal private(set) var requiresPathSigil: Bool + + /// Whether there is at least one component in the path which needs percent-encoding. + @usableFromInline + internal private(set) var needsPercentEncoding: Bool +} + +extension PathMetrics { + + /// Creates a `PathMetrics` object containing information about the shape of the given path-string if it were written in its simplified, normalized form. + /// + /// The metrics may also contain information about simplification/normalization steps which can be skipped when writing the path-string. 
+  ///
+  @inlinable
+  internal init(
+    parsing utf8: UTF8Bytes,
+    schemeKind: WebURL.SchemeKind,
+    baseURL: WebURL?,
+    absolutePathsCopyWindowsDriveFromBase: Bool
+  ) where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 {
+
+    // Every stored property must be initialized before `self` can be handed to the parser.
+    self.requiredCapacity = 0
+    self.firstComponentLength = 0
+    self.numberOfComponents = 0
+    self.needsPercentEncoding = false
+    self.requiresPathSigil = false
+
+    // The parser accumulates into its own copy of the (empty) metrics, which is adopted once the walk is done.
+    var parser = _Parser(_emptyMetrics: self)
+    parser.walkPathComponents(
+      pathString: utf8, schemeKind: schemeKind, baseURL: baseURL,
+      absolutePathsCopyWindowsDriveFromBase: absolutePathsCopyWindowsDriveFromBase)
+
+    self = parser.metrics
+  }
+
+  /// A `_PathParser` which does not write anything; it only accumulates `PathMetrics` from the components it visits.
+  ///
+  @usableFromInline
+  internal struct _Parser: _PathParser
+  where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 {
+
+    /// The metrics accumulated so far.
+    @usableFromInline
+    internal var metrics: PathMetrics
+
+    @inlinable
+    internal init(_emptyMetrics: PathMetrics) {
+      self.metrics = _emptyMetrics
+    }
+
+    @usableFromInline
+    internal typealias InputString = UTF8Bytes
+
+    /// Counts a component from the input string, measuring its percent-encoded length.
+    @inlinable
+    internal mutating func visitInputPathComponent(
+      _ pathComponent: UTF8Bytes.SubSequence, isWindowsDriveLetter: Bool
+    ) {
+      metrics.numberOfComponents += 1
+      let (encodedLength, needsEncoding) = pathComponent.lazy.percentEncodedGroups(as: \.path).encodedLength
+      metrics.needsPercentEncoding = metrics.needsPercentEncoding || needsEncoding
+      // Overwritten on every visit, so the final value describes whichever component was visited last.
+      // NOTE(review): presumably components are visited last-to-first (`_PathWriter` fills its buffer
+      // from the back), making the last-visited component the first one in the path. Confirm against `_PathParser`.
+      metrics.firstComponentLength = 1 /* "/" */ + encodedLength
+      metrics.requiredCapacity += metrics.firstComponentLength
+    }
+
+    /// Counts `n` empty components; each one contributes a single byte (its "/").
+    @inlinable
+    internal mutating func visitEmptyPathComponents(_ n: Int) {
+      metrics.numberOfComponents += n
+      metrics.requiredCapacity += n
+      metrics.firstComponentLength = 1
+    }
+
+    /// Records that the path needs a path sigil.
+    @inlinable
+    internal mutating func visitPathSigil() {
+      metrics.requiresPathSigil = true
+    }
+
+    /// Counts a component taken from the base URL's path. Its byte count is used as-is; no percent-encoding is measured.
+    @inlinable
+    internal mutating func visitBasePathComponent(_ pathComponent: WebURL.UTF8View.SubSequence) {
+      metrics.numberOfComponents += 1
+      metrics.firstComponentLength = 1 /* "/" */ + pathComponent.count
+      metrics.requiredCapacity += metrics.firstComponentLength
+    }
+  }
+}
+
+extension UnsafeMutableBufferPointer where Element == UInt8 {
+
+  /// Initializes this buffer to the simplified, normalized path parsed from `utf8`.
+  ///
+  /// The buffer must have precisely the correct capacity to store the path string, or a runtime error will be triggered. This implies that its address may not be `nil`.
+  /// The fact that the exact capacity is known is taken as proof that `PathMetrics` have been calculated, and that the number of bytes written will not overflow
+  /// an `Int`.
+  ///
+  /// - returns: The number of bytes written. This will be equal to `self.count`, but is calculated independently as an additional check.
+  ///
+  @inlinable
+  internal func writeNormalizedPath(
+    parsing utf8: UTF8Bytes,
+    schemeKind: WebURL.SchemeKind,
+    baseURL: WebURL?,
+    absolutePathsCopyWindowsDriveFromBase: Bool,
+    needsPercentEncoding: Bool = true
+  ) -> Int where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 {
+    return _PathWriter.writePath(
+      to: self, pathString: utf8, schemeKind: schemeKind, baseURL: baseURL,
+      absolutePathsCopyWindowsDriveFromBase: absolutePathsCopyWindowsDriveFromBase,
+      needsPercentEncoding: needsPercentEncoding
+    )
+  }
+
+  /// A path parser which writes a properly percent-encoded, normalised URL path string
+  /// in to a correctly-sized, uninitialized buffer. Use `PathMetrics` to calculate the buffer's required size.
+  ///
+  @usableFromInline
+  internal struct _PathWriter: _PathParser
+  where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 {
+
+    /// The destination buffer.
+    @usableFromInline
+    internal let buffer: UnsafeMutableBufferPointer
+
+    /// The offset in `buffer` of the first byte written so far.
+    /// `writePath` starts it at `buffer.endIndex`, and it decreases as content is prepended.
+    @usableFromInline
+    internal private(set) var front: Int
+
+    /// Whether input components are percent-encoded as they are written, rather than copied verbatim.
+    @usableFromInline
+    internal let needsEscaping: Bool
+
+    /// Writes the normalized path to `buffer` and returns the number of bytes written.
+    ///
+    /// Triggers a runtime error if `buffer` has no base address, or if the written path does not
+    /// end up exactly at the start of `buffer`.
+    ///
+    @inlinable
+    internal static func writePath(
+      to buffer: UnsafeMutableBufferPointer,
+      pathString input: UTF8Bytes,
+      schemeKind: WebURL.SchemeKind,
+      baseURL: WebURL?,
+      absolutePathsCopyWindowsDriveFromBase: Bool,
+      needsPercentEncoding: Bool = true
+    ) -> Int {
+      // Checking this now allows the implementation to safely use `.baseAddress.unsafelyUnwrapped`.
+      precondition(buffer.baseAddress != nil)
+      var writer = _PathWriter(_doNotUse: buffer, front: buffer.endIndex, needsPercentEncoding: needsPercentEncoding)
+      writer.walkPathComponents(
+        pathString: input,
+        schemeKind: schemeKind,
+        baseURL: baseURL,
+        absolutePathsCopyWindowsDriveFromBase: absolutePathsCopyWindowsDriveFromBase
+      )
+      // Checking this now allows the implementation to be safe when omitting bounds checks.
+      // Note that this runs after the walk: it detects an incorrectly-sized buffer, it does not prevent the writes.
+      precondition(writer.front == 0, "Buffer was incorrectly sized")
+      return buffer.count - writer.front
+    }
+
+    /// **Do not use**. Use the `_PathWriter.writePath(...)` static method instead.
+    ///
+    @inlinable
+    internal init(_doNotUse buffer: UnsafeMutableBufferPointer, front: Int, needsPercentEncoding: Bool) {
+      self.buffer = buffer
+      self.front = front
+      self.needsEscaping = needsPercentEncoding
+    }
+
+    @usableFromInline
+    internal typealias InputString = UTF8Bytes
+
+    /// Moves `front` back by `n` bytes and initializes those bytes to "/".
+    @inlinable
+    internal mutating func prependSlash(_ n: Int = 1) {
+      front &-= n
+      buffer.baseAddress.unsafelyUnwrapped.advanced(by: front)
+        .initialize(repeating: ASCII.forwardSlash.codePoint, count: n)
+    }
+
+    /// Prepends a component from the input string, followed (in memory: preceded) by its leading "/".
+    @inlinable
+    internal mutating func visitInputPathComponent(
+      _ pathComponent: UTF8Bytes.SubSequence, isWindowsDriveLetter: Bool
+    ) {
+      guard pathComponent.isEmpty == false else {
+        prependSlash()
+        return
+      }
+      guard isWindowsDriveLetter == false else {
+        // Drive letters are written as the input's first byte followed by ":",
+        // whatever the input's second byte was.
+        assert(pathComponent.count == 2)
+        front &-= 2
+        buffer.baseAddress.unsafelyUnwrapped.advanced(by: front).initialize(to: pathComponent[pathComponent.startIndex])
+        buffer.baseAddress.unsafelyUnwrapped.advanced(by: front &+ 1).initialize(to: ASCII.colon.codePoint)
+        prependSlash()
+        return
+      }
+      if needsEscaping {
+        // The buffer is filled back-to-front, so the component's bytes are visited in reverse;
+        // each group is still written in forward order, ending just before `front`.
+        for byteGroup in pathComponent.reversed().lazy.percentEncodedGroups(as: \.path) {
+          switch byteGroup.encoding {
+          case .percentEncoded:
+            (buffer.baseAddress.unsafelyUnwrapped + front - 3).initialize(to: byteGroup[0])
+            (buffer.baseAddress.unsafelyUnwrapped + front - 2).initialize(to: byteGroup[1])
+            (buffer.baseAddress.unsafelyUnwrapped + front - 1).initialize(to: byteGroup[2])
+            front &-= 3
+          case .unencoded, .substituted:
+            (buffer.baseAddress.unsafelyUnwrapped + front - 1).initialize(to: byteGroup[0])
+            front &-= 1
+          }
+        }
+      } else {
+        // No escaping needed: copy the component verbatim in to the space just before `front`.
+        let count = pathComponent.count
+        let newFront = front &- count
+        _ = UnsafeMutableBufferPointer(
+          start: buffer.baseAddress.unsafelyUnwrapped.advanced(by: newFront),
+          count: count
+        ).fastInitialize(from: pathComponent)
+        front = newFront
+      }
+      prependSlash()
+    }
+
+    /// Prepends `n` empty components, i.e. `n` "/" bytes.
+    @inlinable
+    internal mutating func visitEmptyPathComponents(_ n: Int) {
+      prependSlash(n)
+    }
+
+    @inlinable
+    internal func visitPathSigil() {
+      // URLWriter is responsible for writing its own path sigil.
+    }
+
+    /// Prepends a component from the base URL's path. It is copied verbatim, then given its leading "/".
+    @inlinable
+    internal mutating func visitBasePathComponent(_ pathComponent: WebURL.UTF8View.SubSequence) {
+      let count = pathComponent.count
+      let newFront = front &- count
+      _ = UnsafeMutableBufferPointer(
+        start: buffer.baseAddress.unsafelyUnwrapped.advanced(by: newFront),
+        count: count
+      ).fastInitialize(from: pathComponent)
+      front = newFront
+      prependSlash()
+    }
+  }
+}
+
+/// An object which checks for URL validation errors in a path string.
+///
+/// Validation errors are communicated to the given `URLParserCallback` if the path-string contains:
+/// - Non-URL code points
+/// - Invalid percent encoding (e.g. "%ZZ"), or
+/// - Backslashes as path separators
+///
+/// This type must not be initialized directly. To validate a path string, use the static `.validate` method.
+///
+@usableFromInline
+internal struct PathStringValidator: _PathParser
+where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8, Callback: URLParserCallback {
+
+  @usableFromInline
+  internal let path: UTF8Bytes
+
+  /// The callback to notify. The pointer is not owned; it must stay valid for as long as the validator is used.
+  @usableFromInline
+  internal let callback: UnsafeMutablePointer
+
+  @inlinable
+  internal init(_doNotUse path: UTF8Bytes, callback: UnsafeMutablePointer) {
+    self.path = path
+    self.callback = callback
+  }
+
+  /// Checks for non-fatal syntax oddities in the given path string.
+  ///
+  /// See the URL standard's "path state" or the type-level documentation for `PathStringValidator` for more information.
+  ///
+  /// - parameters:
+  ///   - input:      The path string to check.
+  ///   - schemeKind: The kind of scheme of the URL which the path belongs to.
+  ///   - callback:   The callback which is informed of any validation errors.
+  ///
+  @inlinable
+  internal static func validate(
+    pathString input: UTF8Bytes,
+    schemeKind: WebURL.SchemeKind,
+    callback: inout Callback
+  ) {
+    // The compiler has a tough time optimizing this function away when we ignore validation errors.
+    guard Callback.self != IgnoreValidationErrors.self else {
+      return
+    }
+    // A pointer made by passing `&callback` directly to the initializer is only valid for the duration
+    // of that call, yet the validator keeps using it while walking the path. Scope the pointer explicitly
+    // so that it remains valid for the entire walk.
+    withUnsafeMutablePointer(to: &callback) { callbackPointer in
+      var visitor = PathStringValidator(_doNotUse: input, callback: callbackPointer)
+      visitor.walkPathComponents(
+        pathString: input, schemeKind: schemeKind, baseURL: nil,
+        absolutePathsCopyWindowsDriveFromBase: false)
+    }
+  }
+
+  @usableFromInline
+  internal typealias InputString = UTF8Bytes
+
+  /// Checks the component for non-URL code points and invalid percent-encoding.
+  @usableFromInline
+  internal mutating func visitInputPathComponent(
+    _ pathComponent: UTF8Bytes.SubSequence, isWindowsDriveLetter: Bool
+  ) {
+    validateURLCodePointsAndPercentEncoding(utf8: pathComponent, callback: &callback.pointee)
+  }
+
+  @usableFromInline
+  internal mutating func visitEmptyPathComponents(_ n: Int) {
+    // Nothing to do.
+  }
+
+  @usableFromInline
+  internal func visitPathSigil() {
+    // Nothing to do.
+  }
+
+  @usableFromInline
+  internal mutating func visitBasePathComponent(_ pathComponent: WebURL.UTF8View.SubSequence) {
+    // `validate` always walks the path with a `nil` base URL.
+    assertionFailure("Should never be invoked without a base URL")
+  }
+
+  /// Forwards a validation error to the callback.
+  @usableFromInline
+  internal mutating func visitValidationError(_ error: ValidationError) {
+    callback.pointee.validationError(error)
+  }
+}
+
+
+// --------------------------------------------
+// MARK: - Path Utilities
+// --------------------------------------------
+
+
+/// A namespace for functions relating to parsing of path components.
+///
+@usableFromInline
+internal enum PathComponentParser {}
+
+extension PathComponentParser where T == Never {
+
+  /// Whether the given UTF-8 code-unit is a path separator character for the given `scheme`.
+  ///
+  /// "/" is always a separator; "\" is a separator only for special schemes.
+  ///
+  @inlinable @inline(__always)
+  internal static func isPathSeparator(_ codeUnit: UInt8, scheme: WebURL.SchemeKind) -> Bool {
+    ASCII(codeUnit) == .forwardSlash || (scheme.isSpecial && ASCII(codeUnit) == .backslash)
+  }
+}
+
+extension PathComponentParser where T: Collection, T.Element == UInt8 {
+
+  /// A Windows drive letter is two code points, of which the first is an ASCII alpha and the second is either U+003A (:) or U+007C (|).
+  ///
+  /// https://url.spec.whatwg.org/#url-miscellaneous
+  ///
+  @inlinable
+  internal static func isWindowsDriveLetter(_ bytes: T) -> Bool {
+    // Exactly two bytes are required; anything shorter or longer is rejected.
+    var remaining = bytes.makeIterator()
+    guard let letter = remaining.next(), let separator = remaining.next(), remaining.next() == nil else {
+      return false
+    }
+    guard ASCII(letter)?.isAlpha == true else { return false }
+    switch ASCII(separator) {
+    case .colon?, .verticalBar?: return true
+    default: return false
+    }
+  }
+
+  /// Whether `bytes` is a normalized Windows drive letter: a Windows drive letter whose second code point is U+003A (:).
+  ///
+  /// https://url.spec.whatwg.org/#url-miscellaneous
+  ///
+  @inlinable
+  internal static func isNormalizedWindowsDriveLetter(_ bytes: T) -> Bool {
+    guard isWindowsDriveLetter(bytes), let separator = bytes.dropFirst().first else { return false }
+    return ASCII(separator) == .colon
+  }
+
+  /// Whether `bytes` starts with a Windows drive letter. That is the case when:
+  ///
+  /// - it contains at least 2 code points,
+  /// - the first two code points are a Windows drive letter, and
+  /// - it either ends there, or the third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
+  ///
+  /// https://url.spec.whatwg.org/#url-miscellaneous
+  ///
+  @inlinable
+  internal static func hasWindowsDriveLetterPrefix(_ bytes: T) -> Bool {
+    var remaining = bytes.makeIterator()
+    guard let letter = remaining.next(), ASCII(letter)?.isAlpha == true else { return false }
+    guard let separator = remaining.next() else { return false }
+    let hasDriveSeparator = ASCII(separator) == .colon || ASCII(separator) == .verticalBar
+    guard hasDriveSeparator else { return false }
+    // A drive letter with nothing after it counts as a prefix.
+    guard let terminator = remaining.next() else { return true }
+    switch ASCII(terminator) {
+    case .forwardSlash?, .backslash?, .questionMark?, .numberSign?: return true
+    default: return false
+    }
+  }
+
+  /// Interprets the given collection as a URL's normalized path whose first component length is `firstCmptLength`, and returns a slice
+  /// covering the path's normalized Windows drive letter, if it has one.
+  ///
+  /// Windows drive letters only have meaning for `file` URLs.
+  ///
+  @inlinable
+  internal static func _normalizedWindowsDrive(
+    in path: T, firstCmptLength: Int
+  ) -> T.SubSequence? {
+
+    // A drive component is exactly "/" + letter + ":", i.e. 3 bytes long.
+    guard firstCmptLength == 3 else { return nil }
+    let candidate = path.dropFirst().prefix(2)
+    return PathComponentParser.isNormalizedWindowsDriveLetter(candidate) ? candidate : nil
+  }
+
+  /// Consumes a dot from the front of `iterator`: either the ASCII byte U+002E (.), or the string "%2e" / "%2E".
+  /// Returns `true` if one was found, otherwise `false`.
+  ///
+  /// Bytes are consumed from `iterator` up to and including the first one that fails to match.
+  ///
+  @inlinable
+  internal static func _checkForDotOrCaseInsensitivePercentEncodedDot(in iterator: inout T.Iterator) -> Bool {
+    guard let leading = iterator.next() else { return false }
+    switch ASCII(leading) {
+    case .period?:
+      return true
+    case .percentSign?:
+      guard let digit = iterator.next(), ASCII(digit) == .n2 else { return false }
+      guard let letter = iterator.next() else { return false }
+      return ASCII(letter & 0b11011111) == .E  // bitmask uppercases ASCII alphas.
+    default:
+      return false
+    }
+  }
+
+  /// Whether `bytes` consists of exactly one dot, where a dot is U+002E (.), "%2e" or "%2E".
+  ///
+  @inlinable
+  internal static func isSingleDotPathSegment(_ bytes: T) -> Bool {
+    var cursor = bytes.makeIterator()
+    let startsWithDot = _checkForDotOrCaseInsensitivePercentEncodedDot(in: &cursor)
+    return startsWithDot && cursor.next() == nil
+  }
+
+  /// Returns `true` if `bytes` contains two of either U+002E (.), the ASCII string "%2e" or "%2E" only.
+  /// Otherwise, `false`.
+  ///
+  @inlinable
+  internal static func isDoubleDotPathSegment(_ bytes: T) -> Bool {
+    var cursor = bytes.makeIterator()
+    // Two dots (each one literal or percent-encoded), then nothing else.
+    for _ in 0..<2 {
+      guard _checkForDotOrCaseInsensitivePercentEncodedDot(in: &cursor) else { return false }
+    }
+    return cursor.next() == nil
+  }
+
+  /// Whether the given normalized path needs a path sigil when written to a URL without an authority sigil.
+  /// That is the case when the path begins with two U+002F (/) code-units.
+  ///
+  @inlinable
+  internal static func doesNormalizedPathRequirePathSigil(_ path: T) -> Bool {
+    path.starts(with: repeatElement(ASCII.forwardSlash.codePoint, count: 2))
+  }
+}
diff --git a/Sources/WebURL/Parser/Parser+StringUtils.swift b/Sources/WebURL/Parser/Parser+StringUtils.swift
new file mode 100644
index 000000000..0a171ca54
--- /dev/null
+++ b/Sources/WebURL/Parser/Parser+StringUtils.swift
@@ -0,0 +1,212 @@
+// Copyright The swift-url Contributors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+// --------------------------------------------
+// MARK: - URL Code Points
+// --------------------------------------------
+
+
+/// Detects non-URL code points in the given sequence. The sequence is assumed to contain valid UTF8 text.
+///
+/// - parameters:
+///   - utf8: A sequence of valid UTF8-encoded text.
+///   - allowPercentSign: If `true`, the ASCII percent sign (U+0025) is considered an allowed code-point.
+/// - returns: `true` if the sequence contains code-points which are not URL code-points, otherwise `false`.
+///
+/// The URL code points are ASCII alphanumeric, U+0021 (!), U+0024 ($), U+0026 (&),
+/// U+0027 ('), U+0028 LEFT PARENTHESIS, U+0029 RIGHT PARENTHESIS, U+002A (*),
+/// U+002B (+), U+002C (,), U+002D (-), U+002E (.), U+002F (/), U+003A (:), U+003B (;),
+/// U+003D (=), U+003F (?), U+0040 (@), U+005F (_), U+007E (~),
+/// and code points in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters.
+///
+/// https://url.spec.whatwg.org/#url-code-points
+///
+@inlinable @inline(never)
+internal func hasNonURLCodePoints(
+  utf8: UTF8Bytes, allowPercentSign: Bool = false
+) -> Bool where UTF8Bytes: Sequence, UTF8Bytes.Element == UInt8 {
+
+  // Rather than using UTF8.decode to parse the actual 21-bit unicode codepoint, we can detect
+  // the handful of disallowed codepoints while they're still in UTF-8 form.
+  //
+  // (Note: "?" indicates a "don't care"/"match both" bit)
+  //
+  // - ASCII values need to be checked, but don't need any fancy decoding.
+  // - (0x80 ..< 0xA0) are the patterns 0xC28? and 0xC29? when encoded, which is an easy range to detect
+  //   by just ignoring the last 4 bits of the 2nd encoded byte.
+  // - Surrogates (0xD800 ... 0xDFFF) run the range (0xD800 ... 0xDFFF) => (0xEDA080 ... 0xEDBFBF) when encoded,
+  //   so we can match them with the bit-pattern 11101101_101?????_10??????.
+  // - Non-characters have a relatively simple pattern when encoded:
+  //   > A noncharacter is a code point that is in the range U+FDD0 to U+FDEF, inclusive,
+  //     or U+FFFE, U+FFFF, U+(0x01 ... 0x10)FFFE, U+(0x01 ... 0x10)FFFF.
+  //   https://infra.spec.whatwg.org/#noncharacter
+  //
+  //   1. 0xFDD? and 0xFDE?:
+  //      - 0xFDD? => 0xEFB79? in UTF8 (11101111_10110111_1001????)
+  //      - 0xFDE? => 0xEFB7A? in UTF8 (11101111_10110111_1010????)
+  //   2. 0x??FFFE and 0x??FFFF:
+  //      - 0xFFFE, 0xFFFF     => 0xEFBFB(E/F) in UTF8 (11101111_10111111_1011111?)
+  //      - 0x??FFFE, 0x??FFFF => 0xF??FBFB(E/F) in UTF8 (11110???_10??1111_10111111_1011111?)
+  //        (even though there are 2 "?" hex characters, we only have 5 "?" bits because the max codepoint is 10FFFF).
+
+  var utf8 = utf8.makeIterator()
+  while let byte1 = utf8.next() {
+    // The number of leading 1-bits in the first byte gives the length of the encoded sequence.
+    switch (~byte1).leadingZeroBitCount {
+    case 0:
+      // ASCII.
+      if byte1 == 0x25, allowPercentSign {
+        continue
+      }
+      // Bitmaps of the disallowed ASCII code points: bit N of `lo` is code point N, bit N of `hi` is code point 64 + N.
+      //                 FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210
+      let lo: UInt64 = 0b01010000_00000000_00000000_00101101_11111111_11111111_11111111_11111111
+      let hi: UInt64 = 0b10111000_00000000_00000000_00000001_01111000_00000000_00000000_00000000
+      if byte1 < 64 {
+        guard lo & (1 &<< byte1) == 0 else { return true }
+      } else {
+        guard hi & (1 &<< (byte1 &- 64)) == 0 else { return true }
+      }
+    case 2:
+      // 2-byte sequence.
+      guard let byte2 = utf8.next() else {
+        return true  // Invalid UTF8.
+      }
+      let encodedScalar = UInt32(byte1) << 8 | UInt32(byte2)
+      // Reject 0x80-0x8F, 0x90-0x9F (C1 controls; URL code points only begin at U+00A0).
+      let masked = encodedScalar & 0b11111111_11110000
+      if masked == 0b11000010_10000000 || masked == 0b11000010_10010000 {
+        return true
+      }
+    case 3:
+      // 3-byte sequence.
+      guard let byte2 = utf8.next(), let byte3 = utf8.next() else {
+        return true  // Invalid UTF8.
+      }
+      let encodedScalar = UInt32(byte1) << 16 | UInt32(byte2) << 8 | UInt32(byte3)
+      // Reject 0xFDD0-0xFDEF (non-characters).
+      let masked = encodedScalar & 0b11111111_11111111_11110000
+      if masked == 0b11101111_10110111_10010000 || masked == 0b11101111_10110111_10100000 {
+        return true
+      }
+      // Reject 0xFFFE, 0xFFFF (non-characters).
+      if encodedScalar & 0b11111111_11111111_11111110 == 0b11101111_10111111_10111110 {
+        return true
+      }
+      // Reject 0xD800-0xDFFF (surrogates).
+      // These shouldn't appear in any valid UTF8 sequence anyway.
+      if encodedScalar & 0b11111111_11100000_11000000 == 0b11101101_10100000_10000000 {
+        return true
+      }
+    case 4:
+      // 4-byte sequence.
+      guard let byte2 = utf8.next(), let byte3 = utf8.next(), let byte4 = utf8.next() else {
+        return true  // Invalid UTF8.
+      }
+      let encodedScalar = UInt32(byte1) << 24 | UInt32(byte2) << 16 | UInt32(byte3) << 8 | UInt32(byte4)
+      // Reject 0x??FFFE, 0x??FFFF (non-characters).
+      if encodedScalar & 0b11111000_11001111_11111111_11111110 == 0b11110000_10001111_10111111_10111110 {
+        return true
+      }
+    default:
+      // A lone continuation byte (1 leading 1-bit), or a lead byte with more than 4.
+      return true  // Invalid UTF8.
+    }
+  }
+  return false
+}
+
+/// Checks if `utf8`, which is a collection of UTF-8 code-units, contains any non-URL code-points or invalid percent encoding (e.g. "%XY").
+/// If it does, `callback` is informed with an appropriate `ValidationError`.
+///
+/// A percent sign is validly encoded if it is immediately followed by two ASCII hex digits. One `.unescapedPercentSign`
+/// error is reported for every percent sign which is not.
+///
+/// - Note: This method considers the percent sign ("%") to be a valid URL code-point.
+/// - Note: This method is a no-op if `callback` is an instance of `IgnoreValidationErrors`.
+///
+@inlinable
+internal func validateURLCodePointsAndPercentEncoding(
+  utf8: @autoclosure () -> UTF8Bytes, callback: inout Callback
+) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8, Callback: URLParserCallback {
+
+  // The compiler has a tough time optimising this function away when we ignore validation errors.
+  guard Callback.self != IgnoreValidationErrors.self else {
+    return
+  }
+  let utf8 = utf8()
+  if hasNonURLCodePoints(utf8: utf8, allowPercentSign: true) {
+    callback.validationError(.invalidURLCodePoint)
+  }
+  var percentSignSearchIdx = utf8.startIndex
+  while let percentSignIdx = utf8[percentSignSearchIdx...].firstIndex(where: { ASCII($0) == .percentSign }) {
+    percentSignSearchIdx = utf8.index(after: percentSignIdx)
+    // Examine the two code-units *after* the percent sign. Slicing from the percent sign itself would
+    // always include the (non-hex) "%" and so flag every percent sign, even in valid sequences like "%41".
+    let nextTwo = utf8[percentSignSearchIdx...].prefix(2)
+    if nextTwo.count != 2 || !nextTwo.allSatisfy({ ASCII($0)?.isHexDigit ?? false }) {
+      callback.validationError(.unescapedPercentSign)
+    }
+  }
+}
+
+
+// --------------------------------------------
+// MARK: - Other Utilities
+// --------------------------------------------
+
+
+/// If `utf8` begins with two U+002F (/) codepoints, returns the index immediately after them.
+/// Otherwise, `nil`.
+///
+@inlinable
+internal func indexAfterDoubleSolidusPrefix(
+  utf8: UTF8Bytes
+) -> UTF8Bytes.Index? where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 {
+  var idx = utf8.startIndex
+  guard idx < utf8.endIndex, utf8[idx] == ASCII.forwardSlash.codePoint else { return nil }
+  utf8.formIndex(after: &idx)
+  guard idx < utf8.endIndex, utf8[idx] == ASCII.forwardSlash.codePoint else { return nil }
+  utf8.formIndex(after: &idx)
+  return idx
+}
+
+/// The ASCII prefix checked for by `hasIDNAPrefix`.
+@inlinable
+internal var _idnaPrefix: StaticString { "xn--" }
+
+/// Returns `true` if `utf8` begins with the ASCII string "xn--", indicating that a domain's label is encoded by IDNA.
+/// Otherwise, `false`.
+///
+@inlinable
+internal func hasIDNAPrefix(
+  utf8: UTF8Bytes
+) -> Bool where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 {
+  _idnaPrefix.withUTF8Buffer { utf8.starts(with: $0) }
+}
+
+extension ASCII {
+
+  /// Returns `true` if this character is a forbidden host code point, otherwise `false`.
+  ///
+  /// A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR,
+  /// U+0020 SPACE, U+0023 (#), U+0025 (%), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
+  /// U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or U+007C (|).
+  ///
+  /// https://url.spec.whatwg.org/#host-miscellaneous
+  ///
+  @inlinable
+  internal var isForbiddenHostCodePoint: Bool {
+    // Two 64-bit bitmaps cover the ASCII range: bit N of `lo` stands for code point N,
+    // and bit N of `hi` stands for code point 64 + N. A set bit marks a forbidden code point.
+    //                 FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210
+    let lo: UInt64 = 0b11010100_00000000_10000000_00101001_00000000_00000000_00100110_00000001
+    let hi: UInt64 = 0b00010000_00000000_00000000_00000000_01111000_00000000_00000000_00000001
+    let isInLowerHalf = codePoint < 64
+    let bitmap = isInLowerHalf ? lo : hi
+    let bitIndex = isInLowerHalf ? codePoint : codePoint &- 64
+    return bitmap & (1 &<< bitIndex) != 0
+  }
+}
diff --git a/Sources/WebURL/Parser/Parser.swift b/Sources/WebURL/Parser/Parser.swift
new file mode 100644
index 000000000..4b0b6adb2
--- /dev/null
+++ b/Sources/WebURL/Parser/Parser.swift
@@ -0,0 +1,1497 @@
+// Copyright The swift-url Contributors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//-------------------------------------------------------------------------
+//
+// This file contains the URL parser.
+//
+// Parsing happens in 3 steps, starting at the 'urlFromBytes' function:
+//
+// - Prep:
+//     'urlFromBytes' trims C0 control characters and spaces from the input as the WHATWG parser does.
+//     We then need to remove all ASCII tabs and newlines: first, we try to get away with just trimming
+//     (keeping the contiguity of the content) but if that isn't possible, we remove the characters lazily
+//     using the `NewlineAndTabFiltered` wrapper.
+//
+// - Parsing:
+//     A 'ParsedURLString' is initialized for either the filtered or the trimmed input string.
+//     This initializer calls in to 'URLScanner.scanURLString', which begins the process of scanning the byte string -
+//     looking for components and marking where they start/end. The scanner concerns itself with locating components,
+//     not with checking their content.
+//
+//     The resulting component ranges and flags are validated and stored as a 'ParsedURLString.ProcessedMapping',
+//     which checks the few components that can actually be rejected on content. At this point, we are finished parsing,
+//     and the 'ParsedURLString' object is returned to 'urlFromBytes'.
+//
+// - Construction:
+//     'urlFromBytes' calls 'constructURLObject' on the result of the previous step, which unconditionally writes
+//     the content to freshly-allocated storage. The actual construction process involves performing a dry-run
+//     to calculate the optimal result type and produce an allocation which is correctly sized to hold the result.
+//
+//-------------------------------------------------------------------------
+
+/// Parses `inputString` as a URL string relative to `baseURL`, ignoring validation errors.
+/// Returns `nil` if the string could not be parsed.
+///
+@inlinable
+func urlFromBytes(_ inputString: Bytes, baseURL: WebURL?) -> WebURL?
+where Bytes: BidirectionalCollection, Bytes.Element == UInt8 {
+
+  var callback = IgnoreValidationErrors()
+  // Prefer parsing from contiguous storage when the collection offers it.
+  // The closure's own `WebURL?` result gets wrapped in another Optional, so `??` only falls back to the
+  // generic path when contiguous storage was unavailable - not when parsing failed.
+  return inputString.withContiguousStorageIfAvailable {
+    _urlFromBytes_impl($0.withoutTrappingOnIndexOverflow, baseURL: baseURL, callback: &callback)
+  } ?? _urlFromBytes_impl(inputString, baseURL: baseURL, callback: &callback)
+}
+
+/// Trims and filters `inputString`, parses it relative to `baseURL`, and constructs the resulting URL object.
+/// Validation errors are reported to `callback`. Returns `nil` if the string could not be parsed.
+///
+@inlinable @inline(never)
+func _urlFromBytes_impl(
+  _ inputString: Bytes, baseURL: WebURL?, callback: inout Callback
+) -> WebURL? where Bytes: BidirectionalCollection, Bytes.Element == UInt8, Callback: URLParserCallback {
+
+  // Trim leading/trailing C0 control characters and spaces.
+  let trimmedInput = inputString.trim {
+    switch ASCII($0) {
+    case ASCII.ranges.c0Control?, .space?: return true
+    default: return false
+    }
+  }
+  // If either bound moved, something was trimmed. That is a validation error, but not a failure.
+  if trimmedInput.startIndex != inputString.startIndex || trimmedInput.endIndex != inputString.endIndex {
+    callback.validationError(.unexpectedC0ControlOrSpace)
+  }
+  // Both branches do the same thing; they only differ in the type of collection being parsed
+  // (`left`: newline/tab-filtered, `right`: merely trimmed).
+  return ASCII.filterNewlinesAndTabs(from: trimmedInput).map(
+    left: { filtered in
+      ParsedURLString(parsing: filtered, baseURL: baseURL, callback: &callback)?.constructURLObject()
+    },
+    right: { trimmed in
+      ParsedURLString(parsing: trimmed, baseURL: baseURL, callback: &callback)?.constructURLObject()
+    }
+  ).get()
+}
+
+
+// --------------------------------------------
+// MARK: - ParsedURLString
+// --------------------------------------------
+
+
+/// A collection of UTF8 bytes which have been successfully parsed as a URL string.
+///
+/// A `ParsedURLString` contains both the input string and base URL, together with information from the parser which describes where each component can
+/// be found (either within the input string, or copied from the base URL). The `ParsedURLString` can then write the contents to any `URLWriter`, performing
+/// any transformations needed by specific components (e.g. lowercasing, percent-encoding/decoding) as it does so.
+///
+@usableFromInline
+internal struct ParsedURLString where InputString: BidirectionalCollection, InputString.Element == UInt8 {
+
+  @usableFromInline
+  internal let inputString: InputString
+
+  @usableFromInline
+  internal let baseURL: WebURL?
+
+  @usableFromInline
+  internal let mapping: ProcessedMapping
+
+  /// Parses the given collection of UTF8 bytes as a URL string, relative to the given base URL.
+  ///
+  /// Returns `nil` if either scanning the string or validating the scanned components fails.
+  ///
+  /// - parameters:
+  ///   - inputString: The input string, as a collection of UTF8 bytes.
+  ///   - baseURL:     The base URL against which `inputString` should be interpreted.
+  ///   - callback:    A callback to receive validation errors. If these are unimportant, pass an instance of `IgnoreValidationErrors`.
+  ///
+  @inlinable
+  internal init?(
+    parsing inputString: InputString, baseURL: WebURL?, callback: inout Callback
+  ) {
+    guard let ranges = URLScanner.scanURLString(inputString, baseURL: baseURL, callback: &callback),
+      let mapping = ProcessedMapping(ranges, inputString: inputString, baseURL: baseURL, callback: &callback)
+    else {
+      return nil
+    }
+    self.inputString = inputString
+    self.baseURL = baseURL
+    self.mapping = mapping
+  }
+
+  /// Writes the URL to the given `URLWriter`.
+  ///
+  /// - Note: Providing a `URLWriterHints` object for this `ParsedURLString` can significantly speed the process.
+  ///         Obtain metrics by writing the string to a `StructureAndMetricsCollector`.
+  ///
+  @inlinable
+  internal func write(to writer: inout WriterType) {
+    mapping.write(inputString: inputString, baseURL: baseURL, to: &writer)
+  }
+
+  /// Writes the URL to new `URLStorage`, appropriate for its size and structure, and returns it as a `WebURL` object.
+  ///
+  @inlinable
+  internal func constructURLObject() -> WebURL {
+    // First pass: a dry-run which collects the URL's structure, required capacity, and writer hints.
+    var info = StructureAndMetricsCollector()
+    write(to: &info)
+    // Second pass: write the URL again, this time in to storage created from that capacity and structure.
+    let storage = AnyURLStorage(optimalStorageForCapacity: info.requiredCapacity, structure: info.structure) { buffer in
+      var writer = UnsafePresizedBufferWriter(buffer: buffer, hints: info.hints)
+      write(to: &writer)
+      return writer.bytesWritten
+    }
+    return WebURL(storage: storage)
+  }
+}
+
+extension ParsedURLString {
+
+  /// The validated URL information contained within a string.
+  ///
+  @usableFromInline
+  internal struct ProcessedMapping {
+
+    /// The component ranges and flags, as found by the scanner.
+    @usableFromInline
+    internal let info: ScannedRangesAndFlags
+
+    @usableFromInline
+    internal let parsedHost: ParsedHost?
+
+    @usableFromInline
+    internal let port: UInt16?
+  }
+}
+
+/// The positions of URL components found within a string, and the flags to interpret them.
+///
+/// After coming from the scanner, certain components require additional _content validation_, which can be performed by constructing a `ProcessedMapping`.
+///
+@usableFromInline
+internal struct ScannedRangesAndFlags where InputString: Collection {
+
+  /// The position of the scheme's content, if present, without trailing separators.
+  @usableFromInline
+  internal var schemeRange: Range?
+
+  /// The position of the authority section, if present, without leading or trailing separators.
+  @usableFromInline
+  internal var authorityRange: Range?
+
+  /// The position of the username content, if present, without leading or trailing separators.
+  @usableFromInline
+  internal var usernameRange: Range?
+
+  /// The position of the password content, if present, without leading or trailing separators.
+  @usableFromInline
+  internal var passwordRange: Range?
+
+  /// The position of the hostname content, if present, without leading or trailing separators.
+  @usableFromInline
+  internal var hostnameRange: Range?
+
+  /// The position of the port content, if present, without leading or trailing separators.
+  @usableFromInline
+  internal var portRange: Range?
+
+  /// The position of the path content, if present, without leading or trailing separators.
+  /// Note that the path's initial "/", if present, is not considered a separator.
+  @usableFromInline
+  internal var pathRange: Range?
+
+  /// The position of the query content, if present, without leading or trailing separators.
+  @usableFromInline
+  internal var queryRange: Range?
+
+  /// The position of the fragment content, if present, without leading or trailing separators.
+  @usableFromInline
+  internal var fragmentRange: Range?
+
+  // Flags.
+
+  /// The kind of scheme contained in `schemeRange`, if it is not `nil`.
+  @usableFromInline
+  internal var schemeKind: WebURL.SchemeKind?
+
+  /// Whether this URL 'cannot be a base'.
+  @usableFromInline
+  internal var cannotBeABaseURL: Bool
+
+  /// A flag for a quirk in the standard, which means that absolute paths in particular URL strings should copy the Windows drive from their base URL.
+  @usableFromInline
+  internal var absolutePathsCopyWindowsDriveFromBase: Bool
+
+  /// The components to copy from the base URL. If non-empty, there must be a base URL.
+  /// Only the scheme and path may overlap with components detected in the input string - for the former, it is a meaningless quirk of the control flow,
+  /// and the two schemes must be equal; for the latter, it means the two paths should be merged (i.e. that the input string's path is relative to the base URL's path).
+  @usableFromInline
+  internal var componentsToCopyFromBase: _CopyableURLComponentSet
+
+  /// Memberwise initializer.
+  @inlinable
+  internal init(
+    schemeRange: Range?,
+    authorityRange: Range?,
+    usernameRange: Range?,
+    passwordRange: Range?,
+    hostnameRange: Range?,
+    portRange: Range?,
+    pathRange: Range?,
+    queryRange: Range?,
+    fragmentRange: Range?,
+    schemeKind: WebURL.SchemeKind?,
+    cannotBeABaseURL: Bool,
+    absolutePathsCopyWindowsDriveFromBase: Bool,
+    componentsToCopyFromBase: _CopyableURLComponentSet
+  ) {
+    self.schemeRange = schemeRange
+    self.authorityRange = authorityRange
+    self.usernameRange = usernameRange
+    self.passwordRange = passwordRange
+    self.hostnameRange = hostnameRange
+    self.portRange = portRange
+    self.pathRange = pathRange
+    self.queryRange = queryRange
+    self.fragmentRange = fragmentRange
+    self.schemeKind = schemeKind
+    self.cannotBeABaseURL = cannotBeABaseURL
+    self.absolutePathsCopyWindowsDriveFromBase = absolutePathsCopyWindowsDriveFromBase
+    self.componentsToCopyFromBase = componentsToCopyFromBase
+  }
+
+  /// Creates an empty value: no component ranges, no scheme kind, all flags `false`, and nothing to copy from the base URL.
+  @inlinable init() {
+    self.init(
+      schemeRange: nil, authorityRange: nil, usernameRange: nil, passwordRange: nil,
+      hostnameRange: nil, portRange: nil, pathRange: nil, queryRange: nil, fragmentRange: nil,
+      schemeKind: nil, cannotBeABaseURL: false, absolutePathsCopyWindowsDriveFromBase: false,
+      componentsToCopyFromBase: []
+    )
+  }
+}
+
+//swift-format-ignore
+/// A set of components to be copied from a URL.
+///
+/// Each component is represented by its own bit of `rawValue`.
+///
+/// - seealso: `ScannedRangesAndFlags.componentsToCopyFromBase`
+///
+@usableFromInline
+internal struct _CopyableURLComponentSet: OptionSet {
+
+  @usableFromInline
+  internal var rawValue: UInt8
+
+  @inlinable
+  internal init(rawValue: UInt8) {
+    self.rawValue = rawValue
+  }
+
+  @inlinable internal static var scheme: Self    { Self(rawValue: 1 << 0) }
+  @inlinable internal static var authority: Self { Self(rawValue: 1 << 1) }
+  @inlinable internal static var path: Self      { Self(rawValue: 1 << 2) }
+  @inlinable internal static var query: Self     { Self(rawValue: 1 << 3) }
+}
+
+extension ParsedURLString.ProcessedMapping {
+
+  /// Parses failable components discovered by the scanner.
+  /// This is the last stage at which parsing may fail, and successful construction of this mapping signifies that the input string can definitely be written as a URL.
+  ///
+  @inlinable
+  internal init?(
+    _ scannedInfo: ScannedRangesAndFlags,
+    inputString: InputString,
+    baseURL: WebURL?,
+    callback: inout Callback
+  ) where Callback: URLParserCallback {
+
+    var scannedInfo = scannedInfo
+    scannedInfo.checkInvariants(inputString, baseURL: baseURL)
+
+    if scannedInfo.schemeKind == nil {
+      guard let baseURL = baseURL, scannedInfo.componentsToCopyFromBase.contains(.scheme) else {
+        preconditionFailure("We must have a scheme")
+      }
+      scannedInfo.schemeKind = baseURL.schemeKind
+    }
+
+    // Port.
+    var port: UInt16?
+    if let portRange = scannedInfo.portRange, portRange.isEmpty == false {
+      guard let parsedInteger = ASCII.parseDecimalU16(from: inputString[portRange]) else {
+        callback.validationError(.portOutOfRange)
+        return nil
+      }
+      if parsedInteger != scannedInfo.schemeKind!.defaultPort {
+        port = parsedInteger
+      }
+    }
+
+    // Host.
+    var parsedHost: ParsedHost?
+ if let hostnameRange = scannedInfo.hostnameRange { + parsedHost = ParsedHost(inputString[hostnameRange], schemeKind: scannedInfo.schemeKind!, callback: &callback) + guard parsedHost != nil else { return nil } + } + + self.info = scannedInfo + self.parsedHost = parsedHost + self.port = port + } + + @inlinable + internal func write( + inputString: InputString, + baseURL: WebURL?, + to writer: inout WriterType + ) { + + let schemeKind = info.schemeKind! + + if info.componentsToCopyFromBase.isEmpty == false { + precondition(baseURL != nil) // Important: allows us to use 'unsafelyUnwrapped' when copying from a base URL. + } + + // 1: Flags + writer.writeFlags(schemeKind: schemeKind, cannotBeABaseURL: info.cannotBeABaseURL) + + // 2: Scheme. + if let inputScheme = info.schemeRange { + writer.writeSchemeContents(ASCII.Lowercased(inputString[inputScheme])) + } else { + precondition(info.componentsToCopyFromBase.contains(.scheme), "Cannot construct a URL without a scheme") + assert(schemeKind == baseURL!.schemeKind) + writer.writeSchemeContents(baseURL.unsafelyUnwrapped.utf8.scheme) + } + + // 3: Authority. 
+ var hasAuthority = false + if let hostname = info.hostnameRange { + hasAuthority = true + writer.writeAuthoritySigil() + + var hasCredentials = false + if let username = info.usernameRange, username.isEmpty == false { + hasCredentials = true + if writer.getHint(maySkipPercentEncoding: .username) { + writer.writeUsernameContents { writer in writer(inputString[username]) } + } else { + var wasEncoded = false + writer.writeUsernameContents { writer in + wasEncoded = inputString[username].lazy.percentEncodedGroups(as: \.userInfo).write(to: writer) + } + writer.writeHint(.username, maySkipPercentEncoding: !wasEncoded) + } + } + if let password = info.passwordRange, password.isEmpty == false { + hasCredentials = true + if writer.getHint(maySkipPercentEncoding: .password) { + writer.writePasswordContents { writer in writer(inputString[password]) } + } else { + var wasEncoded = false + writer.writePasswordContents { writer in + wasEncoded = inputString[password].lazy.percentEncodedGroups(as: \.userInfo).write(to: writer) + } + writer.writeHint(.password, maySkipPercentEncoding: !wasEncoded) + } + } + if hasCredentials { + writer.writeCredentialsTerminator() + } + parsedHost!.write(bytes: inputString[hostname], using: &writer) + if let port = port { + writer.writePort(port) + } + + } else if info.componentsToCopyFromBase.contains(.authority) { + baseURL.unsafelyUnwrapped.storage.withUTF8OfAllAuthorityComponents { + guard let baseAuthority = $0 else { return } + hasAuthority = true + writer.writeAuthoritySigil() + writer.writeKnownAuthorityString( + baseAuthority, usernameLength: $1, passwordLength: $2, hostnameLength: $3, portLength: $4 + ) + } + + } else if schemeKind == .file { + // 'file:' URLs have an implicit authority. [URL Standard: "file host" state] + writer.writeAuthoritySigil() + hasAuthority = true + } + + // 4: Path. 
+ switch info.pathRange { + case .some(let path) where info.cannotBeABaseURL: + if writer.getHint(maySkipPercentEncoding: .path) { + writer.writePath(firstComponentLength: 0) { writer in writer(inputString[path]) } + } else { + var wasEncoded = false + writer.writePath(firstComponentLength: 0) { writer in + wasEncoded = inputString[path].lazy.percentEncodedGroups(as: \.c0Control).write(to: writer) + } + writer.writeHint(.path, maySkipPercentEncoding: !wasEncoded) + } + + case .some(let path): + let pathMetrics = + writer.getPathMetricsHint() + ?? PathMetrics( + parsing: inputString[path], + schemeKind: schemeKind, + baseURL: info.componentsToCopyFromBase.contains(.path) ? baseURL.unsafelyUnwrapped : nil, + absolutePathsCopyWindowsDriveFromBase: info.absolutePathsCopyWindowsDriveFromBase + ) + assert(pathMetrics.requiredCapacity > 0) + writer.writePathMetricsHint(pathMetrics) + writer.writeHint(.path, maySkipPercentEncoding: !pathMetrics.needsPercentEncoding) + + if pathMetrics.requiresPathSigil, hasAuthority == false { + writer.writePathSigil() + } + writer.writePresizedPathUnsafely( + length: pathMetrics.requiredCapacity, + firstComponentLength: pathMetrics.firstComponentLength + ) { buffer in + return buffer.writeNormalizedPath( + parsing: inputString[path], + schemeKind: schemeKind, + baseURL: info.componentsToCopyFromBase.contains(.path) ? baseURL.unsafelyUnwrapped : nil, + absolutePathsCopyWindowsDriveFromBase: info.absolutePathsCopyWindowsDriveFromBase, + needsPercentEncoding: pathMetrics.needsPercentEncoding + ) + } + + case .none where info.componentsToCopyFromBase.contains(.path): + let baseURL = baseURL.unsafelyUnwrapped + if baseURL.storage.structure.pathRequiresSigil, hasAuthority == false { + writer.writePathSigil() + } + writer.writePath(firstComponentLength: baseURL.storage.structure.firstPathComponentLength) { writer in + writer(baseURL.utf8.path) + } + + case .none where schemeKind.isSpecial: + // Special URLs always have a path. 
+ writer.writePath(firstComponentLength: 1) { writer in writer(CollectionOfOne(ASCII.forwardSlash.codePoint)) } + + default: + break + } + + // 5: Query. + if let query = info.queryRange { + if writer.getHint(maySkipPercentEncoding: .query) { + writer.writeQueryContents { writer in writer(inputString[query]) } + } else { + var wasEncoded = false + writer.writeQueryContents { (writer: (_PercentEncodedByte) -> Void) in + if schemeKind.isSpecial { + wasEncoded = inputString[query].lazy.percentEncodedGroups(as: \.query_special).write(to: writer) + } else { + wasEncoded = inputString[query].lazy.percentEncodedGroups(as: \.query_notSpecial).write(to: writer) + } + } + writer.writeHint(.query, maySkipPercentEncoding: !wasEncoded) + } + + } else if info.componentsToCopyFromBase.contains(.query) { + let baseURL = baseURL.unsafelyUnwrapped + if let baseQuery = baseURL.utf8.query { + let isFormEncoded = baseURL.storage.structure.queryIsKnownFormEncoded + writer.writeQueryContents(isKnownFormEncoded: isFormEncoded) { writer in writer(baseQuery) } + } + } + + // 6: Fragment. + if let fragment = info.fragmentRange { + if writer.getHint(maySkipPercentEncoding: .fragment) { + writer.writeFragmentContents { writer in writer(inputString[fragment]) } + } else { + var wasEncoded = false + writer.writeFragmentContents { writer in + wasEncoded = inputString[fragment].lazy.percentEncodedGroups(as: \.fragment).write(to: writer) + } + writer.writeHint(.fragment, maySkipPercentEncoding: !wasEncoded) + } + } // Fragment is never copied from base URL. + + + // 7: Finalize. + writer.finalize() + return + } +} + + +// -------------------------------------------- +// MARK: - URL Scanner +// -------------------------------------------- + + +/// A namespace for URL scanning methods. 
+/// +@usableFromInline +internal enum URLScanner +where InputString: BidirectionalCollection, InputString.Element == UInt8, Callback: URLParserCallback { + + /// The result of an operation which scans a non-failable component: + /// either to continue scanning from the given next component, or that scanning completed successfully. + /// + @usableFromInline + internal enum ScanComponentResult { + case scan(_ component: ComponentToScan, _ startIndex: InputString.Index) + case scanningComplete + } + + /// The result of an operation which scans a failable component: + /// either instructions about which component to scan next, or a signal to abort scanning. + /// + @usableFromInline + internal enum ScanFailableComponentResult { + case success(continueFrom: ScanComponentResult) + case failed + } + + @usableFromInline + internal typealias SchemeKind = WebURL.SchemeKind + @usableFromInline + internal typealias InputSlice = InputString.SubSequence +} + +/// A component of a URL string which a scanning method may ask to be scanned next. +/// +@usableFromInline +internal enum ComponentToScan { + case authority + case pathStart + case path + case query + case fragment + // host and port only used internally within scanAuthority. + case host + case port +} + +extension URLScanner { + + /// Scans the given URL string and returns a mapping of components that were discovered. + /// + /// - parameters: + /// - input: The input string, as a collection of UTF8 bytes. + /// - baseURL: The base URL to interpret `input` against. + /// - callback: An object to notify about any validation errors which are encountered. + /// - returns: A mapping of detected URL components, or `nil` if the string could not be parsed. + /// + @inlinable + internal static func scanURLString( + _ input: InputString, + baseURL: WebURL?, + callback: inout Callback + ) -> ScannedRangesAndFlags?
{ + + var scanResults = ScannedRangesAndFlags() + + if let (schemeEndIndex, schemeKind) = parseScheme(input), schemeEndIndex != input.endIndex { + scanResults.schemeKind = schemeKind + scanResults.schemeRange = Range(uncheckedBounds: (input.startIndex, schemeEndIndex)) + + return scanURLWithScheme( + input.suffix(from: input.index(after: schemeEndIndex)), + scheme: schemeKind, + baseURL: baseURL, + &scanResults, + callback: &callback + ) ? scanResults : nil + } + + // [URL Standard: "no scheme" state] + + guard let base = baseURL else { + callback.validationError(.missingSchemeNonRelativeURL) + return nil + } + var relative = input[...] + + if base.cannotBeABase { + guard ASCII(flatMap: relative.popFirst()) == .numberSign else { + callback.validationError(.missingSchemeNonRelativeURL) + return nil + } + scanResults.componentsToCopyFromBase = [.scheme, .path, .query] + scanResults.cannotBeABaseURL = true + _ = scanFragment(relative, &scanResults, callback: &callback) + return scanResults + } + + if case .file = base.schemeKind { + scanResults.componentsToCopyFromBase = [.scheme] + return scanAllFileURLComponents( + relative, + baseURL: baseURL, + &scanResults, + callback: &callback + ) ? scanResults : nil + } + + return scanAllRelativeURLComponents( + relative, + baseScheme: base.schemeKind, + &scanResults, + callback: &callback + ) ? scanResults : nil + } + + /// Scans all components of the input string `input`, and builds up a map based on the URL's `scheme`. + /// + @inlinable + internal static func scanURLWithScheme( + _ input: InputSlice, scheme: SchemeKind, baseURL: WebURL?, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> Bool { + + // [URL Standard: "scheme" state]. 
+ switch scheme { + case .file: + if indexAfterDoubleSolidusPrefix(utf8: input) == nil { + callback.validationError(.fileSchemeMissingFollowingSolidus) + } + return scanAllFileURLComponents(input, baseURL: baseURL, &mapping, callback: &callback) + + case .other: + var authority = input + guard ASCII(flatMap: authority.popFirst()) == .forwardSlash else { + mapping.cannotBeABaseURL = true + return scanAllCannotBeABaseURLComponents(input, scheme: scheme, &mapping, callback: &callback) + } + // [URL Standard: "path or authority" state]. + guard ASCII(flatMap: authority.popFirst()) == .forwardSlash else { + return scanAllComponents(from: .path, input, scheme: scheme, &mapping, callback: &callback) + } + return scanAllComponents(from: .authority, authority, scheme: scheme, &mapping, callback: &callback) + + default: + // [URL Standard: "special relative or authority" state]. + var authority = input + if let afterPrefix = indexAfterDoubleSolidusPrefix(utf8: input) { + // [URL Standard: "special authority slashes" state]. + authority = authority[afterPrefix...] + } else { + // Since `scheme` is special, comparing the kind is sufficient. + if scheme == baseURL?.schemeKind { + callback.validationError(.relativeURLMissingBeginningSolidus) + return scanAllRelativeURLComponents(input, baseScheme: scheme, &mapping, callback: &callback) + } + callback.validationError(.missingSolidusBeforeAuthority) + } + // [URL Standard: "special authority ignore slashes" state]. + authority = authority.drop { ASCII($0) == .forwardSlash || ASCII($0) == .backslash } + return scanAllComponents(from: .authority, authority, scheme: scheme, &mapping, callback: &callback) + } + } +} + + +// -------------------------------------------- +// MARK: - Non-specific URLs and components +// -------------------------------------------- + + +extension URLScanner { + + /// Scans the given component from `input`, and continues scanning additional components until we can't find any more. 
+ /// + @inlinable + internal static func scanAllComponents( + from initialComponent: ComponentToScan, _ input: InputSlice, scheme: WebURL.SchemeKind, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> Bool { + + var remaining = input + var nextLocation: ScanComponentResult = .scan(initialComponent, remaining.startIndex) + + if case .scan(.authority, _) = nextLocation { + switch scanAuthority(remaining, scheme: scheme, &mapping, callback: &callback) { + case .success(continueFrom: let afterAuthority): + nextLocation = afterAuthority + case .failed: + return false + } + } + while case .scan(let thisComponent, let thisStartIndex) = nextLocation { + remaining = remaining[Range(uncheckedBounds: (thisStartIndex, remaining.endIndex))] + switch thisComponent { + case .pathStart: + nextLocation = scanPathStart(remaining, scheme: scheme, &mapping, callback: &callback) + case .path: + nextLocation = scanPath(remaining, scheme: scheme, &mapping, callback: &callback) + case .query: + nextLocation = scanQuery(remaining, scheme: scheme, &mapping, callback: &callback) + case .fragment: + nextLocation = scanFragment(remaining, &mapping, callback: &callback) + case .host, .port, .authority: + fatalError("Component tried to return to scanning authority") + } + } + return true + } + + /// Scans the "authority" component of a URL, containing: + /// - Username + /// - Password + /// - Host + /// - Port + /// + /// If parsing doesn't fail, the next component is always `pathStart`. + /// + @inlinable + internal static func scanAuthority( + _ input: InputSlice, scheme: WebURL.SchemeKind, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanFailableComponentResult { + + // 1. Validate the mapping. 
+ assert(mapping.usernameRange == nil) + assert(mapping.passwordRange == nil) + assert(mapping.hostnameRange == nil) + assert(mapping.portRange == nil) + assert(mapping.pathRange == nil) + assert(mapping.queryRange == nil) + assert(mapping.fragmentRange == nil) + + // 2. Find the extent of the authority (i.e. the terminator between host and path/query/fragment). + let authority = input.prefix { + switch ASCII($0) { + case ASCII.forwardSlash?, ASCII.questionMark?, ASCII.numberSign?: + return false + case ASCII.backslash? where scheme.isSpecial: + return false + default: + return true + } + } + + mapping.authorityRange = Range(uncheckedBounds: (authority.startIndex, authority.endIndex)) + + // 3. Find the extent of the credentials, if there are any, and where the host starts. + var hostStartIndex = authority.startIndex + if let credentialsEndIndex = authority.lastIndex(of: ASCII.commercialAt.codePoint) { + callback.validationError(.unexpectedCommercialAt) + hostStartIndex = input.index(after: credentialsEndIndex) + guard hostStartIndex < authority.endIndex else { + callback.validationError(.unexpectedCredentialsWithoutHost) + return .failed + } + + let credentials = authority[.., callback: inout Callback + ) -> ScanFailableComponentResult { + + // 1. Validate the mapping. + assert(mapping.hostnameRange == nil) + assert(mapping.portRange == nil) + assert(mapping.pathRange == nil) + assert(mapping.queryRange == nil) + assert(mapping.fragmentRange == nil) + + // 2. Find the extent of the hostname. + var separatorIndex: InputSlice.Index? + do { + var cursor = input.startIndex + var inBracket = false + portSearch: while cursor < input.endIndex { + switch ASCII(input[cursor]) { + case .leftSquareBracket?: + inBracket = true + case .rightSquareBracket?: + inBracket = false + case .colon? where !inBracket: + separatorIndex = cursor + break portSearch + default: + break + } + cursor = input.index(after: cursor) + } + } + + let hostname = input[..<(separatorIndex ?? 
input.endIndex)] + + // 3. Validate the structure. + if let portStartIndex = separatorIndex, portStartIndex == input.startIndex { + callback.validationError(.unexpectedPortWithoutHost) + return .failed + } + + // 4. Return the next component. + mapping.hostnameRange = Range(uncheckedBounds: (hostname.startIndex, hostname.endIndex)) + if let separatorIndex = separatorIndex { + return .success(continueFrom: .scan(.port, input.index(after: separatorIndex))) + } else { + return .success(continueFrom: .scan(.pathStart, input.endIndex)) + } + } + + /// Scans a port string, treating the whole of `input` as the port. + /// + /// Scanning fails (after reporting `.portInvalid`) if `input` contains any byte which is not an ASCII digit. + /// An empty port string is accepted. If scanning doesn't fail, the next component is always `pathStart`. + /// + @inlinable + internal static func scanPort( + _ input: InputSlice, scheme: SchemeKind, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanFailableComponentResult { + + // 1. Validate the mapping. + assert(mapping.portRange == nil) + assert(mapping.pathRange == nil) + assert(mapping.queryRange == nil) + assert(mapping.fragmentRange == nil) + + // 2. Find the extent of the port string. + let portString = input + + // 3. Validate the port string. + if !portString.allSatisfy({ ASCII($0)?.isDigit ?? false }), !portString.isEmpty { + callback.validationError(.portInvalid) + return .failed + } + + // 4. Return the next component. + mapping.portRange = Range(uncheckedBounds: (portString.startIndex, portString.endIndex)) + return .success(continueFrom: .scan(.pathStart, portString.endIndex)) + } + + /// Scans the URL string from the character immediately following the authority, and advises + /// whether the remainder is a path, query or fragment. + /// + @inlinable + internal static func scanPathStart( + _ input: InputSlice, scheme: SchemeKind, _ mapping: inout ScannedRangesAndFlags, + callback: inout Callback + ) -> ScanComponentResult { + + // 1. Validate the mapping. + assert(mapping.pathRange == nil) + assert(mapping.queryRange == nil) + assert(mapping.fragmentRange == nil) + + // 2. Return the component to parse based on input.
+ guard input.startIndex < input.endIndex else { + // Shortcut the 'path' state. This would otherwise ensure that special URLs have a non-nil pathRange, + // but `ParsedURLString.write` already knows to give special URLs an implicit path. + return .scanningComplete + } + + switch ASCII(input[input.startIndex]) { + case .questionMark?: + return .scan(.query, input.index(after: input.startIndex)) + case .numberSign?: + return .scan(.fragment, input.index(after: input.startIndex)) + default: + return .scan(.path, input.startIndex) + } + } + + /// Scans a URL path string from the given input, and advises whether there are any components following it. + /// + @inlinable + internal static func scanPath( + _ input: InputSlice, scheme: SchemeKind, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanComponentResult { + + // 1. Validate the mapping. + assert(mapping.pathRange == nil) + assert(mapping.queryRange == nil) + assert(mapping.fragmentRange == nil) + + // 2. Find the extent of the path. + let startOfNextComponent = input.firstIndex { ASCII($0) == .questionMark || ASCII($0) == .numberSign } + let path = input[..<(startOfNextComponent ?? input.endIndex)] + + // 3. Validate the path's contents. + PathStringValidator.validate(pathString: path, schemeKind: scheme, callback: &callback) + + // 4. Return the next component. + if !(path.startIndex < path.endIndex), !scheme.isSpecial { + mapping.pathRange = nil + } else { + mapping.pathRange = Range(uncheckedBounds: (path.startIndex, path.endIndex)) + } + if let nextStart = startOfNextComponent { + return .scan(ASCII(input[nextStart]) == .questionMark ? .query : .fragment, input.index(after: nextStart)) + } else { + return .scanningComplete + } + } + + /// Scans a URL query string from the given input, and advises whether there are any components following it. 
+ /// + @inlinable + internal static func scanQuery( + _ input: InputSlice, scheme: SchemeKind, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanComponentResult { + + // 1. Validate the mapping. + assert(mapping.queryRange == nil) + assert(mapping.fragmentRange == nil) + + // 2. Find the extent of the query (everything up to the first '#', if there is one). + let startOfFrg = input.firstIndex(of: ASCII.numberSign.codePoint) + + // 3. Validate the query-string. + validateURLCodePointsAndPercentEncoding(utf8: input.prefix(upTo: startOfFrg ?? input.endIndex), callback: &callback) + + // 4. Record the query's range and return the next component. + mapping.queryRange = Range(uncheckedBounds: (input.startIndex, startOfFrg ?? input.endIndex)) + if let nextStart = startOfFrg { + return .scan(.fragment, input.index(after: nextStart)) + } else { + return .scanningComplete + } + } + + /// Scans a URL fragment string from the given input. There are never any components following it. + /// + @inlinable + internal static func scanFragment( + _ input: InputSlice, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanComponentResult { + + // 1. Validate the mapping. + assert(mapping.fragmentRange == nil) + + // 2. Validate the fragment string. + validateURLCodePointsAndPercentEncoding(utf8: input, callback: &callback) + + // 3. Record the fragment's range; it always extends to the end of `input`. + mapping.fragmentRange = Range(uncheckedBounds: (input.startIndex, input.endIndex)) + return .scanningComplete + } +} + + +// -------------------------------------------- +// MARK: - File URLs +// -------------------------------------------- + + +extension URLScanner { + + /// Scans the given component from `input`, and continues scanning additional components until we can't find any more.
+ /// + @inlinable + internal static func scanAllFileURLComponents( + _ input: InputSlice, baseURL: WebURL?, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> Bool { + + var nextLocation = parseFileURLStart(input, baseURL: baseURL, &mapping, callback: &callback) + guard case .scan(_, let firstComponentStartIndex) = nextLocation else { + return true + } + var remaining = input.suffix(from: firstComponentStartIndex) + + while case .scan(let thisComponent, let thisStartIndex) = nextLocation { + remaining = remaining[Range(uncheckedBounds: (thisStartIndex, remaining.endIndex))] + switch thisComponent { + case .pathStart: + nextLocation = scanPathStart(remaining, scheme: .file, &mapping, callback: &callback) + case .path: + nextLocation = scanPath(remaining, scheme: .file, &mapping, callback: &callback) + case .query: + nextLocation = scanQuery(remaining, scheme: .file, &mapping, callback: &callback) + case .fragment: + nextLocation = scanFragment(remaining, &mapping, callback: &callback) + case .host, .port, .authority: + fatalError("Component tried to return to scanning authority") + } + } + return true + } + + /// Examines the slashes at the start of a file URL string to decide which component should be scanned first. + /// When `baseURL` is also a file URL, this may also mark components in `mapping` to be copied from the base. + /// + @inlinable + internal static func parseFileURLStart( + _ input: InputSlice, baseURL: WebURL?, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanComponentResult { + + // Note that file URLs may also be relative URLs. It all depends on what comes after "file:". + // - 0 slashes: copy base host, parse as path relative to base path. + // - 1 slash: copy base host, parse as absolute path. + // - 2 slashes: parse own host, parse absolute path. + // - 3 slashes: empty host, parse as absolute path. + // - 4+ slashes: invalid. + + let baseScheme = baseURL?.schemeKind + + var cursor = input.startIndex + guard cursor < input.endIndex, let c0 = ASCII(input[cursor]), c0 == .forwardSlash || c0 == .backslash else { + // [URL Standard: "file" state]. + // No slashes.
May be a relative path ("file:usr/lib/Swift") or no path ("file:?someQuery"). + guard baseScheme == .file else { + return .scan(.path, cursor) + } + assert(mapping.componentsToCopyFromBase.isEmpty || mapping.componentsToCopyFromBase == [.scheme]) + mapping.componentsToCopyFromBase.formUnion([.authority, .path, .query]) + + guard cursor < input.endIndex else { + return .scanningComplete + } + switch ASCII(input[cursor]) { + case .questionMark?: + mapping.componentsToCopyFromBase.remove(.query) + return .scan(.query, input.index(after: cursor)) + case .numberSign?: + return .scan(.fragment, input.index(after: cursor)) + default: + mapping.componentsToCopyFromBase.remove(.query) + // Relative paths which begin with a Windows drive letter are not actually relative to baseURL. + // This doesn't depend on the surrounding URL structure, so the path parser handles it + // without needing special instruction/flags. + if PathComponentParser.hasWindowsDriveLetterPrefix(input[cursor...]) { + callback.validationError(.unexpectedWindowsDriveLetter) + } + return .scan(.path, cursor) + } + } + cursor = input.index(after: cursor) + if c0 == .backslash { + callback.validationError(.unexpectedReverseSolidus) + } + + guard cursor < input.endIndex, let c1 = ASCII(input[cursor]), c1 == .forwardSlash || c1 == .backslash else { + // [URL Standard: "file slash" state]. + // 1 slash. Absolute path ("file:/usr/lib/Swift"). + guard baseScheme == .file else { + return .scan(.path, input.startIndex) + } + mapping.componentsToCopyFromBase.formUnion([.authority]) + + // Absolute paths in path-only URLs are still relative to the base URL's Windows drive letter (if it has one). + // This only occurs if the string goes through the "file slash" state - not if it contains a hostname + // and goes through the "file host" state. The path parser requires a flag to opt-in to that behaviour. 
+ mapping.absolutePathsCopyWindowsDriveFromBase = true + mapping.componentsToCopyFromBase.formUnion([.path]) + + return .scan(.path, input.startIndex) + } + let pathStartIfDriveLetter = cursor + cursor = input.index(after: cursor) + if c1 == .backslash { + callback.validationError(.unexpectedReverseSolidus) + } + + // [URL Standard: "file host" state]. + // 2+ slashes. e.g. "file://localhost/usr/lib/Swift" or "file:///usr/lib/Swift". + return scanFileHost(input[cursor...], pathStartIfDriveLetter: pathStartIfDriveLetter, &mapping, callback: &callback) + } + + /// Scans a hostname for a file URL from `input` and advises on how to proceed with scanning. + /// + /// Note that, unlike the nonspecific "host" parser, this never fails - even if there is a port and the hostname is empty. + /// In that case, the scanned hostname will contain the port and ultimately get rejected for containing a forbidden host code-point. + /// + /// If `pathStartIfDriveLetter` is given and the hostname is a Windows drive letter, scanning will be advised to scan a path from that position. + /// This position is typically just before `input.startIndex`, which is unusual for scanning methods as they don't typically advise to go backwards. + /// + @inlinable + internal static func scanFileHost( + _ input: InputSlice, pathStartIfDriveLetter: InputString.Index?, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanComponentResult { + + // 1. Validate the mapping. + assert(mapping.authorityRange == nil) + assert(mapping.hostnameRange == nil) + assert(mapping.portRange == nil) + assert(mapping.pathRange == nil) + assert(mapping.queryRange == nil) + assert(mapping.fragmentRange == nil) + + // 2. Find the extent of the hostname. + // The hostname is not validated after this, as it will be checked by the host parser. 
+ let startOfNextComponent = + input.firstIndex { byte in + switch ASCII(byte) { + case .forwardSlash?, .backslash?, .questionMark?, .numberSign?: return true + default: return false + } + } ?? input.endIndex + + let hostname = input[.., callback: inout Callback + ) -> Bool { + + var nextLocation = scanCannotBeABaseURLPath(input, &mapping, callback: &callback) + guard case .scan(_, let firstComponentStartIndex) = nextLocation else { + return true + } + var remaining = input.suffix(from: firstComponentStartIndex) + + while case .scan(let thisComponent, let thisStartIndex) = nextLocation { + remaining = remaining[Range(uncheckedBounds: (thisStartIndex, remaining.endIndex))] + switch thisComponent { + case .query: + nextLocation = scanQuery(remaining, scheme: scheme, &mapping, callback: &callback) + case .fragment: + nextLocation = scanFragment(remaining, &mapping, callback: &callback) + case .pathStart, .path, .host, .port, .authority: + fatalError("Tried to scan invalid component for cannot-be-a-base URL") + } + } + return true + } + + @inlinable + internal static func scanCannotBeABaseURLPath( + _ input: InputSlice, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanComponentResult { + + // 1. Validate the mapping. + assert(mapping.authorityRange == nil) + assert(mapping.hostnameRange == nil) + assert(mapping.portRange == nil) + assert(mapping.pathRange == nil) + assert(mapping.queryRange == nil) + assert(mapping.fragmentRange == nil) + + // 2. Find the extent of the path. + let startOfNextComponent = input.firstIndex { byte in + switch ASCII(byte) { + case .questionMark?, .numberSign?: return true + default: return false + } + } + + let path = input[..<(startOfNextComponent ?? input.endIndex)] + + // 3. Validate the path. + validateURLCodePointsAndPercentEncoding(utf8: path, callback: &callback) + + // 4. Return the next component. 
+ if let nextStart = startOfNextComponent { + mapping.pathRange = Range(uncheckedBounds: (path.startIndex, nextStart)) + return .scan(ASCII(input[nextStart]) == .questionMark ? .query : .fragment, input.index(after: nextStart)) + } else { + mapping.pathRange = path.isEmpty ? nil : Range(uncheckedBounds: (input.startIndex, input.endIndex)) + return .scanningComplete + } + } +} + + +// -------------------------------------------- +// MARK: - Relative URLs +// -------------------------------------------- + + +extension URLScanner { + + /// Scans the given component from `input`, and continues scanning additional components until we can't find any more. + /// + @inlinable + internal static func scanAllRelativeURLComponents( + _ input: InputSlice, baseScheme: WebURL.SchemeKind, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> Bool { + + var nextLocation = parseRelativeURLStart(input, baseScheme: baseScheme, &mapping, callback: &callback) + guard case .scan(_, let firstComponentStartIndex) = nextLocation else { + return true + } + var remaining = input.suffix(from: firstComponentStartIndex) + + if case .scan(.authority, _) = nextLocation { + switch scanAuthority(remaining, scheme: baseScheme, &mapping, callback: &callback) { + case .success(continueFrom: let afterAuthority): + nextLocation = afterAuthority + case .failed: + return false + } + } + while case .scan(let thisComponent, let thisStartIndex) = nextLocation { + remaining = remaining[Range(uncheckedBounds: (thisStartIndex, remaining.endIndex))] + switch thisComponent { + case .path: + nextLocation = scanPath(remaining, scheme: baseScheme, &mapping, callback: &callback) + case .pathStart: + nextLocation = scanPathStart(remaining, scheme: baseScheme, &mapping, callback: &callback) + case .query: + nextLocation = scanQuery(remaining, scheme: baseScheme, &mapping, callback: &callback) + case .fragment: + nextLocation = scanFragment(remaining, &mapping, callback: &callback) + case .host, 
.port, .authority: + fatalError("Component tried to return to scanning authority") + } + } + return true + } + + @inlinable + internal static func parseRelativeURLStart( + _ input: InputSlice, baseScheme: WebURL.SchemeKind, + _ mapping: inout ScannedRangesAndFlags, callback: inout Callback + ) -> ScanComponentResult { + + mapping.componentsToCopyFromBase = [.scheme] + + // [URL Standard: "relative" state]. + guard input.startIndex < input.endIndex else { + mapping.componentsToCopyFromBase.formUnion([.authority, .path, .query]) + return .scanningComplete + } + + switch ASCII(input[input.startIndex]) { + // [URL Standard: "relative slash" state]. + case .backslash? where baseScheme.isSpecial: + callback.validationError(.unexpectedReverseSolidus) + fallthrough + case .forwardSlash?: + var cursor = input.index(after: input.startIndex) + guard cursor < input.endIndex else { + mapping.componentsToCopyFromBase.formUnion([.authority]) + return .scan(.path, input.startIndex) + } + switch ASCII(input[cursor]) { + case .backslash? where baseScheme.isSpecial: + callback.validationError(.unexpectedReverseSolidus) + fallthrough + case .forwardSlash?: + cursor = input.index(after: cursor) + if baseScheme.isSpecial { + // [URL Standard: "special authority ignore slashes" state]. + cursor = + input[cursor...].firstIndex { + ASCII($0) != .forwardSlash && ASCII($0) != .backslash + } ?? input.endIndex + } + return .scan(.authority, cursor) + default: + mapping.componentsToCopyFromBase.formUnion([.authority]) + return .scan(.path, input.startIndex) + } + + // Back to [URL Standard: "relative" state]. 
+ case .questionMark?: + mapping.componentsToCopyFromBase.formUnion([.authority, .path]) + return .scan(.query, input.index(after: input.startIndex)) + case .numberSign?: + mapping.componentsToCopyFromBase.formUnion([.authority, .path, .query]) + return .scan(.fragment, input.index(after: input.startIndex)) + default: + // Since we have a non-empty input string which doesn't begin with a query/fragment sigil ("?"/"#"), + // `scanPath` will always set a non-nil pathRange. + // `ParsedURLString.write` knows that if it sees a non-nil pathRange, *and* we ask the base's path, + // that it should provide both to the path parser, which will combine them. + mapping.componentsToCopyFromBase.formUnion([.authority, .path]) + return .scan(.path, input.startIndex) + } + } +} + + +// -------------------------------------------- +// MARK: - Post-scan validation +// -------------------------------------------- + + +extension ScannedRangesAndFlags where InputString: BidirectionalCollection, InputString.Element == UInt8 { + + /// Performs some basic invariant checks on the scanned URL data. For debug builds. + /// + #if DEBUG + @usableFromInline + internal func checkInvariants(_ inputString: InputString, baseURL: WebURL?) { + + // - Structural invariants. + // Ensure that the combination of scanned ranges and flags makes sense. 
+ + if schemeRange == nil { + assert(componentsToCopyFromBase.contains(.scheme), "We must have a scheme from somewhere") + } + if usernameRange != nil || passwordRange != nil || hostnameRange != nil || portRange != nil { + assert(hostnameRange != nil, "A scanned authority component implies a scanned hostname") + assert(authorityRange != nil, "A scanned authority component implies a scanned authority") + assert(!cannotBeABaseURL, "A URL with an authority cannot be a cannot-be-a-base URL") + if passwordRange != nil { + assert(usernameRange != nil, "Can't have a password without a username (even if empty)") + } + if portRange != nil { + assert(hostnameRange != nil, "Can't have a port without a hostname") + } + } + // Ensure components from input string do not overlap with 'componentsToCopyFromBase' (except path). + if schemeRange != nil { + // Scheme can only overlap in relative URLs of special schemes. + if componentsToCopyFromBase.contains(.scheme) { + assert( + schemeKind!.isSpecial && schemeKind == baseURL!.schemeKind, "Copying a different scheme from baseURL?!") + } + } + if authorityRange != nil { + assert( + !componentsToCopyFromBase.contains(.authority), "Authority was scanned; shouldn't be copied from baseURL") + } + if queryRange != nil { + assert(!componentsToCopyFromBase.contains(.query), "Query was scanned; shouldn't be copied from baseURL") + } + + // - Content invariants. + // Make sure that things such as separators are where we expect and not inside the scanned ranges. + + if let schemeRange = schemeRange { + assert(!schemeRange.isEmpty) + assert(inputString[schemeRange].last != ASCII.colon.codePoint) + } + if authorityRange != nil { + if let username = usernameRange { + if let password = passwordRange { + assert(inputString[inputString.index(before: password.lowerBound)] == ASCII.colon.codePoint) + } + assert(inputString[passwordRange?.upperBound ?? 
username.upperBound] == ASCII.commercialAt.codePoint) + } + assert(hostnameRange != nil) + if let port = portRange { + assert(inputString[inputString.index(before: port.lowerBound)] == ASCII.colon.codePoint) + } + } + if let query = queryRange { + assert(inputString[inputString.index(before: query.lowerBound)] == ASCII.questionMark.codePoint) + } + if let fragment = fragmentRange { + assert(inputString[inputString.index(before: fragment.lowerBound)] == ASCII.numberSign.codePoint) + } + } + #else + @inlinable @inline(__always) + internal func checkInvariants(_ inputString: InputString, baseURL: WebURL?) {} + #endif +} + + +// -------------------------------------------- +// MARK: - Parsing Utilities +// -------------------------------------------- + + +/// Parses a scheme from the start of the given UTF-8 code-units. +/// +/// If the string contains a scheme terminator ("`:`"), the returned tuple's `terminator` element will be equal to its index. +/// Otherwise, the entire string will be considered the scheme name, and `terminator` will be equal to the input string's `endIndex`. +/// If the string does not contain a valid scheme, this function returns `nil`. +/// +@inlinable +func parseScheme( + _ input: UTF8Bytes +) -> (terminator: UTF8Bytes.Index, kind: WebURL.SchemeKind)? where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + let terminatorIdx = input.firstIndex { $0 == ASCII.colon.codePoint } ?? input.endIndex + let schemeName = input[Range(uncheckedBounds: (input.startIndex, terminatorIdx))] + let kind = WebURL.SchemeKind(parsing: schemeName) + + guard case .other = kind else { + return (terminatorIdx, kind) + } + // Note: this ensures empty strings are rejected. 
+ guard ASCII(flatMap: schemeName.first)?.isAlpha == true else { return nil } + let isValidSchemeName = schemeName.allSatisfy { byte in + // https://bugs.swift.org/browse/SR-14438 + // swift-format-ignore + switch ASCII(byte) { + case .some(let char) where char.isAlphaNumeric: fallthrough + case .plus?, .minus?, .period?: return true + default: return false + } + } + return isValidSchemeName ? (terminatorIdx, kind) : nil +} + +/// Given a string, like "example.com:99/some/path?hello=world", returns the endIndex of the hostname component. +/// This is used by the Javascript model's `hostname` setter, which accepts a rather wide variety of inputs. +/// +/// This is a "scan-level" operation: the discovered hostname may need additional processing before being written to a URL string. +/// The only situation in which this function returns `nil` is if the scheme is not `file`, and the given string starts with a `:` +/// (i.e. contains a port but no hostname). +/// +internal func findEndOfHostnamePrefix( + _ input: UTF8Bytes, scheme: WebURL.SchemeKind, callback cb: inout Callback +) -> UTF8Bytes.Index? +where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8, Callback: URLParserCallback { + + var mapping = ScannedRangesAndFlags() + + // See `URLScanner.scanAuthority`. + let hostname = input.prefix { + switch ASCII($0) { + case ASCII.forwardSlash?, ASCII.questionMark?, ASCII.numberSign?: + return false + case ASCII.backslash? where scheme.isSpecial: + return false + default: + return true + } + } + if scheme == .file { + // [URL Standard: "file host" state]. + // Hostnames which are Windows drive letters are not interpreted as paths in setter mode, so pSIDL = nil. + _ = URLScanner.scanFileHost(hostname, pathStartIfDriveLetter: nil, &mapping, callback: &cb) + } else { + guard case .success(_) = URLScanner.scanHostname(hostname, scheme: scheme, &mapping, callback: &cb) else { + // Only fails if there is a port and the hostname is empty. 
+ assert(hostname.first == ASCII.colon.codePoint) + return nil + } + } + return mapping.hostnameRange?.upperBound ?? hostname.endIndex +} diff --git a/Sources/WebURL/Parser/URLWriter.swift b/Sources/WebURL/Parser/URLWriter.swift new file mode 100644 index 000000000..4a6381806 --- /dev/null +++ b/Sources/WebURL/Parser/URLWriter.swift @@ -0,0 +1,689 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// An interface through which `ParsedURLString` writes a normalized URL string. +/// +/// `ParsedURLString` will call the `write...` functions implemented by conformers to this protocol, writing the UTF-8 code-units of each component, +/// in the order in which they appear in the final string. Conformers are required to ensure that all code-units are written without loss. +/// +@usableFromInline +internal protocol URLWriter: HostnameWriter { + + /// Notes the given information about the URL. This is always the first function to be called. + /// + mutating func writeFlags(schemeKind: WebURL.SchemeKind, cannotBeABaseURL: Bool) + + /// A function which writes a piece of a component. 
+ /// + /// Functions using this pattern typically look like the following: + /// ```swift + /// func writeUsername(_ usernameWriter: (PieceWriter)->Void) + /// ``` + /// + /// Callers invoke this function with a closure, which is passed a `PieceWriter` through which it can write its contents iteratively, incorporating its own + /// control-flow and using whichever `Collection` type is convenient. + /// ```swift + /// writeUsername { writePiece in + /// for piece in ... { + /// writePiece(piece) + /// } + /// } + /// ``` + /// + typealias PieceWriter = (T) -> Void + + /// Appends the given UTF-8 code-units to the URL string, followed by the scheme separator character (`:`). + /// This is always the first call to the writer after `writeFlags`. + /// + mutating func writeSchemeContents(_ schemeBytes: T) where T: Collection, T.Element == UInt8 + + /// Appends the authority header (`//`) to the URL string. + /// If called, this must always be the immediate successor to `writeSchemeContents`. + /// + mutating func writeAuthoritySigil() + + /// Appends the path sigil (`/.`) to the URL string. + /// If called, this must always be the immediate successor to `writeSchemeContents`. + /// + mutating func writePathSigil() + + /// Appends the UTF-8 code-units provided by `usernameWriter` to the URL string. + /// The content must already be percent-encoded and not include any separators. + /// If called, this must always be the immediate successor to `writeAuthoritySigil`. + /// + /// Note that `usernameWriter` is not guaranteed to be invoked. + /// + mutating func writeUsernameContents( + _ usernameWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 + + /// Appends the password separator character (`:`), followed by the UTF-8 code-units provided by `passwordWriter`, to the URL string. + /// The content must already be percent-encoded and not include any separators. + /// + /// Note that `passwordWriter` is not guaranteed to be invoked. 
+ /// + mutating func writePasswordContents( + _ passwordWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 + + /// Appends the credential terminator byte (`@`) to the URL string. + /// If called, this must always be the immediate successor to either `writeUsernameContents` or `writePasswordContents`. + /// + mutating func writeCredentialsTerminator() + + /// Appends the UTF-8 code-units given by `hostnameWriter` to the URL string. + /// The content must already be percent-encoded/IDNA-transformed and not include any separators. + /// If called, this must always have been preceded by a call to `writeAuthoritySigil`. + /// + /// Note that `hostnameWriter` is not guaranteed to be invoked. + /// + mutating func writeHostname( + lengthIfKnown: Int?, _ hostnameWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 + + /// Appends the port separator character (`:`), followed by the textual representation of the given port number, to the URL string. + /// If called, this must always be the immediate successor to `writeHostname`. + /// + mutating func writePort(_ port: UInt16) + + /// Appends an entire authority string (username + password + hostname + port) to the URL string. + /// The content must already be percent-encoded/IDNA-transformed. + /// If called, this must always be the immediate successor to `writeAuthoritySigil`. + /// + /// - important: `passwordLength` and `portLength` include their required leading separators (so a port component of `:8080` has a length of 5). + /// + mutating func writeKnownAuthorityString( + _ authority: UnsafeBufferPointer, + usernameLength: Int, passwordLength: Int, hostnameLength: Int, portLength: Int + ) + + /// Appends the UTF-8 code-units given by `writer` to the URL string. + /// The content must already be percent-encoded. No separators are added before or after the content. + /// + /// Note that `writer` is not guaranteed to be invoked. 
+ /// + mutating func writePath( + firstComponentLength: Int, _ writer: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 + + /// Appends a path of size `length`, which may be initialized by `writer`, to the URL string. + /// The `writer` closure must initialize all bytes from `0..) -> Int + ) + + /// Appends the query separator character (`?`), followed by the UTF-8 code-units provided by `queryWriter`, to the URL string. + /// The content must already be percent-encoded. + /// + /// Note that `queryWriter` is not guaranteed to be invoked. + /// + mutating func writeQueryContents( + isKnownFormEncoded: Bool, _ queryWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 + + /// Appends the fragment separator character (`#`), followed by the UTF-8 code-units provided by `fragmentWriter`, to the URL string. + /// The content must already be percent-encoded. + /// + /// Note that `fragmentWriter` is not guaranteed to be invoked. + /// + mutating func writeFragmentContents( + _ fragmentWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 + + // Optional callbacks. + + /// Optional function which informs the writer that the URL has completed writing. No more content or hints will be written after this function is called. + /// The default implementation does nothing. + /// + mutating func finalize() + + // Optional hints. + + /// Optional function which asks the writer whether it happens to know that the given component may skip percent-encoding. + /// + /// The default implementation returns `false`. + /// + func getHint(maySkipPercentEncoding component: WebURL.Component) -> Bool + + /// Optional function which notes that the given component did not require percent-encoding when writing from the input-string. + /// This doesn't mean that the component does not _contain_ any percent-encoded contents, only that we don't need to perform an + /// additional level of encoding when writing. 
+ /// + /// Conformers may wish to store and share this information, in case they wish to write the same contents using another `URLWriter`. + /// The default implementation does nothing. + /// + mutating func writeHint(_ component: WebURL.Component, maySkipPercentEncoding: Bool) + + /// Optional function which asks the writer whether it happens to have `PathMetrics` for the URL which is being written. + /// + /// The default implementation returns `nil`. + /// + func getPathMetricsHint() -> PathMetrics? + + /// Optional function which takes note of metrics collected while writing the URL's path component. + /// + /// Conformers may wish to store and share this information, in case they wish to write the same contents using another `URLWriter`. + /// The default implementation does nothing. + /// + mutating func writePathMetricsHint(_ pathMetrics: PathMetrics) +} + +extension URLWriter { + + @inlinable + internal mutating func writeQueryContents( + _ queryWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 { + writeQueryContents(isKnownFormEncoded: false, queryWriter) + } + + @inlinable + internal mutating func finalize() { + // Not required. + } + + @inlinable + internal func getHint(maySkipPercentEncoding component: WebURL.Component) -> Bool { + false + } + + @inlinable + internal mutating func writeHint(_ component: WebURL.Component, maySkipPercentEncoding: Bool) { + // Not required. + } + + @inlinable + internal func getPathMetricsHint() -> PathMetrics? { + nil + } + + @inlinable + internal mutating func writePathMetricsHint(_ pathMetrics: PathMetrics) { + // Not required. + } +} + +/// Stored hints about how to write a particular URL string. +/// +@usableFromInline +internal struct URLWriterHints { + + /// If set, contains information about the number of code-units in the path, number of path components, etc. + /// If not set, users may make no assumptions about the path. + /// + @usableFromInline + internal var pathMetrics: PathMetrics? 
+ + /// Components which are known to not require percent-encoding. + /// If a component is not in this set, users must assume that it requires percent-encoding. + /// + @usableFromInline + internal var componentsWhichMaySkipPercentEncoding: WebURL.ComponentSet + + @inlinable + internal init() { + self.pathMetrics = nil + self.componentsWhichMaySkipPercentEncoding = [] + } +} + + +// -------------------------------------------- +// MARK: - Writers +// -------------------------------------------- + + +/// A `URLWriter` which does not actually write to any storage, only gathering information about what the URL string looks like. +/// +/// This type cannot be instantiated directly. Use the `StructureAndMetricsCollector.collect { ... }` function +/// to obtain an instance, write to it, and collect its results. +/// +@usableFromInline +internal struct StructureAndMetricsCollector: URLWriter { + + // Note: requiredCapacity must always use arithmetic which traps on overflow, + // as 'UnsafePresizedBufferWriter' relies on this fact being verified for memory safety. + @usableFromInline + internal private(set) var requiredCapacity: Int + + @usableFromInline + internal private(set) var structure: URLStructure + + @usableFromInline + internal private(set) var hints: URLWriterHints + + /// Creates a new structure and metrics collector, initially representing an invalid, empty URL string. + /// + /// - important: Do not use the returned instance's data until an URL string has been written to it. 
+ /// + @inlinable + internal init() { + self.requiredCapacity = 0 + self.structure = .invalidEmptyStructure() + self.hints = URLWriterHints() + } + + @inlinable + internal mutating func writeFlags(schemeKind: WebURL.SchemeKind, cannotBeABaseURL: Bool) { + structure.schemeKind = schemeKind + structure.cannotBeABaseURL = cannotBeABaseURL + } + + @inlinable + internal mutating func writeSchemeContents( + _ schemeBytes: T + ) where T: Collection, T.Element == UInt8 { + + assert(structure.schemeLength == 0) + structure.schemeLength = schemeBytes.count + 1 /* ":" */ + requiredCapacity = structure.schemeLength + } + + @inlinable + internal mutating func writeAuthoritySigil() { + assert(structure.sigil == .none) + structure.sigil = .authority + requiredCapacity += Sigil.authority.length + } + + @inlinable + internal mutating func writePathSigil() { + assert(structure.sigil == .none) + structure.sigil = .path + requiredCapacity += Sigil.path.length + } + + @inlinable + internal mutating func writeUsernameContents( + _ usernameWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 { + + assert(structure.usernameLength == 0) + usernameWriter { structure.usernameLength += $0.count } + requiredCapacity += structure.usernameLength + } + + @inlinable + internal mutating func writePasswordContents( + _ passwordWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 { + + assert(structure.passwordLength == 0) + structure.passwordLength = 1 + passwordWriter { structure.passwordLength += $0.count } + requiredCapacity += structure.passwordLength + } + + @inlinable + internal mutating func writeCredentialsTerminator() { + requiredCapacity += 1 + } + + @inlinable + internal mutating func writeHostname( + lengthIfKnown: Int?, _ hostnameWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 { + + assert(structure.hostnameLength == 0) + if let knownLength = lengthIfKnown { + structure.hostnameLength = knownLength + requiredCapacity 
+= structure.hostnameLength + } else { + hostnameWriter { structure.hostnameLength += $0.count } + requiredCapacity += structure.hostnameLength + } + } + + @inlinable + internal mutating func writePort(_ port: UInt16) { + + assert(structure.portLength == 0) + structure.portLength = 1 /* ":" */ + switch port { + case 10000...UInt16.max: structure.portLength += 5 + case 1000..<10000: structure.portLength += 4 + case 100..<1000: structure.portLength += 3 + case 10..<100: structure.portLength += 2 + default /* 0..<10 */: structure.portLength += 1 + } + requiredCapacity += structure.portLength + } + + @inlinable + internal mutating func writeKnownAuthorityString( + _ authority: UnsafeBufferPointer, + usernameLength: Int, passwordLength: Int, hostnameLength: Int, portLength: Int + ) { + + assert(structure.usernameLength == 0 && structure.passwordLength == 0) + assert(structure.hostnameLength == 0 && structure.portLength == 0) + structure.usernameLength = usernameLength + structure.passwordLength = passwordLength + structure.hostnameLength = hostnameLength + structure.portLength = portLength + requiredCapacity += authority.count + } + + @inlinable + internal mutating func writePath( + firstComponentLength: Int, _ writer: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 { + + assert(structure.firstPathComponentLength == 0) + assert(structure.pathLength == 0) + structure.firstPathComponentLength = firstComponentLength + writer { structure.pathLength += $0.count } + requiredCapacity += structure.pathLength + } + + @inlinable + internal mutating func writePresizedPathUnsafely( + length: Int, firstComponentLength: Int, writer: (UnsafeMutableBufferPointer) -> Int + ) { + + assert(structure.firstPathComponentLength == 0) + assert(structure.pathLength == 0) + structure.firstPathComponentLength = firstComponentLength + structure.pathLength = length + requiredCapacity += length + } + + @inlinable + internal mutating func writeQueryContents( + isKnownFormEncoded: 
Bool, _ queryWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 { + + assert(structure.queryLength == 0) + structure.queryLength = 1 /* "?" */ + queryWriter { structure.queryLength += $0.count } + structure.queryIsKnownFormEncoded = isKnownFormEncoded + requiredCapacity += structure.queryLength + } + + @inlinable + internal mutating func writeFragmentContents( + _ fragmentWriter: (PieceWriter) -> Void + ) where T: Collection, T.Element == UInt8 { + + assert(structure.fragmentLength == 0) + structure.fragmentLength = 1 + fragmentWriter { structure.fragmentLength += $0.count } + requiredCapacity += structure.fragmentLength + } + + @inlinable + internal mutating func finalize() { + precondition(requiredCapacity >= 0) + if structure.queryIsKnownFormEncoded == false { + // Empty and nil queries are considered form-encoded (i.e. they do not need to be re-encoded). + structure.queryIsKnownFormEncoded = (structure.queryLength == 0 || structure.queryLength == 1) + } + structure.checkInvariants() + } + + // Hints. + + @inlinable + internal mutating func writeHint(_ component: WebURL.Component, maySkipPercentEncoding: Bool) { + hints.componentsWhichMaySkipPercentEncoding[component] = maySkipPercentEncoding + } + + @inlinable + internal mutating func writePathMetricsHint(_ pathMetrics: PathMetrics) { + hints.pathMetrics = pathMetrics + } +} + +/// A `URLWriter` which writes a URL string to a pre-sized mutable buffer. +/// +/// The buffer must have precisely the correct capacity to store the URL string, or a runtime error will be triggered. This implies that its address may not be `nil`. +/// The fact that the exact capacity is known (and `URLWriterHints` available) is taken as proof that the number of bytes written will not overflow an `Int`. 
///
@usableFromInline
internal struct UnsafePresizedBufferWriter: URLWriter {

  @usableFromInline
  internal let buffer: UnsafeMutableBufferPointer<UInt8>

  @usableFromInline
  internal private(set) var bytesWritten: Int

  @usableFromInline
  internal let knownHints: URLWriterHints

  @inlinable
  internal init(buffer: UnsafeMutableBufferPointer<UInt8>, hints: URLWriterHints) {
    self.buffer = buffer
    self.bytesWritten = 0
    self.knownHints = hints
    precondition(buffer.baseAddress != nil, "Invalid buffer")
  }

  /// Writes a single byte at the current position and advances the position by one.
  ///
  @inlinable
  internal mutating func _writeByte(_ byte: UInt8) {
    assert(bytesWritten < buffer.count)
    (buffer.baseAddress.unsafelyUnwrapped + bytesWritten).initialize(to: byte)
    bytesWritten &+= 1
  }

  /// Writes `count` copies of `byte` at the current position and advances the position by `count`.
  ///
  @inlinable
  internal mutating func _writeByte(_ byte: UInt8, count: Int) {
    // All `count` bytes must fit in the remaining space, not just the first one.
    assert(count >= 0 && count <= buffer.count - bytesWritten)
    (buffer.baseAddress.unsafelyUnwrapped + bytesWritten).initialize(repeating: byte, count: count)
    bytesWritten &+= count
  }

  /// Writes the given bytes at the current position and advances the position by the number of bytes written.
  ///
  @inlinable
  internal mutating func _writeBytes<T>(_ bytes: T) where T: Collection, T.Element == UInt8 {
    assert(bytesWritten < buffer.count || bytes.isEmpty)
    let count = UnsafeMutableBufferPointer(
      start: buffer.baseAddress.unsafelyUnwrapped + bytesWritten,
      count: buffer.count &- bytesWritten
    ).fastInitialize(from: bytes)
    bytesWritten &+= count
  }

  // URLWriter.

  @inlinable
  internal mutating func writeFlags(schemeKind: WebURL.SchemeKind, cannotBeABaseURL: Bool) {
    // This writer does not calculate a URLStructure.
  }

  @inlinable
  internal mutating func writeSchemeContents<T>(
    _ schemeBytes: T
  ) where T: Collection, T.Element == UInt8 {
    _writeBytes(schemeBytes)
    _writeByte(ASCII.colon.codePoint)
  }

  @inlinable
  internal mutating func writeAuthoritySigil() {
    _writeByte(ASCII.forwardSlash.codePoint, count: 2)
  }

  @inlinable
  internal mutating func writePathSigil() {
    _writeByte(ASCII.forwardSlash.codePoint)
    _writeByte(ASCII.period.codePoint)
  }

  @inlinable
  internal mutating func writeUsernameContents<T>(
    _ usernameWriter: (PieceWriter<T>) -> Void
  ) where T: Collection, T.Element == UInt8 {
    usernameWriter { _writeBytes($0) }
  }

  @inlinable
  internal mutating func writePasswordContents<T>(
    _ passwordWriter: (PieceWriter<T>) -> Void
  ) where T: Collection, T.Element == UInt8 {
    _writeByte(ASCII.colon.codePoint)
    passwordWriter { _writeBytes($0) }
  }

  @inlinable
  internal mutating func writeCredentialsTerminator() {
    _writeByte(ASCII.commercialAt.codePoint)
  }

  @inlinable
  internal mutating func writeHostname<T>(
    lengthIfKnown: Int?, _ hostnameWriter: (PieceWriter<T>) -> Void
  ) where T: Collection, T.Element == UInt8 {
    hostnameWriter { _writeBytes($0) }
  }

  @inlinable
  internal mutating func writePort(_ port: UInt16) {
    _writeByte(ASCII.colon.codePoint)
    let rawPointer = UnsafeMutableRawPointer(buffer.baseAddress.unsafelyUnwrapped + bytesWritten)
    bytesWritten &+= Int(ASCII.writeDecimalString(for: port, to: rawPointer))
  }

  @inlinable
  internal mutating func writeKnownAuthorityString(
    _ authority: UnsafeBufferPointer<UInt8>,
    usernameLength: Int, passwordLength: Int, hostnameLength: Int, portLength: Int
  ) {
    _writeBytes(authority)
  }

  @inlinable
  internal mutating func writePath<T>(
    firstComponentLength: Int, _ writer: (PieceWriter<T>) -> Void
  ) where T: Collection, T.Element == UInt8 {
    writer { _writeBytes($0) }
  }

  @inlinable
  internal mutating func writePresizedPathUnsafely(
    length: Int, firstComponentLength: Int, writer: (UnsafeMutableBufferPointer<UInt8>) -> Int
  ) {
    // The region handed to `writer` must lie entirely within the remaining space of the buffer.
    assert(length >= 0 && length <= buffer.count - bytesWritten)
    let space = UnsafeMutableBufferPointer(start: buffer.baseAddress.unsafelyUnwrapped + bytesWritten, count: length)
    let pathBytesWritten = writer(space)
    assert(pathBytesWritten == length)
    bytesWritten &+= pathBytesWritten
  }

  @inlinable
  internal mutating func writeQueryContents<T>(
    isKnownFormEncoded: Bool, _ queryWriter: (PieceWriter<T>) -> Void
  ) where T: Collection, T.Element == UInt8 {
    _writeByte(ASCII.questionMark.codePoint)
    queryWriter { _writeBytes($0) }
  }

  @inlinable
  internal mutating func writeFragmentContents<T>(
    _ fragmentWriter: (PieceWriter<T>) -> Void
  ) where T: Collection, T.Element == UInt8 {
    _writeByte(ASCII.numberSign.codePoint)
    fragmentWriter { _writeBytes($0) }
  }

  @inlinable
  internal func finalize() {
    // The buffer was sized exactly, so every byte of it must have been written.
    precondition(bytesWritten == buffer.count)
  }

  // Hints.

  @inlinable
  internal func getHint(maySkipPercentEncoding component: WebURL.Component) -> Bool {
    knownHints.componentsWhichMaySkipPercentEncoding[component]
  }

  @inlinable
  internal func getPathMetricsHint() -> PathMetrics? {
    knownHints.pathMetrics
  }
}


// --------------------------------------------
// MARK: - HostnameWriter
// --------------------------------------------


/// An interface through which a `ParsedHost` writes its contents.
///
@usableFromInline
internal protocol HostnameWriter {

  /// Writes the bytes given by `hostnameWriter`.
  /// The content must already be percent-encoded/IDNA-transformed and not include any separators.
  ///
  mutating func writeHostname<T>(
    lengthIfKnown: Int?, _ hostnameWriter: ((T) -> Void) -> Void
  ) where T: Collection, T.Element == UInt8
}

/// A `HostnameWriter` which computes the length of the hostname, were it to be written.
+/// +@usableFromInline +internal struct HostnameLengthCounter: HostnameWriter { + + /// The hostname length recorded so far: either the known length that was supplied, or the running sum of the written pieces' `count`s. + @usableFromInline + internal private(set) var length: Int + + /// Creates a counter whose `length` is zero. + @inlinable + internal init() { + self.length = 0 + } + + /// Records the hostname's length without storing any of its bytes. + /// If `lengthIfKnown` is not `nil`, it replaces `length` and `writerFunc` is not invoked; + /// otherwise `writerFunc` is invoked and the `count` of every piece it emits is added to `length`. + @inlinable + internal mutating func writeHostname( + lengthIfKnown: Int?, _ writerFunc: ((T) -> Void) -> Void + ) where T: Collection, T.Element == UInt8 { + if let knownLength = lengthIfKnown { + length = knownLength + return + } + writerFunc { piece in + length += piece.count + } + } +} + +/// A `HostnameWriter` which writes a hostname to a given buffer. +/// After writing, the buffer points to the space after the written hostname. +/// +@usableFromInline +internal struct UnsafeBufferHostnameWriter: HostnameWriter { + + /// The destination buffer. After each piece is written, it is rebased to `buffer.suffix(from: n)`, + /// where `n` is the value returned by `fastInitialize(from:)` for that piece. + @usableFromInline + internal private(set) var buffer: UnsafeMutableBufferPointer + + /// Creates a writer which writes to the start of `buffer`. + @inlinable + internal init(buffer: UnsafeMutableBufferPointer) { + self.buffer = buffer + } + + /// Passes each piece emitted by `writerFunc` to `buffer.fastInitialize(from:)`, then rebases `buffer` to start at the index that call returns + /// (presumably the number of bytes copied - confirm against `fastInitialize`). `lengthIfKnown` is not used by this writer. + @inlinable + internal mutating func writeHostname( + lengthIfKnown: Int?, _ writerFunc: ((T) -> Void) -> Void + ) where T: Collection, T.Element == UInt8 { + writerFunc { piece in + let n = buffer.fastInitialize(from: piece) + buffer = UnsafeMutableBufferPointer(rebasing: buffer.suffix(from: n)) + } + } +} diff --git a/Sources/WebURL/Parser/ValidationError.swift b/Sources/WebURL/Parser/ValidationError.swift new file mode 100644 index 000000000..f0ea39ee3 --- /dev/null +++ b/Sources/WebURL/Parser/ValidationError.swift @@ -0,0 +1,297 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + + +// -------------------------------------------- +// MARK: - Parser Callbacks +// -------------------------------------------- +// Almost no users care about the specific errors that occur during parsing +// - but there are use-cases, and it's helpful for testing. +// It's important that regular, release builds of the URL parser +// optimize out and around error reporting, so the callback needs to +// be a protocol in order to take advantage of generic specialization. +// -------------------------------------------- + + +/// An object which is informed by the URL parser if a validation error occurs. +/// +/// Most validation errors are non-fatal and parsing can continue regardless. If parsing fails, the last +/// validation error typically describes the issue which caused it to fail. +/// +@usableFromInline +internal protocol URLParserCallback { + mutating func validationError(_ error: ValidationError) +} + +/// A `URLParserCallback` which ignores all validation errors. +/// +@usableFromInline +internal struct IgnoreValidationErrors: URLParserCallback { + + @inlinable @inline(__always) + internal init() {} + + @inlinable @inline(__always) + internal mutating func validationError(_ error: ValidationError) {} +} + + +// -------------------------------------------- +// MARK: - Validation Errors +// -------------------------------------------- + + +/// A notification about a syntax oddity encountered by the URL parser. +/// +/// Even valid URLs which can be successfully parsed may emit validation errors - for instance, the mere _presence_ of credentials (username or password) +/// is considered grounds to report a `ValidationError`, even though such URLs can be parsed. 
+/// +@usableFromInline +internal struct ValidationError: Equatable { + + @usableFromInline + internal var _code: UInt8 + + @inlinable + internal init(_code: UInt8) { + self._code = _code + } +} + +// swift-format-ignore +extension ValidationError { + + // Named errors and their descriptions/examples from: https://github.com/whatwg/url/pull/502 + @inlinable internal static var unexpectedC0ControlOrSpace: Self { Self(_code: 0) } + @inlinable internal static var unexpectedASCIITabOrNewline: Self { Self(_code: 1) } + @inlinable internal static var invalidSchemeStart: Self { Self(_code: 2) } + @inlinable internal static var fileSchemeMissingFollowingSolidus: Self { Self(_code: 3) } + @inlinable internal static var invalidScheme: Self { Self(_code: 4) } + @inlinable internal static var missingSchemeNonRelativeURL: Self { Self(_code: 5) } + @inlinable internal static var relativeURLMissingBeginningSolidus: Self { Self(_code: 6) } + @inlinable internal static var unexpectedReverseSolidus: Self { Self(_code: 7) } + @inlinable internal static var missingSolidusBeforeAuthority: Self { Self(_code: 8) } + @inlinable internal static var unexpectedCommercialAt: Self { Self(_code: 9) } + @inlinable internal static var unexpectedCredentialsWithoutHost: Self { Self(_code: 10) } + @inlinable internal static var unexpectedPortWithoutHost: Self { Self(_code: 11) } + @inlinable internal static var emptyHostSpecialScheme: Self { Self(_code: 12) } + @inlinable internal static var hostInvalid: Self { Self(_code: 13) } + @inlinable internal static var portOutOfRange: Self { Self(_code: 14) } + @inlinable internal static var portInvalid: Self { Self(_code: 15) } + @inlinable internal static var unexpectedWindowsDriveLetter: Self { Self(_code: 16) } + @inlinable internal static var unexpectedWindowsDriveLetterHost: Self { Self(_code: 17) } + @inlinable internal static var invalidURLCodePoint: Self { Self(_code: 18) } + @inlinable internal static var unescapedPercentSign: Self { Self(_code: 
19) } + @inlinable internal static var unclosedIPv6Address: Self { Self(_code: 20) } + @inlinable internal static var domainToASCIIFailure: Self { Self(_code: 21) } + @inlinable internal static var domainToASCIIEmptyDomainFailure: Self { Self(_code: 22) } + @inlinable internal static var hostForbiddenCodePoint: Self { Self(_code: 23) } + @inlinable internal static var invalidIPv6Address: Self { Self(_code: 24) } + @inlinable internal static var invalidIPv4Address: Self { Self(_code: 25) } + // This one is not in the standard. + @inlinable internal static var _invalidUTF8: Self { Self(_code: 99) } +} + +// swift-format-ignore +#if DEBUG +extension ValidationError: CustomStringConvertible { + + @usableFromInline + internal var description: String { + switch self { + case .unexpectedC0ControlOrSpace: + return #""" + The input to the URL parser contains a leading or trailing C0 control or space. + The URL parser subsequently strips any matching code points. + + Example: " https://example.org " + """# + case .unexpectedASCIITabOrNewline: + return #""" + The input to the URL parser contains ASCII tab or newlines. + The URL parser subsequently strips any matching code points. + + Example: "ht + tps://example.org" + """# + case .invalidSchemeStart: + return #""" + The first code point of a URL’s scheme is not an ASCII alpha. + + Example: "3ttps://example.org" + """# + case .fileSchemeMissingFollowingSolidus: + return #""" + The URL parser encounters a URL with a "file" scheme that is not followed by "//". + + Example: "file:c:/my-secret-folder" + """# + case .invalidScheme: + return #""" + The URL’s scheme contains an invalid code point. + + Example: "^_^://example.org" and "https//example.org" + """# + case .missingSchemeNonRelativeURL: + return #""" + The input is missing a scheme, because it does not begin with an ASCII alpha, + and either no base URL was provided or the base URL cannot be used as a base URL + because its cannot-be-a-base-URL flag is set. 
+ + Example (Input’s scheme is missing and no base URL is given): + (url, base) = ("💩", nil) + + Example (Input’s scheme is missing, but the base URL’s cannot-be-a-base-URL flag is set): + (url, base) = ("💩", "mailto:user@example.org") + """# + case .relativeURLMissingBeginningSolidus: + return #""" + The input is a relative-URL String that does not begin with U+002F (/). + + Example: (url, base) = ("foo.html", "https://example.org/") + """# + case .unexpectedReverseSolidus: + return #""" + The URL has a special scheme and it uses U+005C (\) instead of U+002F (/). + + Example: "https://example.org\path\to\file" + """# + case .missingSolidusBeforeAuthority: + return #""" + The URL includes credentials that are not preceded by "//". + + Example: "https:user@example.org" + """# + case .unexpectedCommercialAt: + return #""" + The URL includes credentials, however this is considered invalid. + + Example: "https://user@example.org" + """# + case .unexpectedCredentialsWithoutHost: + return #""" + A U+0040 (@) is found between the URL’s scheme and host, but the URL does not include credentials. + + Example: "https://@example.org" + """# + case .unexpectedPortWithoutHost: + return #""" + The URL contains a port, but no host. + + Example: "https://:443" + """# + case .emptyHostSpecialScheme: + return #""" + The URL has a special scheme, but does not contain a host. + + Example: "https://#fragment" + """# + // TODO: This description could be improved. + case .hostInvalid: + return #""" + The host portion of the URL is an empty string when it includes credentials or a port and the basic URL parser’s state is overridden. + + Example: + var url = WebURL("https://example:9000")! + url.hostname = "" + """# + case .portOutOfRange: + return #""" + The input’s port is too big. + + Example: "https://example.org:70000" + """# + case .portInvalid: + return #""" + The input’s port is invalid. 
+ + Example: "https://example.org:7z" + """# + case .unexpectedWindowsDriveLetter: + return #""" + The input is a relative-URL string that starts with a Windows drive letter and the base URL’s scheme is "file". + + Example: (url, base) = ("/c:/path/to/file", "file:///c:/") + """# + case .unexpectedWindowsDriveLetterHost: + return #""" + The file URL’s host is a Windows drive letter. + + Example: "file://c:" + """# + case .invalidURLCodePoint: + return #""" + A code point is found that is not a URL code point or U+0025 (%), in the URL’s path, query, or fragment. + + Example: "https://example.org/>" + """# + case .unescapedPercentSign: + return #""" + A U+0025 (%) is found that is not followed by two ASCII hex digits, in the URL’s path, query, or fragment. + + Example: "https://example.org/%s" + """# + case .unclosedIPv6Address: + return #""" + An IPv6 address is missing the closing U+005D (]). + + Example: "https://[::1" + """# + case .domainToASCIIFailure: + return #""" + The URL's domain contains non-ASCII characters, and IDNA processing failed. + + Note: For the time being, WebURL does not support non-ASCII domains. + """# + case .domainToASCIIEmptyDomainFailure: + return #""" + The URL's domain contains non-ASCII characters, and IDNA processing returned an empty string. + + This can be caused by many things, such as the domain consisting only of ignorable code points, + or if the domain is the string "xn--". + """# + case .hostForbiddenCodePoint: + return #""" + The input’s host contains a forbidden host code point. Note that hosts are percent-decoded before + being processed when the URL's scheme is special, which would result in the following URL having a hostname + of "exa#mple.org" (which contains the forbidden host code point "#"). + + Example: "https://exa%23mple.org" + """# + case .invalidIPv6Address: + return #""" + The URL's domain is an invalid IPv6 address. 
+ + Example: "https://[:::]/" + Example: "https://[::hello]/" + """# + case .invalidIPv4Address: + return #""" + The URL's domain is an invalid IPv4 address. + + Example: "https://999999999999999/" + Example: "https://300.300.300.300/" + """# + // non-spec. + case ._invalidUTF8: + return #""" + The given input is not a valid sequence of UTF-8 code-units. + """# + // fallback: a code which has no named error above is reported by its raw value. + default: + return "Internal error: \(_code)" + } + } +} +#endif diff --git a/Sources/WebURL/Parser/WebURL+Component.swift b/Sources/WebURL/Parser/WebURL+Component.swift new file mode 100644 index 000000000..5e4f2cac7 --- /dev/null +++ b/Sources/WebURL/Parser/WebURL+Component.swift @@ -0,0 +1,88 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extension WebURL { + + // swift-format-ignore + /// A value representing a component in a URL. + /// + /// Each component has a unique bit set on its `rawValue`, making it suitable for use in bit-sets. + /// + /// - seealso: `WebURL.ComponentSet`. 
+ /// + @usableFromInline + internal struct Component: Equatable { + + /// The bit pattern identifying this component. Each of the named components below has exactly one bit set. + @usableFromInline + internal let rawValue: UInt8 + + /// Creates a component from `rawValue` as-is; the value is stored without any checks. + @inlinable + internal init(_unchecked rawValue: UInt8) { + self.rawValue = rawValue + } + + @inlinable internal static var scheme: Self { Self(_unchecked: 1 << 0) } + @inlinable internal static var username: Self { Self(_unchecked: 1 << 1) } + @inlinable internal static var password: Self { Self(_unchecked: 1 << 2) } + @inlinable internal static var hostname: Self { Self(_unchecked: 1 << 3) } + @inlinable internal static var port: Self { Self(_unchecked: 1 << 4) } + @inlinable internal static var path: Self { Self(_unchecked: 1 << 5) } + @inlinable internal static var query: Self { Self(_unchecked: 1 << 6) } + @inlinable internal static var fragment: Self { Self(_unchecked: 1 << 7) } + } +} + +extension WebURL { + + /// An efficient set of `WebURL.Component` values. + /// + @usableFromInline + internal struct ComponentSet: Equatable, ExpressibleByArrayLiteral { + + /// The bitwise OR of the `rawValue`s of all members; zero when the set is empty. + @usableFromInline + internal var _rawValue: UInt8 + + /// Creates a set containing the given components. Repeated components have no additional effect. + @inlinable + internal init(arrayLiteral elements: Component...) { + self._rawValue = elements.reduce(into: 0) { $0 |= $1.rawValue } + } + + /// Reads or writes the membership of `component`: getting calls `contains`; + /// setting `true` inserts the component and setting `false` removes it. + @inlinable + internal subscript(component: Component) -> Bool { + get { contains(component) } + set { newValue ? insert(component) : remove(component) } + } + + /// Inserts a component into the set. + /// Inserting a component which is already a member has no effect. + /// + @inlinable + internal mutating func insert(_ newMember: Component) { + _rawValue |= newMember.rawValue + } + + /// Removes a component from the set. + /// Removing a component which is not a member has no effect. + /// + @inlinable + internal mutating func remove(_ newMember: Component) { + _rawValue &= ~newMember.rawValue + } + + /// Whether or not the given component is a member of this set. 
+ /// + @inlinable + internal func contains(_ member: Component) -> Bool { + return (_rawValue & member.rawValue) != 0 + } + } +} diff --git a/Sources/WebURL/PercentEncoding.swift b/Sources/WebURL/PercentEncoding.swift new file mode 100644 index 000000000..d525ca68e --- /dev/null +++ b/Sources/WebURL/PercentEncoding.swift @@ -0,0 +1,1339 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// A set of characters which should be transformed or substituted in order to percent-encode (or percent-escape) an ASCII string. +/// +/// Conforming types should be exposed as computed properties of `PercentEncodeSet`, and encode-sets which substitute characters +/// should additionally be added to `PercentDecodeSet`. These properties are never called, and only used to create a concise, +/// `KeyPath`-based generic interface. +/// +/// ```swift +/// struct MyEncodeSet: PercentEncodeSetProtocol { +/// // ... +/// } +/// +/// extension PercentEncodeSet { +/// var myEncodeSet: MyEncodeSet.Type { fatalError("Do not call") } +/// } +/// +/// "a string".percentEncoded(as: \.myEncodeSet) +/// "a%20string".percentDecoded(from: \.percentEncodedOnly) +/// ``` +/// +public protocol PercentEncodeSetProtocol { + + /// Whether or not the given ASCII `codePoint` should be percent-encoded. 
+ /// + static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool + + /// An optional function which allows the encode-set to replace a non-percent-encoded source codepoint with another codepoint. + /// + /// For example, the `application/x-www-form-urlencoded` encoding does not percent-encode ASCII spaces (`0x20`) as "%20", + /// instead replacing them with a "+" (`0x2B`). An implementation of this encoding would look like this: + /// + /// ```swift + /// struct FormEncodeSet: PercentEncodeSetProtocol { + /// + /// static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + /// if codePoint == 0x20 { return false } // do not percent-encode spaces, substitute instead. + /// if codePoint == 0x2B { return true } // percent-encode "+"s in the source. + /// // other codepoints... + /// } + /// + /// static func substitute(ascii codePoint: UInt8) -> UInt8? { + /// if codePoint == 0x20 { return 0x2B } // Substitute spaces with "+". + /// return nil + /// } + /// + /// static func unsubstitute(ascii codePoint: UInt8) -> UInt8? { + /// if codePoint == 0x2B { return 0x20 } // Unsubstitute "+" to space. + /// return nil + /// } + /// } + /// ``` + /// + /// The ASCII percent sign (`0x25`) and upper- and lowercase alpha characters (`0x41...0x5A` and `0x61...0x7A`) must not be substituted. + /// Conforming types must also implement the reverse substitution function, `unsubstitute(ascii:)`, and should ensure that any codepoints emitted + /// as substitutes are percent-encoded by `shouldPercentEncode`. + /// + /// - parameters: + /// - codePoint: The ASCII codepoint from the source. Always in the range `0...127`. + /// - returns: The codepoint to emit instead of `codePoint`, or `nil` if the codepoint should not be substituted. + /// If not `nil`, must always be in the range `0...127` + /// + static func substitute(ascii codePoint: UInt8) -> UInt8? + + /// An optional function which recovers a non-percent-decoded codepoint from its substituted value. 
+ /// + /// For example, the `application/x-www-form-urlencoded` encoding does not percent-encode ASCII spaces (`0x20`) as "%20", + /// instead replacing them with a "+" (`0x2B`). An implementation of this encoding would look like this: + /// + /// ```swift + /// struct FormEncodeSet: PercentEncodeSetProtocol { + /// + /// static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + /// if codePoint == 0x20 { return false } // do not percent-encode spaces, substitute instead. + /// if codePoint == 0x2B { return true } // percent-encode "+"s in the source. + /// // other codepoints... + /// } + /// + /// static func substitute(ascii codePoint: UInt8) -> UInt8? { + /// if codePoint == 0x20 { return 0x2B } // Substitute spaces with "+". + /// return nil + /// } + /// + /// static func unsubstitute(ascii codePoint: UInt8) -> UInt8? { + /// if codePoint == 0x2B { return 0x20 } // Unsubstitute "+" to space. + /// return nil + /// } + /// } + /// ``` + /// + /// The ASCII percent sign (`0x25`) and upper- and lowercase alpha characters (`0x41...0x5A` and `0x61...0x7A`) must not be substituted. + /// Conforming types must also implement the substitution function, `substitute(ascii:)`, and should ensure that any codepoints emitted + /// as substitutes are percent-encoded by `shouldPercentEncode`. + /// + /// Codepoints emitted by this function are not recognised as being part of a percent-encoded byte sequence, and values decoded from percent-encoded + /// byte sequences are assumed not to have been substituted. + /// + /// - parameters: + /// - codePoint: The possibly-substituted ASCII codepoint from an encoded string. Always in the range `0...127`. + /// - returns: The codepoint to emit instead of `codePoint`, or `nil` if the codepoint was not substituted by this encode-set. + /// If not `nil`, must always be in the range `0...127` + /// + static func unsubstitute(ascii codePoint: UInt8) -> UInt8? 
+} + +extension PercentEncodeSetProtocol { + + @inlinable @inline(__always) + public static func substitute(ascii codePoint: UInt8) -> UInt8? { + nil + } + + @inlinable @inline(__always) + public static func unsubstitute(ascii codePoint: UInt8) -> UInt8? { + nil + } +} + + +// -------------------------------------------- +// MARK: - Encoding +// -------------------------------------------- + + +extension LazyCollectionProtocol where Element == UInt8 { + + /// Interprets this collection's elements as UTF8 code-units, and returns a collection of ASCII codepoints formed by lazily encoding + /// the source contents with the given `EncodeSet`. + /// + @inlinable @inline(__always) + public func percentEncoded( + as: KeyPath + ) -> LazilyPercentEncodedUTF8 { + LazilyPercentEncodedGroups(source: elements, encodeSet: EncodeSet.self).joined() + } + + /// Interprets this collection's elements as UTF8 code-units, and returns a collection of groups of ASCII codepoints, where each group is formed by lazily encoding + /// the source contents with `EncodeSet`. + /// + @inlinable @inline(__always) + internal func percentEncodedGroups( + as: KeyPath + ) -> LazilyPercentEncodedGroups { + LazilyPercentEncodedGroups(source: elements, encodeSet: EncodeSet.self) + } +} + +/// A `Collection` which lazily percent-encodes its `Source` UTF8 code-units using a given `EncodeSet`. +/// This collection only _adds_ percent-encoding or substitutions; it does not decode any pre-existing percent-encoded or substituted code-points in `Source`. +/// +/// Percent encoding transforms arbitrary Unicode strings to a limited set of ASCII code-points which are permitted by the `EncodeSet`. +/// If the `EncodeSet` performs substitutions, users should take care to decode the contents using the same `EncodeSet`. 
+/// +public typealias LazilyPercentEncodedUTF8 = + FlattenSequence> +where Source: Collection, Source.Element == UInt8, EncodeSet: PercentEncodeSetProtocol + +/// A `Collection` which lazily percent-encodes its `Source` UTF8 code-units using a given `EncodeSet`. +/// This collection only _adds_ percent-encoding or substitutions; it does not decode any pre-existing percent-encoded or substituted code-points in `Source`. +/// +/// The elements of this collection are `_PercentEncodedByte`s, which are small clusters of either 1 or 3 ASCII code-points depending on how the source +/// code-unit must be encoded. The overall, encoded ASCII string is obtained by flattening this 2-dimensional collection. +/// +/// Percent encoding transforms arbitrary Unicode strings to a limited set of ASCII code-points which are permitted by the `EncodeSet`. +/// If the `EncodeSet` performs substitutions, users should take care to decode the contents using the same `EncodeSet`. +/// +public struct LazilyPercentEncodedGroups: Collection, LazyCollectionProtocol +where Source: Collection, Source.Element == UInt8, EncodeSet: PercentEncodeSetProtocol { + + @usableFromInline + internal let source: Source + + @inlinable + internal init(source: Source, encodeSet: EncodeSet.Type) { + self.source = source + } + + public typealias Index = Source.Index + + @inlinable + public var startIndex: Index { + source.startIndex + } + + @inlinable + public var endIndex: Index { + source.endIndex + } + + @inlinable + public subscript(position: Index) -> _PercentEncodedByte { + let sourceByte = source[position] + if let asciiChar = ASCII(sourceByte), EncodeSet.shouldPercentEncode(ascii: asciiChar.codePoint) == false { + if let substitute = EncodeSet.substitute(ascii: asciiChar.codePoint) { + return _PercentEncodedByte(.substituted, substitute) + } else { + return _PercentEncodedByte(.unencoded, sourceByte) + } + } + return _PercentEncodedByte(.percentEncoded, sourceByte) + } + + @inlinable + public func 
index(after i: Index) -> Index { + source.index(after: i) + } + + @inlinable + public func formIndex(after i: inout Index) { + source.formIndex(after: &i) + } + + @inlinable + public func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? { + source.index(i, offsetBy: distance, limitedBy: limit) + } + + @inlinable + public func formIndex(_ i: inout Index, offsetBy distance: Int, limitedBy limit: Index) -> Bool { + source.formIndex(&i, offsetBy: distance, limitedBy: limit) + } + + @inlinable + public var isEmpty: Bool { + source.isEmpty + } + + @inlinable + public var underestimatedCount: Int { + source.underestimatedCount + } + + @inlinable + public var count: Int { + source.count + } + + @inlinable + public func distance(from start: Index, to end: Index) -> Int { + source.distance(from: start, to: end) + } +} + +extension LazilyPercentEncodedGroups: BidirectionalCollection where Source: BidirectionalCollection { + + @inlinable + public func index(before i: Index) -> Index { + source.index(before: i) + } + + @inlinable + public func formIndex(before i: inout Index) { + source.formIndex(before: &i) + } +} + +extension LazilyPercentEncodedGroups: RandomAccessCollection where Source: RandomAccessCollection {} + +/// A UTF8 code-unit which has been encoded by a `PercentEncodeSet` to a collection of ASCII codepoints. +/// +public struct _PercentEncodedByte: RandomAccessCollection { + + @usableFromInline + internal enum Encoding { + case unencoded + case substituted + case percentEncoded + } + + /// The method used to encode this UTF8 code-unit from its source. + /// If the method is `percentEncoded`, the collection will contain 3 ASCII codepoints. Otherwise, it will contain 1 ASCII codepoint. 
+ /// + @usableFromInline + internal let encoding: Encoding + + @usableFromInline + internal let byte: UInt8 + + @inlinable + internal init(_ encoding: Encoding, _ byte: UInt8) { + self.encoding = encoding + self.byte = byte + } + + @inlinable + public var startIndex: Int { + 0 + } + + @inlinable + public var endIndex: Int { + switch encoding { + case .unencoded, .substituted: + return 1 + case .percentEncoded: + return 3 + } + } + + @inlinable + public subscript(position: Int) -> UInt8 { + switch encoding { + case .unencoded: + assert(position == 0, "Invalid index") + return byte + case .substituted: + assert(position == 0, "Invalid index") + return byte + case .percentEncoded: + assert((0..<3).contains(position), "Invalid index") + switch position { + case 0: return ASCII.percentSign.codePoint + case 1: return ASCII.uppercaseHexDigit(of: byte &>> 4).codePoint + default: return ASCII.uppercaseHexDigit(of: byte).codePoint + } + } + } + + @inlinable + public func index(after i: Int) -> Int { + i &+ 1 + } + + @inlinable + public func formIndex(after i: inout Int) { + i &+= 1 + } + + @inlinable + public func index(before i: Int) -> Int { + i &- 1 + } + + @inlinable + public func formIndex(before i: inout Int) { + i &-= 1 + } + + @inlinable + public var isEmpty: Bool { + false + } + + @inlinable + public var underestimatedCount: Int { + endIndex + } + + @inlinable + public var count: Int { + endIndex + } + + @inlinable + public func distance(from start: Int, to end: Int) -> Int { + end &- start + } +} + +extension LazilyPercentEncodedGroups { + + /// Calls `writer` for every `_PercentEncodedByte` in this collection, in `for`-loop order, + /// and returns whether any of the visited code-units were encoded by the `EncodeSet`. + /// + @inlinable @inline(__always) + internal func write(to writer: (_PercentEncodedByte) -> Void) -> Bool { + var didEncode = false + // This leads to significantly better code generation than a 'for' loop, especially after inlining. 
+ var i = startIndex + while i < endIndex { + let byteGroup = self[i] + writer(byteGroup) + switch byteGroup.encoding { + case .percentEncoded, .substituted: + didEncode = true + case .unencoded: + break + } + formIndex(after: &i) + } + return didEncode + } + + /// Returns the total length of the encoded UTF-8 bytes, + /// and whether or not any code-units were altered by the `EncodeSet`. + /// + @inlinable @inline(__always) + internal var encodedLength: (count: Int, needsEncoding: Bool) { + var count = 0 + let needsEncoding = write { count += $0.count } + return (count, needsEncoding) + } +} + +// Eager encoding to String. + +extension Collection where Element == UInt8 { + + /// Interprets this collection's elements as UTF-8 code-units, and returns a `String` formed by encoding them using the given `EncodeSet`. + /// + /// - seealso: `StringProtocol.percentEncoded(as:)` + /// + @inlinable @inline(__always) + public func percentEncodedString( + as encodeSet: KeyPath + ) -> String { + // Encode from contiguous storage when the collection offers it; otherwise encode the collection's elements directly. + withContiguousStorageIfAvailable { + String(decoding: $0.withoutTrappingOnIndexOverflow.lazy.percentEncoded(as: encodeSet), as: UTF8.self) + } ?? String(decoding: self.lazy.percentEncoded(as: encodeSet), as: UTF8.self) + } + + /// Interprets this collection's elements as UTF-8 code-units, and returns a `String` formed by encoding them using the `\.component` encoding-set. + /// + /// - seealso: `StringProtocol.urlComponentEncoded` + /// + @inlinable + public var urlComponentEncodedString: String { + percentEncodedString(as: \.component) + } + + /// Interprets this collection's elements as UTF-8 code-units, and returns a `String` formed by encoding them using the + /// `application/x-www-form-urlencoded` (`\.form`) encoding-set. + /// + /// - seealso: `StringProtocol.urlFormEncoded` + /// + @inlinable + public var urlFormEncodedString: String { + percentEncodedString(as: \.form) + } +} + +extension StringProtocol { + + /// Returns a copy of this string, encoded using the given `EncodeSet`. 
+ /// + /// This function only _adds_ percent-encoding or substitutions as required by `EncodeSet`; it does not decode any percent-encoded or substituted characters + /// already contained in the string. + /// + /// Percent-encoding transforms strings containing arbitrary Unicode characters to ones containing a limited set of ASCII code-points permitted by + /// the `EncodeSet`. If the `EncodeSet` performs substitutions, users should take care to decode the contents using the same `EncodeSet`. + /// + /// ```swift + /// "hello, world!".percentEncoded(as: \.userInfo) // hello,%20world! + /// "/usr/bin/swift".percentEncoded(as: \.component) // %2Fusr%2Fbin%2Fswift + /// "got en%63oders?".percentEncoded(as: \.userInfo) // got%20en%63oders%3F + /// "king of the 🦆s".percentEncoded(as: \.form) // king+of+the+%F0%9F%A6%86s + /// ``` + /// + @inlinable @inline(__always) + public func percentEncoded( + as encodeSet: KeyPath + ) -> String { + utf8.percentEncodedString(as: encodeSet) + } + + /// Returns a copy of this string, encoded using the `\.component` encoding-set. + /// + /// The `\.component` encoding-set is suitable for encoding strings so they may be embedded in a URL's `path`, `query`, `fragment`, + /// or in the names of opaque `host`s. It does not perform substitutions. + /// + /// The URL standard confirms that encoding a string using the `\.component` set gives identical results + /// to JavaScript's `encodeURIComponent()` function. + /// + /// ```swift + /// "hello, world!".urlComponentEncoded // hello%2C%20world! + /// "/usr/bin/swift".urlComponentEncoded // %2Fusr%2Fbin%2Fswift + /// "😎".urlComponentEncoded // %F0%9F%98%8E + /// ``` + /// + @inlinable + public var urlComponentEncoded: String { + utf8.urlComponentEncodedString + } + + /// Returns a copy of this string, encoded using the `application/x-www-form-urlencoded` (`\.form`) encoding-set. 
+ /// + /// To create an `application/x-www-form-urlencoded` key-value pair string from a collection of keys and values, encode each key and value, and join + /// the results using the format: `encoded-key-1=encoded-value-1&encoded-key-2=encoded-value-2...`. For example: + /// + /// ```swift + /// let myKVPs: KeyValuePairs = ["favourite pet": "🦆, of course", "favourite foods": "🍎 & 🍦" ] + /// let form = myKVPs.map { key, value in "\(key.urlFormEncoded)=\(value.urlFormEncoded)" } + /// .joined(separator: "&") + /// print(form) // favourite+pet=%F0%9F%A6%86%2C+of+course&favourite+foods=%F0%9F%8D%8E+%26+%F0%9F%8D%A6 + /// ``` + /// + /// This encoding-set performs substitutions. Users should take care to also decode the resulting strings using the `application/x-www-form-urlencoded` + /// decoding-set. + /// + @inlinable + public var urlFormEncoded: String { + utf8.urlFormEncodedString + } +} + + +// -------------------------------------------- +// MARK: - Decoding +// -------------------------------------------- + + +extension LazyCollectionProtocol where Element == UInt8 { + + /// Interprets this collection's elements as UTF-8 code-units, and returns a collection of bytes whose elements are computed lazily + /// by decoding all percent-encoded code-unit sequences and using `EncodeSet` to restore substituted code-units. + /// + /// If no code-points were substituted when this collection's contents were encoded, `\.percentEncodedOnly` may be used to only remove percent-encoding. + /// + /// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs + /// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), + /// sometimes under multiple layers of encoding. 
Users should be careful not to over-decode their strings, and every time a string + /// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated. + /// + @inlinable @inline(__always) + public func percentDecodedUTF8( + from: KeyPath + ) -> LazilyPercentDecodedUTF8 { + LazilyPercentDecodedUTF8(source: elements) + } + + /// Interprets this collection's elements as UTF-8 code-units, and returns a collection of bytes whose elements are computed lazily + /// by decoding all percent-encoded code-unit sequences. + /// + /// This is equivalent to calling `percentDecodedUTF8(from: \.percentEncodedOnly)`. If this collection's contents were encoded + /// with substitutions (e.g. using form-encoding), use `percentDecodedUTF8(from:)` instead, providing a `PercentDecodeSet` which is able + /// to reverse those substitutions. + /// + /// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs + /// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), + /// sometimes under multiple layers of encoding. Users should be careful not to over-decode their strings, and every time a string + /// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated. + /// + @inlinable + public var percentDecodedUTF8: LazilyPercentDecodedUTF8WithoutSubstitutions { + percentDecodedUTF8(from: \.percentEncodedOnly) + } +} + +/// A `Collection` which lazily replaces all percent-encoded UTF8 code-units from its `Source` with their decoded code-units. +/// It does not reverse any substitutions that may be a part of how `Source` is encoded. +/// +/// Percent decoding transforms certain sequences of ASCII code-points to arbitrary byte values ("%AB" to the byte 0xAB).
+/// +/// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs +/// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), +/// sometimes under multiple layers of encoding. Users should be careful not to over-decode their strings, and every time a string +/// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated. +/// +public typealias LazilyPercentDecodedUTF8WithoutSubstitutions = + LazilyPercentDecodedUTF8 where Source: Collection, Source.Element == UInt8 + +/// A `Collection` which lazily replaces all percent-encoded UTF8 code-units from its `Source` with their decoded code-units, +/// and reverses substitutions of other code-units performed by `EncodeSet`. +/// +/// If the encode-set does not perform substitutions, `PercentEncodeSet._Passthrough` can be used to only remove percent-encoding. +/// +/// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs +/// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), +/// sometimes under multiple layers of encoding. Users should be careful not to over-decode their strings, and every time a string +/// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated.
+/// +public struct LazilyPercentDecodedUTF8: Collection, LazyCollectionProtocol +where Source: Collection, Source.Element == UInt8, EncodeSet: PercentEncodeSetProtocol { + + @usableFromInline + internal let source: Source + + public let startIndex: Index + + @inlinable + internal init(source: Source) { + self.source = source + self.startIndex = Index(at: source.startIndex, in: source) + } + + public typealias Element = UInt8 + + @inlinable + public var endIndex: Index { + Index(endIndexOf: source) + } + + @inlinable + public func index(after i: Index) -> Index { + assert(i != endIndex, "Attempt to advance endIndex") + // Does not trap in release mode - just keeps returning 'endIndex'. + return Index(at: i.range.upperBound, in: source) + } + + @inlinable + public func formIndex(after i: inout Index) { + assert(i != endIndex, "Attempt to advance endIndex") + // Does not trap in release mode - just keeps returning 'endIndex'. + i = Index(at: i.range.upperBound, in: source) + } + + @inlinable + public subscript(position: Index) -> Element { + assert(position != endIndex, "Attempt to read element at endIndex") + return position.decodedValue + } + + public struct Index: Comparable { + + /// Always either 0, 1, or 3 bytes from the source: + /// - 0 bytes: `endIndex` only. + /// - 1 byte: non-encoded or substituted byte. + /// - 3 bytes: percent-encoded byte. + /// + @usableFromInline + internal let range: Range + + @usableFromInline + internal let isDecoded: Bool + + @usableFromInline + internal let decodedValue: UInt8 + + /// Creates an index referencing the given source collection's `endIndex`. + /// This index's `decodedValue` is always 0. It is meaningless and should not be read. + /// + @inlinable + internal init(endIndexOf source: Source) { + self.range = Range(uncheckedBounds: (source.endIndex, source.endIndex)) + self.isDecoded = false + self.decodedValue = 0 + } + + /// Decodes the UTF8 code-unit starting at the given index in the given `source` collection. 
+ /// This index's successor may be obtained by creating another index starting at the index's `range.upperBound`. + /// + /// The index which starts at `source.endIndex` is also given by `Index(endIndexOf:)`. + /// + @inlinable + internal init(at i: Source.Index, in source: Source) { + guard i != source.endIndex else { + self = .init(endIndexOf: source) + return + } + let byte0 = source[i] + let byte1Index = source.index(after: i) + guard byte0 == ASCII.percentSign.codePoint else { + self.range = Range(uncheckedBounds: (i, byte1Index)) + self.isDecoded = false + self.decodedValue = ASCII(byte0).flatMap { EncodeSet.unsubstitute(ascii: $0.codePoint) } ?? byte0 + return + } + var tail = source.suffix(from: byte1Index) + guard + let decodedByte1 = ASCII(flatMap: tail.popFirst())?.hexNumberValue, + let decodedByte2 = ASCII(flatMap: tail.popFirst())?.hexNumberValue + else { + self.range = Range(uncheckedBounds: (i, byte1Index)) + self.isDecoded = false + self.decodedValue = ASCII.percentSign.codePoint // Percent-sign should never be substituted. + return + } + self.decodedValue = (decodedByte1 &* 16) &+ (decodedByte2) + self.isDecoded = true + self.range = Range(uncheckedBounds: (i, tail.startIndex)) + } + + @inlinable + public static func == (lhs: Self, rhs: Self) -> Bool { + return lhs.range.lowerBound == rhs.range.lowerBound + } + + @inlinable + public static func < (lhs: Self, rhs: Self) -> Bool { + return lhs.range.lowerBound < rhs.range.lowerBound + } + } +} + +// Eager decoding to String. + +extension Collection where Element == UInt8 { + + /// Interprets this collection's elements as UTF-8 code-units, and returns a string formed by decoding all percent-encoded code-unit sequences and + /// using `EncodeSet` to restore substituted code-units. + /// + /// If no code-points were substituted when this collection's contents were encoded, `\.percentEncodedOnly` may be used to only remove percent-encoding. 
+ /// + /// - seealso: `StringProtocol.percentDecoded(from:)` + /// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs + /// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), + /// sometimes under multiple layers of encoding. Users should be careful not to over-decode their strings, and every time a string + /// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated. + /// + @inlinable @inline(__always) + public func percentDecodedString( + from decodeSet: KeyPath + ) -> String where EncodeSet: PercentEncodeSetProtocol { + withContiguousStorageIfAvailable { + String(decoding: $0.withoutTrappingOnIndexOverflow.lazy.percentDecodedUTF8(from: decodeSet), as: UTF8.self) + } ?? String(decoding: self.lazy.percentDecodedUTF8(from: decodeSet), as: UTF8.self) + } + + /// Interprets this collection's elements as UTF-8 code-units, and returns a string formed by decoding all percent-encoded code-unit sequences. + /// + /// This is equivalent to calling `percentDecodedString(from: \.percentEncodedOnly)`. If this collection's contents were encoded + /// with substitutions (e.g. form-encoding), use `percentDecodedString(from:)` instead, providing a `PercentDecodeSet` which is able to + /// reverse those substitutions. + /// + /// - seealso: `StringProtocol.percentDecoded` + /// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs + /// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), + /// sometimes under multiple layers of encoding.
Users should be careful not to over-decode their strings, and every time a string + /// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated. + /// + @inlinable + public var percentDecodedString: String { + percentDecodedString(from: \.percentEncodedOnly) + } + + /// Interprets this collection's elements as UTF-8 code-units, and returns a string formed by decoding all percent-encoded code-unit sequences and + /// reversing substitutions made by the `application/x-www-form-urlencoded` encode-set. + /// + /// This is equivalent to calling `percentDecodedString(from: \.form)`. + /// + /// - seealso: `StringProtocol.urlFormDecoded` + /// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs + /// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), + /// sometimes under multiple layers of encoding. Users should be careful not to over-decode their strings, and every time a string + /// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated. + /// + @inlinable + public var urlFormDecodedString: String { + percentDecodedString(from: \.form) + } +} + +extension StringProtocol { + + /// Returns a string formed by decoding all percent-encoded code-units in this string, and using `EncodeSet` to restore substituted code-units. + /// + /// If no code-points were substituted when this string was encoded, `\.percentEncodedOnly` may be used to only remove percent-encoding. + /// + /// ```swift + /// "hello,%20world!".percentDecoded(from: \.percentEncodedOnly) // "hello, world!"
+ /// "%2Fusr%2Fbin%2Fswift".percentDecoded(from: \.percentEncodedOnly) // "/usr/bin/swift" + /// "king+of+the+%F0%9F%A6%86s".percentDecoded(from: \.form) // "king of the 🦆s" + /// ``` + /// + /// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs + /// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), + /// sometimes under multiple layers of encoding. Users should be careful not to over-decode their strings, and every time a string + /// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated. + /// + @inlinable @inline(__always) + public func percentDecoded( + from decodeSet: KeyPath + ) -> String where EncodeSet: PercentEncodeSetProtocol { + utf8.percentDecodedString(from: decodeSet) + } + + /// Returns a string formed by decoding all percent-encoded code-units in the contents of this string. + /// + /// ```swift + /// "hello%2C%20world!".percentDecoded // hello, world! + /// "%2Fusr%2Fbin%2Fswift".percentDecoded // /usr/bin/swift + /// "%F0%9F%98%8E".percentDecoded // 😎 + /// ``` + /// + /// This is equivalent to calling `percentDecoded(from: \.percentEncodedOnly)`. If this string's contents were encoded + /// with substitutions (e.g. form-encoding), use `percentDecoded(from:)` instead, providing a `PercentDecodeSet` which is able to + /// reverse those substitutions. + /// + /// Equivalent to JavaScript's `decodeURIComponent()` function. + /// + /// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs + /// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), + /// sometimes under multiple layers of encoding.
Users should be careful not to over-decode their strings, and every time a string + /// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated. + /// + @inlinable + public var percentDecoded: String { + utf8.percentDecodedString + } + + /// Returns a string formed by decoding all percent-encoded code-units in this string and reversing substitutions made by + /// the `application/x-www-form-urlencoded` encode-set. + /// + /// This is equivalent to calling `percentDecoded(from: \.form)`. + /// + /// The following example decodes a form-encoded URL query by splitting a string in to key-value pairs at the "&" character, splitting the key from the value + /// at the "=" character, and decoding each key and value from its encoded representation: + /// + /// ```swift + /// let form = "favourite+pet=%F0%9F%A6%86%2C+of+course&favourite+foods=%F0%9F%8D%8E+%26+%F0%9F%8D%A6" + /// let decoded = form.split(separator: "&").map { joined_kvp in joined_kvp.split(separator: "=") } + /// .map { kvp in (kvp[0].urlFormDecoded, kvp[1].urlFormDecoded) } + /// print(decoded) // [("favourite pet", "🦆, of course"), ("favourite foods", "🍎 & 🍦")] + /// ``` + /// + /// - important: Users should beware that percent-encoding has frequently been used by attackers to smuggle malicious inputs + /// (e.g. extra path components which lead to sensitive data when used as a relative path, ASCII NULL bytes, or SQL injection), + /// sometimes under multiple layers of encoding. Users should be careful not to over-decode their strings, and every time a string + /// is percent-decoded, the result must be considered to be **entirely unvalidated**, even if the source contents were previously validated.
+ /// + @inlinable + public var urlFormDecoded: String { + utf8.urlFormDecodedString + } +} + + +// -------------------------------------------- +// MARK: - Encode Sets +// -------------------------------------------- + + +/// A namespace for percent-decode sets. +/// +/// Decoding is thankfully much simpler than encoding; in almost all cases, simply removing percent-encoding is sufficient, as regular URL encode-sets do +/// not substitute characters. Form-encoding is the only exception specified in the URL Standard. +/// +/// Since percent-decode sets are not stateful, you only ever need to refer to their type, never an instance. The types are exposed as properties so you +/// can use a convenient KeyPath syntax to refer to them. +/// +public enum PercentDecodeSet { + + /// A decoding set which only decodes percent-encoded characters and assumes that no substituted characters need to be restored. + /// + public var percentEncodedOnly: PercentEncodeSet._Passthrough.Type { PercentEncodeSet._Passthrough.self } + + /// The [application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set) + /// percent-encode set. + /// + public var form: PercentEncodeSet.FormEncoded.Type { PercentEncodeSet.FormEncoded.self } +} + +/// A namespace for percent-encode sets. +/// +/// Since percent-encode sets are not stateful, you only ever need to refer to their type, never an instance. The types are exposed as properties so you +/// can use a convenient KeyPath syntax to refer to them. +/// +public enum PercentEncodeSet { + + /// The [C0 control](https://url.spec.whatwg.org/#c0-control-percent-encode-set) percent-encode set. + /// + public var c0Control: C0Control.Type { C0Control.self } + + /// The [fragment](https://url.spec.whatwg.org/#fragment-percent-encode-set) percent-encode set. 
+ /// + public var fragment: Fragment.Type { Fragment.self } + + /// The [query](https://url.spec.whatwg.org/#query-percent-encode-set) percent-encode set. + /// + public var query_notSpecial: Query_NotSpecial.Type { Query_NotSpecial.self } + + /// The [special query](https://url.spec.whatwg.org/#special-query-percent-encode-set) percent-encode set. + /// + public var query_special: Query_Special.Type { Query_Special.self } + + /// The [path](https://url.spec.whatwg.org/#path-percent-encode-set) percent-encode set. + /// + public var path: Path.Type { Path.self } + + /// The [userinfo](https://url.spec.whatwg.org/#userinfo-percent-encode-set) percent-encode set. + /// + public var userInfo: UserInfo.Type { UserInfo.self } + + /// The [component](https://url.spec.whatwg.org/#component-percent-encode-set) percent-encode set. + /// + public var component: Component.Type { Component.self } + + /// The [application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set) + /// percent-encode set. + /// + public var form: FormEncoded.Type { FormEncoded.self } + + /// An internal percent-encode set for manipulating path components. + /// + @usableFromInline + internal var pathComponent: _PathComponent.Type { _PathComponent.self } + + /// An internal percent-encode set for when content is already known to be correctly percent-encoded. + /// + @usableFromInline + internal var alreadyEncoded: _Passthrough.Type { _Passthrough.self } +} + +// URL encode-set implementations. + +// ARM and x86 seem to have wildly different performance characteristics. +// The lookup table seems to be about 8-12% better than bitshifting on x86, but can be 90% slower on ARM. 
+@usableFromInline +internal protocol DualImplementedPercentEncodeSet: PercentEncodeSetProtocol { + static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool + static func shouldEscape_table(ascii codePoint: UInt8) -> Bool +} + +@inlinable @inline(__always) +internal func __shouldPercentEncode( + _: Encoder.Type, ascii codePoint: UInt8 +) -> Bool where Encoder: DualImplementedPercentEncodeSet { + #if arch(x86_64) + return Encoder.shouldEscape_table(ascii: codePoint) + #else + return Encoder.shouldEscape_binary(ascii: codePoint) + #endif +} + +extension PercentEncodeSet { + + public struct C0Control: PercentEncodeSetProtocol, DualImplementedPercentEncodeSet { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + __shouldPercentEncode(Self.self, ascii: codePoint) + } + + @inlinable @inline(__always) + internal static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool { + // TODO: [performance]: Benchmark alternative: + // `codePoint & 0b11100000 == 0 || codePoint == 0x7F` + // C0Control percent-encoding is used for cannot-be-a-base URL paths and opaque host names, + // which currently are not benchmarked. 
+ + // FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210 + let lo: UInt64 = 0b00000000_00000000_00000000_00000000_11111111_11111111_11111111_11111111 + let hi: UInt64 = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000 + if codePoint < 64 { + return lo & (1 &<< codePoint) != 0 + } else { + return hi & (1 &<< ((codePoint &- 64) & 0x7F)) != 0 + } + } + + @inlinable @inline(__always) + internal static func shouldEscape_table(ascii codePoint: UInt8) -> Bool { + percent_encoding_table.withUnsafeBufferPointer { $0[Int(codePoint & 0x7F)] }.contains(.c0) + } + } + + public struct Fragment: PercentEncodeSetProtocol, DualImplementedPercentEncodeSet { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + __shouldPercentEncode(Self.self, ascii: codePoint) + } + + @inlinable @inline(__always) + internal static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool { + // FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210 + let lo: UInt64 = 0b01010000_00000000_00000000_00000101_11111111_11111111_11111111_11111111 + let hi: UInt64 = 0b10000000_00000000_00000000_00000001_00000000_00000000_00000000_00000000 + if codePoint < 64 { + return lo & (1 &<< codePoint) != 0 + } else { + return hi & (1 &<< ((codePoint &- 64) & 0x7F)) != 0 + } + } + + @inlinable @inline(__always) + internal static func shouldEscape_table(ascii codePoint: UInt8) -> Bool { + percent_encoding_table.withUnsafeBufferPointer { $0[Int(codePoint & 0x7F)] }.contains(.fragment) + } + } + + public struct Query_NotSpecial: PercentEncodeSetProtocol, DualImplementedPercentEncodeSet { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + __shouldPercentEncode(Self.self, ascii: codePoint) + } + + @inlinable @inline(__always) + internal static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool { + // 
FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210 + let lo: UInt64 = 0b01010000_00000000_00000000_00001101_11111111_11111111_11111111_11111111 + let hi: UInt64 = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000 + if codePoint < 64 { + return lo & (1 &<< codePoint) != 0 + } else { + return hi & (1 &<< ((codePoint &- 64) & 0x7F)) != 0 + } + } + + @inlinable @inline(__always) + internal static func shouldEscape_table(ascii codePoint: UInt8) -> Bool { + percent_encoding_table.withUnsafeBufferPointer { $0[Int(codePoint & 0x7F)] }.contains(.query) + } + } + + public struct Query_Special: PercentEncodeSetProtocol, DualImplementedPercentEncodeSet { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + __shouldPercentEncode(Self.self, ascii: codePoint) + } + + @inlinable @inline(__always) + internal static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool { + // FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210 + let lo: UInt64 = 0b01010000_00000000_00000000_10001101_11111111_11111111_11111111_11111111 + let hi: UInt64 = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000 + if codePoint < 64 { + return lo & (1 &<< codePoint) != 0 + } else { + return hi & (1 &<< ((codePoint &- 64) & 0x7F)) != 0 + } + } + + @inlinable @inline(__always) + internal static func shouldEscape_table(ascii codePoint: UInt8) -> Bool { + percent_encoding_table.withUnsafeBufferPointer { $0[Int(codePoint & 0x7F)] }.contains(.specialQuery) + } + } + + public struct Path: PercentEncodeSetProtocol, DualImplementedPercentEncodeSet { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + __shouldPercentEncode(Self.self, ascii: codePoint) + } + + @inlinable @inline(__always) + internal static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool { + // 
FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210 + let lo: UInt64 = 0b11010000_00000000_00000000_00001101_11111111_11111111_11111111_11111111 + let hi: UInt64 = 0b10101000_00000000_00000000_00000001_00000000_00000000_00000000_00000000 + if codePoint < 64 { + return lo & (1 &<< codePoint) != 0 + } else { + return hi & (1 &<< ((codePoint &- 64) & 0x7F)) != 0 + } + } + + @inlinable @inline(__always) + internal static func shouldEscape_table(ascii codePoint: UInt8) -> Bool { + percent_encoding_table.withUnsafeBufferPointer { $0[Int(codePoint & 0x7F)] }.contains(.path) + } + } + + public struct UserInfo: PercentEncodeSetProtocol, DualImplementedPercentEncodeSet { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + __shouldPercentEncode(Self.self, ascii: codePoint) + } + + @inlinable @inline(__always) + internal static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool { + // FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210 + let lo: UInt64 = 0b11111100_00000000_10000000_00001101_11111111_11111111_11111111_11111111 + let hi: UInt64 = 0b10111000_00000000_00000000_00000001_01111000_00000000_00000000_00000001 + if codePoint < 64 { + return lo & (1 &<< codePoint) != 0 + } else { + return hi & (1 &<< ((codePoint &- 64) & 0x7F)) != 0 + } + } + + @inlinable @inline(__always) + internal static func shouldEscape_table(ascii codePoint: UInt8) -> Bool { + percent_encoding_table.withUnsafeBufferPointer { $0[Int(codePoint & 0x7F)] }.contains(.userInfo) + } + } + + /// This encode-set is not used for any particular component, but can be used to encode data which is compatible with the escaping for + /// the path, query, and fragment. It should give the same results as Javascript's `.encodeURIComponent()` method. 
+ /// + public struct Component: PercentEncodeSetProtocol, DualImplementedPercentEncodeSet { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + __shouldPercentEncode(Self.self, ascii: codePoint) + } + + @inlinable @inline(__always) + internal static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool { + // FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210 + let lo: UInt64 = 0b11111100_00000000_10011000_01111101_11111111_11111111_11111111_11111111 + let hi: UInt64 = 0b10111000_00000000_00000000_00000001_01111000_00000000_00000000_00000001 + if codePoint < 64 { + return lo & (1 &<< codePoint) != 0 + } else { + return hi & (1 &<< ((codePoint &- 64) & 0x7F)) != 0 + } + } + + @inlinable @inline(__always) + internal static func shouldEscape_table(ascii codePoint: UInt8) -> Bool { + percent_encoding_table.withUnsafeBufferPointer { $0[Int(codePoint & 0x7F)] }.contains(.component) + } + } + + public struct FormEncoded: PercentEncodeSetProtocol, DualImplementedPercentEncodeSet { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + __shouldPercentEncode(Self.self, ascii: codePoint) + } + + @inlinable @inline(__always) + internal static func shouldEscape_binary(ascii codePoint: UInt8) -> Bool { + // FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210 + let lo: UInt64 = 0b11111100_00000000_10011011_11111110_11111111_11111111_11111111_11111111 + let hi: UInt64 = 0b11111000_00000000_00000000_00000001_01111000_00000000_00000000_00000001 + if codePoint < 64 { + return lo & (1 &<< codePoint) != 0 + } else { + return hi & (1 &<< ((codePoint &- 64) & 0x7F)) != 0 + } + } + + @inlinable @inline(__always) + internal static func shouldEscape_table(ascii codePoint: UInt8) -> Bool { + percent_encoding_table.withUnsafeBufferPointer { $0[Int(codePoint & 0x7F)] }.contains(.form) + } + + @inlinable @inline(__always) + public static 
func substitute(ascii codePoint: UInt8) -> UInt8? { + codePoint == ASCII.space.codePoint ? ASCII.plus.codePoint : nil + } + + @inlinable @inline(__always) + public static func unsubstitute(ascii codePoint: UInt8) -> UInt8? { + codePoint == ASCII.plus.codePoint ? ASCII.space.codePoint : nil + } + } +} + +// Non-standard encode-sets. + +extension PercentEncodeSet { + + /// An encode-set which does not escape or substitute any characters. + /// + /// This may be used as a decoding set in order to percent-decode content which does not have substitutions. + /// + public struct _Passthrough: PercentEncodeSetProtocol { + + @inlinable @inline(__always) + public static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + false + } + } + + /// An encode-set used for escaping the contents path components. **Not defined by the URL standard.** + /// + /// The URL 'path' encode-set, as defined in the standard, does not include the forward-slash character, as the URL parser won't ever see them in a path component. + /// This is problematic for APIs which allow the user to insert path-components, as they might insert content which would be re-parsed as multiple components, + /// possibly including hidden "." or ".." components and leading to non-idempotent URL strings. + /// + /// A solution with true minimal-escaping would be split this encode-set for special/non-special URLs, with only the former including the forwardSlash character. + /// For simplicity, we include them both, which means that we will unnecessarily escape forwardSlashes in the path components of non-special URLs. 
+ /// + @usableFromInline + internal struct _PathComponent: PercentEncodeSetProtocol { + + @inlinable @inline(__always) + internal static func shouldPercentEncode(ascii codePoint: UInt8) -> Bool { + PercentEncodeSet.Path.shouldPercentEncode(ascii: codePoint) + || codePoint == ASCII.forwardSlash.codePoint + || codePoint == ASCII.backslash.codePoint + } + } +} + +//swift-format-ignore +/// A set of `URLEncodeSet`s. +@usableFromInline +internal struct URLEncodeSetSet: OptionSet { + + @usableFromInline + internal var rawValue: UInt8 + + @usableFromInline + internal init(rawValue: UInt8) { + self.rawValue = rawValue + } + + @inlinable internal static var none: Self { Self(rawValue: 0) } + @inlinable internal static var c0: Self { Self(rawValue: 1 << 0) } + @inlinable internal static var fragment: Self { Self(rawValue: 1 << 1) } + @inlinable internal static var query: Self { Self(rawValue: 1 << 2) } + @inlinable internal static var specialQuery: Self { Self(rawValue: 1 << 3) } + @inlinable internal static var path: Self { Self(rawValue: 1 << 4) } + @inlinable internal static var userInfo: Self { Self(rawValue: 1 << 5) } + @inlinable internal static var form: Self { Self(rawValue: 1 << 6) } + @inlinable internal static var component: Self { Self(rawValue: 1 << 7) } +} + +// swift-format-ignore +@usableFromInline +internal let percent_encoding_table: [URLEncodeSetSet] = [ + // Control Characters --------------------------------------------------------------------- + /* 0x00 null */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x01 startOfHeading */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x02 startOfText */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x03 endOfText */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x04 endOfTransmission */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + 
/* 0x05 enquiry */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x06 acknowledge */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x07 bell */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x08 backspace */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x09 horizontalTab */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x0A lineFeed */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x0B verticalTab */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x0C formFeed */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x0D carriageReturn */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x0E shiftOut */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x0F shiftIn */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x10 dataLinkEscape */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x11 deviceControl1 */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x12 deviceControl2 */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x13 deviceControl3 */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x14 deviceControl4 */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x15 negativeAcknowledge */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x16 synchronousIdle */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x17 endOfTransmissionBlock */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x18 cancel */ [.c0, 
.fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x19 endOfMedium */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x1A substitute */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x1B escape */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x1C fileSeparator */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x1D groupSeparator */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x1E recordSeparator */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x1F unitSeparator */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + // Special Characters --------------------------------------------------------------------- + /* 0x20 space */ [.fragment, .query, .specialQuery, .path, .userInfo, .component], // form substitutes instead. 
+ /* 0x21 exclamationMark */ .form, + /* 0x22 doubleQuotationMark */ [.fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x23 numberSign */ [.query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x24 dollarSign */ [.form, .component], + /* 0x25 percentSign */ [.form, .component], + /* 0x26 ampersand */ [.form, .component], + /* 0x27 apostrophe */ [.specialQuery, .form], + /* 0x28 leftParenthesis */ .form, + /* 0x29 rightParenthesis */ .form, + /* 0x2A asterisk */ .none, + /* 0x2B plus */ [.form, .component], + /* 0x2C comma */ [.form, .component], + /* 0x2D minus */ .none, + /* 0x2E period */ .none, + /* 0x2F forwardSlash */ [.userInfo, .form, .component], + // Numbers ---------------------------------------------------------------------- + /* 0x30 digit 0 */ .none, + /* 0x31 digit 1 */ .none, + /* 0x32 digit 2 */ .none, + /* 0x33 digit 3 */ .none, + /* 0x34 digit 4 */ .none, + /* 0x35 digit 5 */ .none, + /* 0x36 digit 6 */ .none, + /* 0x37 digit 7 */ .none, + /* 0x38 digit 8 */ .none, + /* 0x39 digit 9 */ .none, + // Punctuation ---------------------------------------------------------------------- + /* 0x3A colon */ [.userInfo, .form, .component], + /* 0x3B semicolon */ [.userInfo, .form, .component], + /* 0x3C lessThanSign */ [.fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x3D equalSign */ [.userInfo, .form, .component], + /* 0x3E greaterThanSign */ [.fragment, .query, .specialQuery, .path, .userInfo, .form, .component], + /* 0x3F questionMark */ [.path, .userInfo, .form, .component], + /* 0x40 commercialAt */ [.userInfo, .form, .component], + // Uppercase letters ---------------------------------------------------------------------- + /* 0x41 A */ .none, + /* 0x42 B */ .none, + /* 0x43 C */ .none, + /* 0x44 D */ .none, + /* 0x45 E */ .none, + /* 0x46 F */ .none, + /* 0x47 G */ .none, + /* 0x48 H */ .none, + /* 0x49 I */ .none, + /* 0x4A J */ .none, + /* 0x4B K */ .none, + /* 0x4C L */ .none, + 
/* 0x4D M */ .none, + /* 0x4E N */ .none, + /* 0x4F O */ .none, + /* 0x50 P */ .none, + /* 0x51 Q */ .none, + /* 0x52 R */ .none, + /* 0x53 S */ .none, + /* 0x54 T */ .none, + /* 0x55 U */ .none, + /* 0x56 V */ .none, + /* 0x57 W */ .none, + /* 0x58 X */ .none, + /* 0x59 Y */ .none, + /* 0x5A Z */ .none, + // More special characters --------------------------------------------------------------------- + /* 0x5B leftSquareBracket */ [.userInfo, .form, .component], + /* 0x5C backslash */ [.userInfo, .form, .component], + /* 0x5D rightSquareBracket */ [.userInfo, .form, .component], + /* 0x5E circumflexAccent */ [.userInfo, .form, .component], + /* 0x5F underscore */ .none, + /* 0x60 backtick */ [.fragment, .path, .userInfo, .form, .component], + // Lowercase letters --------------------------------------------------------------------- + /* 0x61 a */ .none, + /* 0x62 b */ .none, + /* 0x63 c */ .none, + /* 0x64 d */ .none, + /* 0x65 e */ .none, + /* 0x66 f */ .none, + /* 0x67 g */ .none, + /* 0x68 h */ .none, + /* 0x69 i */ .none, + /* 0x6A j */ .none, + /* 0x6B k */ .none, + /* 0x6C l */ .none, + /* 0x6D m */ .none, + /* 0x6E n */ .none, + /* 0x6F o */ .none, + /* 0x70 p */ .none, + /* 0x71 q */ .none, + /* 0x72 r */ .none, + /* 0x73 s */ .none, + /* 0x74 t */ .none, + /* 0x75 u */ .none, + /* 0x76 v */ .none, + /* 0x77 w */ .none, + /* 0x78 x */ .none, + /* 0x79 y */ .none, + /* 0x7A z */ .none, + // More special characters --------------------------------------------------------------------- + /* 0x7B leftCurlyBracket */ [.path, .userInfo, .form, .component], + /* 0x7C verticalBar */ [.userInfo, .form, .component], + /* 0x7D rightCurlyBracket */ [.path, .userInfo, .form, .component], + /* 0x7E tilde */ .form, + /* 0x7F delete */ [.c0, .fragment, .query, .specialQuery, .path, .userInfo, .form, .component] + // The End. 
--------------------------------------------------------------------- +] diff --git a/Sources/WebURL/URLStorage+Setters.swift b/Sources/WebURL/URLStorage+Setters.swift new file mode 100644 index 000000000..5dc3252a4 --- /dev/null +++ b/Sources/WebURL/URLStorage+Setters.swift @@ -0,0 +1,906 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +// -------------------------------------------- +// MARK: - Scheme +// -------------------------------------------- + + +extension URLStorage { + + /// Attempts to set the scheme component to the given UTF8-encoded string. + /// The new value may contain a trailing colon (e.g. `http`, `http:`). Colons are only allowed as the last character of the string. + /// + @inlinable + internal mutating func setScheme( + to newValue: UTF8Bytes + ) -> (AnyURLStorage, URLSetterError?) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + // Check that the new value is a valid scheme. + guard let (idx, newSchemeKind) = parseScheme(newValue), + idx == newValue.endIndex || newValue.index(after: idx) == newValue.endIndex + else { + return (AnyURLStorage(self), .invalidScheme) + } + + // Check that the operation is semantically valid for the existing structure. + let newSchemeBytes = newValue[.. 
1, "invalid URLStructure: port must either be nil or >1 character") + if newStructure.schemeKind.isDefaultPort(utf8: portBytes.dropFirst()) { + newStructure.portLength = 0 + commands.append(.remove(subrange: portRange)) + } + } + return (multiReplaceSubrange(commands, newStructure: newStructure), nil) + } +} + + +// -------------------------------------------- +// MARK: - Username, Password +// -------------------------------------------- + + +extension URLStorage { + + /// Attempts to set the username component to the given UTF8-encoded string. The value will be percent-encoded as appropriate. + /// + /// - Note: Usernames and Passwords are never filtered of ASCII tab or newline characters. + /// If the given `newValue` contains any such characters, they will be percent-encoded in to the result. + /// + @inlinable + internal mutating func setUsername( + to newValue: UTF8Bytes? + ) -> (AnyURLStorage, URLSetterError?) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + let oldStructure = header.structure + + // Check that the operation is semantically valid for the existing structure. + if oldStructure.cannotHaveCredentialsOrPort { + return (AnyURLStorage(self), .cannotHaveCredentialsOrPort) + } + + // The operation is valid. Calculate the new structure and replace the code-units. + var newStructure = oldStructure + newStructure.usernameLength = 0 + + guard let newValue = newValue, newValue.isEmpty == false else { + guard let oldUsername = oldStructure.range(of: .username) else { + return (AnyURLStorage(self), nil) + } + let toRemove = oldUsername.lowerBound..<(oldUsername.upperBound + (newStructure.hasCredentialSeparator ? 
0 : 1)) + return (removeSubrange(toRemove, newStructure: newStructure).newStorage, nil) + } + + let (newLength, needsEncoding) = newValue.lazy.percentEncodedGroups(as: \.userInfo).encodedLength + newStructure.usernameLength = newLength + + let oldRange = oldStructure.rangeForReplacingCodeUnits(of: .username) + let addSeparator = (oldStructure.hasCredentialSeparator == false) + let bytesToWrite = newLength + (addSeparator ? 1 : 0) + + let result = replaceSubrange(oldRange, withUninitializedSpace: bytesToWrite, newStructure: newStructure) { dest in + var bytesWritten = 0 + if needsEncoding { + bytesWritten += dest.fastInitialize(from: newValue.lazy.percentEncoded(as: \.userInfo)) + } else { + bytesWritten += dest.fastInitialize(from: newValue) + } + if addSeparator { + dest[bytesWritten] = ASCII.commercialAt.codePoint + bytesWritten += 1 + } + return bytesWritten + } + return (result.newStorage, nil) + } + + /// Attempts to set the password component to the given UTF8-encoded string. The value will be percent-encoded as appropriate. + /// + /// - Note: Usernames and Passwords are never filtered of ASCII tab or newline characters. + /// If the given `newValue` contains any such characters, they will be percent-encoded in to the result. + /// + @inlinable + internal mutating func setPassword( + to newValue: UTF8Bytes? + ) -> (AnyURLStorage, URLSetterError?) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + let oldStructure = header.structure + + // Check that the operation is semantically valid for the existing structure. + if oldStructure.cannotHaveCredentialsOrPort { + return (AnyURLStorage(self), .cannotHaveCredentialsOrPort) + } + + // The operation is valid. Calculate the new structure and replace the code-units. 
+ var newStructure = oldStructure + newStructure.passwordLength = 0 + + guard let newValue = newValue, newValue.isEmpty == false else { + guard let oldPassword = oldStructure.range(of: .password) else { + return (AnyURLStorage(self), nil) + } + let toRemove = oldPassword.lowerBound..<(oldPassword.upperBound + (newStructure.hasCredentialSeparator ? 0 : 1)) + return (removeSubrange(toRemove, newStructure: newStructure).newStorage, nil) + } + + let (newLength, needsEncoding) = newValue.lazy.percentEncodedGroups(as: \.userInfo).encodedLength + newStructure.passwordLength = 1 /* : */ + newLength + + // Always write the trailing '@'. + var oldRange = oldStructure.rangeForReplacingCodeUnits(of: .password) + oldRange = oldRange.lowerBound.. `unix:///some/path`). + /// A `nil` hostname removes the `//` separator after the scheme, resulting in a so-called "path-only" URL (e.g. `unix://oldhost/some/path` -> `unix:/some/path`). + /// + @inlinable + internal mutating func setHostname( + to newValue: UTF8Bytes? + ) -> (AnyURLStorage, URLSetterError?) where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 { + + let oldStructure = header.structure + + // Check that the operation is semantically valid for the existing structure. + if oldStructure.cannotBeABaseURL { + return (AnyURLStorage(self), .cannotSetHostOnCannotBeABaseURL) + } + + guard let newHostnameBytes = newValue, newHostnameBytes.isEmpty == false else { + + if oldStructure.schemeKind.isSpecial, oldStructure.schemeKind != .file { + return (AnyURLStorage(self), .schemeDoesNotSupportNilOrEmptyHostnames) + } + if oldStructure.schemeKind == .file, newValue == nil { + return (AnyURLStorage(self), .schemeDoesNotSupportNilOrEmptyHostnames) + } + if oldStructure.hasCredentialsOrPort { + return (AnyURLStorage(self), .cannotSetEmptyHostnameWithCredentialsOrPort) + } + if oldStructure.pathLength == 0, newValue == nil { + return (AnyURLStorage(self), .cannotRemoveHostnameWithoutPath) + } + + // The operation is valid. 
Calculate the new structure and replace the code-units. + var newStructure = oldStructure + newStructure.hostnameLength = 0 + + switch oldStructure.range(of: .hostname) { + case .none: + assert(oldStructure.sigil != .authority, "URL has authority, but told us it had a nil hostname?!") + + // nil -> nil. + guard newValue != nil else { + return (AnyURLStorage(self), nil) + } + // nil -> empty string: Insert authority sigil, overwriting path sigil if present. + newStructure.sigil = .authority + let result = replaceSubrange( + oldStructure.rangeForReplacingSigil, + withUninitializedSpace: Sigil.authority.length, + newStructure: newStructure, + initializer: Sigil.authority.unsafeWrite + ) + return (result.newStorage, nil) + + case .some(let hostnameRange): + assert(oldStructure.sigil == .authority, "URL has a hostname, but apparently no authority?!") + + // hostname -> empty string: Preserve existing sigil, only remove the hostname contents. + guard newValue == nil else { + return (removeSubrange(hostnameRange, newStructure: newStructure).newStorage, nil) + } + // hostname -> nil: Remove authority sigil, replacing it with a path sigil if required. + let needsPathSigil = + oldStructure.range(of: .path).map { + PathComponentParser.doesNormalizedPathRequirePathSigil(codeUnits[$0]) + } ?? false + newStructure.sigil = needsPathSigil ? .path : .none + let commands: [ReplaceSubrangeOperation] = [ + .replace( + subrange: oldStructure.rangeForReplacingSigil, + withCount: needsPathSigil ? Sigil.path.length : 0, + writer: needsPathSigil ? Sigil.path.unsafeWrite : { _ in 0 }), + .remove(subrange: hostnameRange), + ] + return (multiReplaceSubrange(commands, newStructure: newStructure), nil) + } + } + + // Check that the new value is a valid hostname. 
+ var callback = IgnoreValidationErrors() + guard let newHost = ParsedHost(newHostnameBytes, schemeKind: oldStructure.schemeKind, callback: &callback) else { + return (AnyURLStorage(self), .invalidHostname) + } + + // The operation is valid. Calculate the new structure and replace the code-units. + var newStructure = oldStructure + + var newLengthCounter = HostnameLengthCounter() + newHost.write(bytes: newHostnameBytes, using: &newLengthCounter) + newStructure.hostnameLength = newLengthCounter.length + + // Always insert/overwrite the existing sigil. + newStructure.sigil = .authority + + let commands: [ReplaceSubrangeOperation] = [ + .replace( + subrange: oldStructure.rangeForReplacingSigil, + withCount: Sigil.authority.length, + writer: Sigil.authority.unsafeWrite), + .replace( + subrange: oldStructure.rangeForReplacingCodeUnits(of: .hostname), + withCount: newStructure.hostnameLength + ) { dest in + var writer = UnsafeBufferHostnameWriter(buffer: dest) + newHost.write(bytes: newHostnameBytes, using: &writer) + return dest.baseAddress?.distance(to: writer.buffer.baseAddress!) ?? 0 + }, + ] + return (multiReplaceSubrange(commands, newStructure: newStructure), nil) + } +} + + +// -------------------------------------------- +// MARK: - Port +// -------------------------------------------- + + +extension URLStorage { + + /// Attempts to set the port component to the given value. A value of `nil` removes the port. + /// + @inlinable + internal mutating func setPort( + to newValue: UInt16? + ) -> (AnyURLStorage, URLSetterError?) { + + var newValue = newValue + let oldStructure = header.structure + + // Check that the operation is semantically valid for the existing structure. + guard oldStructure.cannotHaveCredentialsOrPort == false else { + return (AnyURLStorage(self), .cannotHaveCredentialsOrPort) + } + + // The operation is valid. Calculate the new structure and replace the code-units. 
+ // This is a pretty straightforward code-unit replacement, so it can go through setSimpleComponent. + if newValue == oldStructure.schemeKind.defaultPort { + newValue = nil + } + + if let newPort = newValue { + var stackBuffer = 0 as UInt64 + let result = withUnsafeMutableBytes(of: &stackBuffer) { stackBytes -> AnyURLStorage in + let count = ASCII.writeDecimalString(for: newPort, to: stackBytes.baseAddress!) + let utf8Bytes = UnsafeRawBufferPointer(start: stackBytes.baseAddress!, count: Int(count)) + assert(count > 0) + return setSimpleComponent( + .port, + to: utf8Bytes, + prefix: .colon, + lengthKey: \.portLength, + encodeSet: \.alreadyEncoded + ).newStorage + } + return (result, nil) + + } else { + let result = setSimpleComponent( + .port, + to: UnsafeBufferPointer?.none, + prefix: .colon, + lengthKey: \.portLength, + encodeSet: \.alreadyEncoded + ).newStorage + return (result, nil) + } + } +} + + +// -------------------------------------------- +// MARK: - Path +// -------------------------------------------- + + +extension URLStorage { + + /// Attempts to set the path component to the given UTF8-encoded string. + /// + @inlinable + internal mutating func setPath( + to newPath: UTF8Bytes + ) -> (AnyURLStorage, URLSetterError?) where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 { + + let oldStructure = header.structure + + // Check that the operation is semantically valid for the existing structure. + guard oldStructure.cannotBeABaseURL == false else { + return (AnyURLStorage(self), .cannotSetPathOnCannotBeABaseURL) + } + + // The operation is valid. Calculate the new structure and replace the code-units. 
+ + // Note: absolutePathsCopyWindowsDriveFromBase models a quirk from the URL Standard's "file slash" state, + // whereby parsing a "relative URL string" which turns out to be an absolute path copies the Windows drive + // from its base URL (so parsing "/usr/bin" against "file:///C:/Windows" returns "file:///C:/usr/bin", + // not "file:///usr/bin", even though "/usr/bin" is absolute). + // + // The 'pathname' setter defined in the standard always goes through the "path start" state, + // which never reaches "file slash" and does not include this quirk. Therefore APCWDFB should be 'false'. + let pathInfo = PathMetrics( + parsing: newPath, schemeKind: oldStructure.schemeKind, baseURL: nil, + absolutePathsCopyWindowsDriveFromBase: false) + + var newStructure = oldStructure + newStructure.pathLength = pathInfo.requiredCapacity + newStructure.firstPathComponentLength = pathInfo.firstComponentLength + + var commands: [ReplaceSubrangeOperation] = [] + switch (oldStructure.sigil, pathInfo.requiresPathSigil) { + case (.authority, _), (.path, true), (.none, false): + break + case (.path, false): + newStructure.sigil = .none + commands.append(.remove(subrange: oldStructure.rangeForReplacingSigil)) + case (.none, true): + newStructure.sigil = .path + commands.append( + .replace( + subrange: oldStructure.rangeForReplacingSigil, + withCount: Sigil.path.length, + writer: Sigil.path.unsafeWrite) + ) + } + commands.append( + .replace( + subrange: oldStructure.rangeForReplacingCodeUnits(of: .path), + withCount: pathInfo.requiredCapacity, + writer: { dest in + dest.writeNormalizedPath( + parsing: newPath, schemeKind: newStructure.schemeKind, + baseURL: nil, + absolutePathsCopyWindowsDriveFromBase: false, + needsPercentEncoding: pathInfo.needsPercentEncoding + ) + }) + ) + return (multiReplaceSubrange(commands, newStructure: newStructure), nil) + } +} + + +// -------------------------------------------- +// MARK: - Query, Fragment. 
+// -------------------------------------------- + + +extension URLStorage { + + /// Attempts to set the query component to the given UTF8-encoded string. + /// + /// A value of `nil` removes the query. + /// + @inlinable + internal mutating func setQuery( + to newValue: UTF8Bytes? + ) -> AnyURLStorage where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + if self.header.structure.schemeKind.isSpecial { + return setSimpleComponent( + .query, + to: newValue, + prefix: .questionMark, + lengthKey: \.queryLength, + encodeSet: \.query_special, + adjustStructure: { structure in + // Empty and nil queries are considered form-encoded (in that they do not need to be re-encoded). + structure.queryIsKnownFormEncoded = (structure.queryLength == 0 || structure.queryLength == 1) + } + ).newStorage + } else { + return setSimpleComponent( + .query, + to: newValue, + prefix: .questionMark, + lengthKey: \.queryLength, + encodeSet: \.query_notSpecial, + adjustStructure: { structure in + structure.queryIsKnownFormEncoded = (structure.queryLength == 0 || structure.queryLength == 1) + } + ).newStorage + } + } + + /// Set the query component to the given UTF8-encoded string, assuming that the string is already `application/x-www-form-urlencoded`. + /// + @inlinable + internal mutating func setQuery( + toKnownFormEncoded newValue: UTF8Bytes? + ) -> AnyURLStorage where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + return setSimpleComponent( + .query, + to: newValue, + prefix: .questionMark, + lengthKey: \.queryLength, + encodeSet: \.alreadyEncoded, + adjustStructure: { structure in + structure.queryIsKnownFormEncoded = true + } + ).newStorage + } + + /// Attempts to set the query component to the given UTF8-encoded string. + /// + /// A value of `nil` removes the query. + /// + @inlinable + internal mutating func setFragment( + to newValue: UTF8Bytes? 
+ ) -> AnyURLStorage where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + return setSimpleComponent( + .fragment, + to: newValue, + prefix: .numberSign, + lengthKey: \.fragmentLength, + encodeSet: \.fragment + ).newStorage + } +} + + +// -------------------------------------------- +// MARK: - Errors +// -------------------------------------------- + + +/// An error which may be returned when a `URLStorage` setter operation fails. +/// +@usableFromInline +internal enum URLSetterError: Error, Equatable { + + // scheme. + case invalidScheme + case changeOfSchemeSpecialness + case newSchemeCannotHaveCredentialsOrPort + case newSchemeCannotHaveEmptyHostname + // credentials and port. + case cannotHaveCredentialsOrPort + case portValueOutOfBounds + // hostname. + case cannotSetHostOnCannotBeABaseURL + case schemeDoesNotSupportNilOrEmptyHostnames + case cannotSetEmptyHostnameWithCredentialsOrPort + case invalidHostname + case cannotRemoveHostnameWithoutPath + // path. + case cannotSetPathOnCannotBeABaseURL +} + +extension URLSetterError: CustomStringConvertible { + + @usableFromInline + internal var description: String { + switch self { + case .invalidScheme: + return #""" + The new scheme is not valid. Valid schemes consist of ASCII alphanumerics, '+', '-' and '.', and the + first character must be an ASCII alpha. If setting a scheme, you may include its trailing ':' separator. + + Valid schemes: 'http', 'file', 'ftp:', 'http+unix:' + Invalid schemes: ' http', 'http$', '👹', 'ftp://example.com' + """# + case .changeOfSchemeSpecialness: + return #""" + The new scheme is special/not-special, but the URL's existing scheme is not-special/special. + URLs with special schemes are encoded in a significantly different way from those with non-special schemes, + and switching from one style to the other via the 'scheme' property is not supported. + + The special schemes are: 'http', 'https', 'file', 'ftp', 'ws', 'wss'. 
+ Gopher was considered a special scheme by previous standards, but no longer is. + """# + case .newSchemeCannotHaveCredentialsOrPort: + return #""" + The URL contains credentials or a port number, which is unsupported by the new scheme. + The only scheme which does not support credentials or a port number is 'file'. + """# + case .newSchemeCannotHaveEmptyHostname: + return #""" + The URL has an empty hostname, which is unsupported by the new scheme. + The schemes which do not support empty hostnames are 'http', 'https', 'ftp', 'ws', and 'wss'. + """# + case .cannotHaveCredentialsOrPort: + return #""" + Attempt to set credentials or a port number, but the URL's scheme does not support them. + The only scheme which does not support credentials or a port number is 'file'. + """# + case .portValueOutOfBounds: + return #""" + Attempt to set the port number to an invalid value. Valid port numbers are in the range 0 ..< 65536. + """# + case .cannotSetHostOnCannotBeABaseURL: + return #""" + Attempt to set the hostname on a 'cannot be a base' URL. + URLs without hostnames, and whose path does not begin with '/', are considered invalid base URLs and + cannot be made valid by adding a hostname or changing their path. + + Examples include: 'mailto:somebody@example.com', 'javascript:alert("hi")', '...' + """# + case .schemeDoesNotSupportNilOrEmptyHostnames: + return #""" + Attempt to set the hostname to 'nil' or the empty string, but the URL's scheme requires a non-empty hostname. + The schemes which do not support empty hostnames are 'http', 'https', 'ftp', 'ws', and 'wss'. + The schemes which do not support 'nil' hostnames are as above, plus 'file'. + """# + case .cannotSetEmptyHostnameWithCredentialsOrPort: + return #""" + Attempt to set the hostname to 'nil' or the empty string, but the URL contains credentials or a port number. + Credentials and port numbers require a non-empty hostname to be present. 
+ """# + case .invalidHostname: + return #""" + Attempt to set the hostname to an invalid value. Invalid values include invalid IPv4/v6 addresses + (e.g. "10.0.0.999" or "[:::]"), as well as strings containing forbidden host code points. + + A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR, + U+0020 SPACE, U+0023 (#), U+0025 (%), U+002F (/), U+003A (:), U+003C (<), U+003E (>), + U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), or U+005E (^). + + These code points are forbidden (even if percent-encoded) in 'http', 'https', 'file', 'ftp', 'ws', and 'wss' URLs. + They may only be present in hostnames of other schemes if they are percent-encoded. + """# + case .cannotRemoveHostnameWithoutPath: + return #""" + Attempt to set the hostname to 'nil' on a URL which also does not have a path. + This is not allowed, as the result would be an invalid base URL (for example, "foo://examplehost?aQuery" would become "foo:?aQuery"). + """# + case .cannotSetPathOnCannotBeABaseURL: + return #""" + Attempt to set the path on a 'cannot be a base' URL. + URLs without hostnames, and whose path does not begin with '/', are considered invalid base URLs and + cannot be made valid by adding a hostname or changing their path. + + Examples include: 'mailto:somebody@example.com', 'javascript:alert("hi")', '...' + """# + } + } +} + + +// -------------------------------------------- +// MARK: - Utilities +// -------------------------------------------- + + +/// A command object which represents a replacement operation on some URL code-units. For use with `URLStorage.multiReplaceSubrange`. 
+/// +@usableFromInline +internal struct ReplaceSubrangeOperation { + + /// The range of existing code-units which this operation replaces. + @usableFromInline + internal var subrange: Range + + /// The number of code-units which `writer` is expected to write in place of `subrange`. + @usableFromInline + internal var newElementCount: Int + + /// A closure which writes the replacement code-units in to the buffer it is given, and returns the number of code-units it wrote. + @usableFromInline + internal var writer: (inout UnsafeMutableBufferPointer) -> Int + + @inlinable + internal init( + subrange: Range, newElementCount: Int, writer: @escaping (inout UnsafeMutableBufferPointer) -> Int + ) { + self.subrange = subrange + self.newElementCount = newElementCount + self.writer = writer + } + + /// Creates an operation which replaces `subrange` with `withCount` code-units written by `writer`. + /// + /// - seealso: `URLStorage.replaceSubrange` + @inlinable + internal static func replace( + subrange: Range, withCount: Int, writer: @escaping (inout UnsafeMutableBufferPointer) -> Int + ) -> Self { + ReplaceSubrangeOperation(subrange: subrange, newElementCount: withCount, writer: writer) + } + + /// Creates an operation which removes `subrange`: its replacement count is 0, and its writer reports that it wrote nothing. + /// + /// - seealso: `URLStorage.removeSubrange` + @inlinable + internal static func remove(subrange: Range) -> Self { + ReplaceSubrangeOperation(subrange: subrange, newElementCount: 0, writer: { _ in return 0 }) + } +} + +extension URLStorage { + + /// Performs a code-unit and URL structure replacement, copying to new storage with a different header type if necessary. + /// + /// The `initializer` closure is invoked to write the new code-units, and must return the number of code-units initialized. + /// + /// - parameters: + /// - subrange: The range of code-units to replace + /// - newElementCount: The number of UTF8 code-units that `initializer` will write to replace the indicated code-units. + /// - newStructure: The structure of the normalized URL string after replacement. + /// - initializer: A closure which must initialize exactly `newElementCount` code-units in the buffer pointer it is given. + /// The closure returns the number of bytes actually written to storage, which should be calculated by the closure independently + /// as it writes the contents, which serves as a safety and correctness check.
+ /// + /// - returns: A tuple consisting of: + /// - An `AnyURLStorage` with the given range of code-units replaced and with the new structure. If the existing storage was already capable + /// of supporting the new structure, this will wrap `self`. Otherwise, it will wrap a new storage object. + /// - The range of the replaced code-units in the new storage object. + /// + @inlinable + internal mutating func replaceSubrange( + _ subrange: Range, + withUninitializedSpace newElementCount: Int, + newStructure: URLStructure, + initializer: (inout UnsafeMutableBufferPointer) -> Int + ) -> (newStorage: AnyURLStorage, newSubrange: Range) { + + newStructure.checkInvariants() + // The total number of code-units the URL string will contain after the replacement. + let newCount = codeUnits.count - subrange.count + newElementCount + + // If this storage's type is already the optimal one for the result, replace the code-units in this storage + // and update the header's structure to match. + if AnyURLStorage.isOptimalStorageType(Self.self, requiredCapacity: newCount, structure: newStructure) { + let newSubrange = codeUnits.unsafeReplaceSubrange( + subrange, withUninitializedCapacity: newElementCount, initializingWith: initializer + ) + header.copyStructure(from: newStructure) + return (AnyURLStorage(self), newSubrange) + } + // Otherwise, create storage of the optimal type. The replacement starts at the same offset as the old subrange, + // and `initializer` is invoked to write the replaced region. + // NOTE(review): `initialize(from:replacingSubrange:withElements:)` presumably copies everything outside `subrange` unchanged -- confirm. + let newSubrange = subrange.lowerBound..<(subrange.lowerBound + newElementCount) + let newStorage = AnyURLStorage(optimalStorageForCapacity: newCount, structure: newStructure) { dest in + return codeUnits.withUnsafeBufferPointer { src in + dest.initialize(from: src, replacingSubrange: subrange, withElements: newElementCount) { rgnStart, count in + var rgnPtr = UnsafeMutableBufferPointer(start: rgnStart, count: count) + let written = initializer(&rgnPtr) + // The initializer must report having written exactly the number of code-units it was asked to write. + precondition(written == count, "Subrange initializer did not initialize the expected number of code-units") + } + } + } + return (newStorage, newSubrange) + } + + /// Removes the given code-units and replaces the URL structure, copying to new storage with a different header type if necessary.
+ /// + /// - parameters: + /// - subrange: The range of code-units to remove + /// - newStructure: The structure of the normalized URL string after removing the specified code-units. + /// + /// - returns: A tuple consisting of: + /// - An `AnyURLStorage` with the given range of code-units removed and with the new structure. If the existing storage was already capable + /// of supporting the new structure, this will wrap `self`. Otherwise, it will wrap a new storage object. + /// - The range, in the new storage object, at which the code-units were removed. As nothing is written in their place, + /// this is expected to be an empty range. + /// + @inlinable + internal mutating func removeSubrange( + _ subrange: Range, newStructure: URLStructure + ) -> (newStorage: AnyURLStorage, newSubrange: Range) { + // A removal is a replacement with zero new code-units; the initializer has nothing to write and reports 0. + return replaceSubrange(subrange, withUninitializedSpace: 0, newStructure: newStructure) { _ in 0 } + } + + /// Performs a series of code-unit replacements and a URL structure replacement, allocating and writing to new storage if a different header type is necessary. + /// + /// - parameters: + /// - operations: The list of code-unit replacement operations to perform. + /// This list must be sorted by the operations' subrange, and operations may not work on overlapping subranges. + /// - newStructure: The new structure of the URL after all replacement operations have been performed. + /// + /// - returns: An `AnyURLStorage` with the new code-units and structure. If the existing storage was already capable + /// of supporting the new structure, this will wrap `self`. Otherwise, it will wrap a new storage object.
+ /// + @inlinable + internal mutating func multiReplaceSubrange( + _ operations: [ReplaceSubrangeOperation], + newStructure: URLStructure + ) -> AnyURLStorage { + + #if DEBUG + // Debug-only validation: the new structure must be valid, and the operations must be sorted by subrange and must not overlap. + do { + newStructure.checkInvariants() + var cursor = 0 + for operation in operations { + assert(operation.subrange.lowerBound >= cursor, "Overlapping commands") + cursor = operation.subrange.upperBound + } + } + #endif + + // Calculate the length of the result by applying each operation's change in length to the current length. + let newCount = operations.reduce(into: codeUnits.count) { count, op in + count += (op.newElementCount - op.subrange.count) + assert(count > 0, "count became zero or negative") + } + + // If this storage's type is already the optimal one for the result, perform the replacements in this storage. + if AnyURLStorage.isOptimalStorageType(Self.self, requiredCapacity: newCount, structure: newStructure) { + // Perform the operations in reverse order to avoid clobbering. + for operation in operations.reversed() { + codeUnits.unsafeReplaceSubrange( + operation.subrange, + withUninitializedCapacity: operation.newElementCount, + initializingWith: operation.writer + ) + } + header.copyStructure(from: newStructure) + return AnyURLStorage(self) + } + + // Otherwise, write the result in to new storage of the optimal type, alternating between copying runs of + // existing code-units and invoking each operation's writer. + let newStorage = AnyURLStorage(optimalStorageForCapacity: newCount, structure: newStructure) { dest in + return codeUnits.withUnsafeBufferPointer { src in + var destHead = dest.baseAddress.unsafelyUnwrapped + let sourceAddr = src.baseAddress.unsafelyUnwrapped + var sourceOffset = 0 + for operation in operations { + // Copy from source until command range. + let bytesToCopyFromSource = operation.subrange.lowerBound - sourceOffset + destHead.initialize(from: sourceAddr + sourceOffset, count: bytesToCopyFromSource) + destHead += bytesToCopyFromSource + sourceOffset += bytesToCopyFromSource + // Initialize space using command.
+ var buffer = UnsafeMutableBufferPointer(start: destHead, count: operation.newElementCount) + let actualBytesWritten = operation.writer(&buffer) + precondition( + actualBytesWritten == operation.newElementCount, + "Subrange initializer did not initialize the expected number of code-units" + ) + destHead += actualBytesWritten + // Advance source to command end. + sourceOffset = operation.subrange.upperBound + } + // Copy from end of last command until end of source. + let bytesToCopyFromSource = src.count - sourceOffset + destHead.initialize(from: sourceAddr + sourceOffset, count: bytesToCopyFromSource) + destHead += bytesToCopyFromSource + return dest.baseAddress.unsafelyUnwrapped.distance(to: destHead) + } + } + return newStorage + } + + /// A general setter which works for some URL components which do not have component-specific normalization logic. + /// + /// If the new value is `nil`, the component's code-units (as given by `URLStructure.range(of: Component)`) are removed, + /// and the structure's `lengthKey` is set to 0. + /// + /// Otherwise, the component's code-units are replaced with `[prefix][encoded-content]`, where `prefix` is a given ASCII character and + /// `encoded-content` is the result of percent-encoding the new value with `encodeSet`. The structure's `lengthKey` is set to the length + /// of the new code-units, including the single-character prefix. + /// + /// This simple strategy is sufficient for components which do not modify other components when they are modified. + /// For example, the query, fragment and port components may be changed without modifying any other parts of the URL. + /// However, components such as scheme, hostname, username and password require more complex logic to produce a normalized URL string -- + /// when changing the scheme, the port may also need to be modified; the hostname setter needs to deal with authority sigils, + /// and credentials have special logic for the credential separators.
+ /// This setter is sufficient for the former kind of components, but **does not include the necessary component-specific logic for the latter**. + /// + /// - parameters: + /// - component: The component to modify. + /// - newValue: The new value of the component. + /// - prefix: A single ASCII character to write before the new value. If `newValue` is not `nil`, this is _always_ written. + /// - lengthKey: The `URLStructure` field to update with the component's new length. Said length will include the single-character prefix. + /// - encodeSet: The `PercentEncodeSet` which should be used to encode the new value. + /// - adjustStructure: A closure which allows additional properties of the structure to be adjusted before writing. + /// This closure is invoked after the structure's `lengthKey` has been updated with the component's new length. + /// + /// - returns: A tuple consisting of the storage containing the result, and the range of the component's code-units within it. + /// If `newValue` is `nil` and the component is not present, this storage is returned as it is, together with + /// the range at which the component would be inserted. + /// + @inlinable + internal mutating func setSimpleComponent( + _ component: WebURL.Component, + to newValue: UTF8Bytes?, + prefix: ASCII, + lengthKey: WritableKeyPath, Int>, + encodeSet: KeyPath, + adjustStructure: (inout URLStructure) -> Void = { _ in } + ) -> (newStorage: AnyURLStorage, newSubrange: Range) + where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8, EncodeSet: PercentEncodeSetProtocol { + + let oldStructure = header.structure + + // Setting the component to nil: remove its code-units if it is present; otherwise there is nothing to do. + guard let newBytes = newValue else { + guard let existingRange = oldStructure.range(of: component) else { + return (AnyURLStorage(self), oldStructure.rangeForReplacingCodeUnits(of: component)) + } + var newStructure = oldStructure + newStructure[keyPath: lengthKey] = 0 + adjustStructure(&newStructure) + return removeSubrange(existingRange, newStructure: newStructure) + } + + // Measure the percent-encoded length of the new value, and note whether any of it actually needs encoding. + let (newLength, needsEncoding) = newBytes.lazy.percentEncodedGroups(as: encodeSet).encodedLength + + let bytesToWrite = 1 /* prefix char */ + newLength + let oldRange = oldStructure.rangeForReplacingCodeUnits(of: component) + + var newStructure = oldStructure + newStructure[keyPath: lengthKey] = 
bytesToWrite + adjustStructure(&newStructure) + + return replaceSubrange(oldRange, withUninitializedSpace: bytesToWrite, newStructure: newStructure) { dest in + // Write the prefix character, followed by the content. + dest[0] = prefix.codePoint + var bytesWritten = 1 + if needsEncoding { + bytesWritten += + UnsafeMutableBufferPointer(rebasing: dest.dropFirst()) + .fastInitialize(from: newBytes.lazy.percentEncoded(as: encodeSet)) + } else { + // Nothing needs encoding, so copy the bytes directly rather than running them through the encoder a second time. + // NOTE(review): assumes `needsEncoding == false` means the encoded output is byte-for-byte equal to `newBytes` + // (including for encode-sets which substitute bytes) -- confirm against `encodedLength`. + bytesWritten += + UnsafeMutableBufferPointer(rebasing: dest.dropFirst()) + .fastInitialize(from: newBytes) + } + return bytesWritten + } + } +} diff --git a/Sources/WebURL/URLStorage.swift b/Sources/WebURL/URLStorage.swift new file mode 100644 index 000000000..97c637edf --- /dev/null +++ b/Sources/WebURL/URLStorage.swift @@ -0,0 +1,967 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// -------------------------------------------- +// This file contains the primary types relating to storage and manipulation of URL strings. +// +// - URLStructure: The basic description of where the components are. +// +// - URLHeader [protocol]: A type which stores a particular kind of URLStructure. +// - URLStorage
: A type which owns a ManagedBuffer containing the header and URL code-units. +// - AnyURLStorage: A wrapper around URLStorage which erases its header type so you can use the URL without +// caring about that detail. +// +// - BasicURLHeader: A basic URLHeader which stores a URLStructure in its entirety. +// -------------------------------------------- + + +// -------------------------------------------- +// MARK: - URLStructure +// -------------------------------------------- + + +/// An object which can store the structure of any normalized URL string whose size is not greater than `SizeType.max`. +/// +/// The stored URL must be of the format: +/// [scheme + ":"] + [sigil]? + [username]? + [":" + password]? + ["@"]? + [hostname]? + [":" + port]? + ["/" + path]? + ["?" + query]? + ["#" + fragment]? +/// +/// If present, `sigil` must be either "//" to mark the beginning of an authority, or "/." to mark the beginning of the path. +/// A URL with an authority component requires an authority sigil; a URL without an authority only requires a path sigil if its path begins with "//". +/// +@usableFromInline +internal struct URLStructure { + + /// The length of the scheme, including trailing `:`. Must be greater than 1. + /// + @usableFromInline + internal var schemeLength: SizeType + + /// The length of the username component, not including any username-password or username-hostname separator which may be present. + /// If zero, no username is present. If the URL does not have an authority, this component cannot be present. + /// + /// If _either_ `usernameLength` or `passwordLength` are non-zero, there is a separator before the hostname. + /// Otherwise, there is no separator before the hostname. + /// + @usableFromInline + internal var usernameLength: SizeType + + /// The length of the password component, including leading `:`. Must be either 0 or greater than 1. + /// If zero, no password is present.
A password may be present even if a username is not (e.g. `foo://:pass@host.com/`). + /// If the URL does not have an authority, this component cannot be present. + /// + /// If _either_ `usernameLength` or `passwordLength` are non-zero, there is a separator before the hostname. + /// Otherwise, there is no separator before the hostname. + /// + @usableFromInline + internal var passwordLength: SizeType + + /// The length of the hostname, not including any leading or trailing separators. + /// + /// The difference between an empty and not-present host is the presence of an authority (as denoted by the presence of an authority sigil). + /// If `sigil == .authority`, a length of zero indicates an empty hostname (e.g. `foo://?query`). + /// If the URL does not have an authority, this component cannot be present. + /// + @usableFromInline + internal var hostnameLength: SizeType + + /// The length of the port, including leading `:`. Must be either 0 or greater than 1. + /// If zero, no port is present. If the URL does not have an authority, this component cannot be present. + /// + @usableFromInline + internal var portLength: SizeType + + /// The length of the path. If zero, no path is present. + /// + @usableFromInline + internal var pathLength: SizeType + + /// The length of the query, including leading `?`. If zero, no query is present; a length of 1 means the query is present but empty. + /// + @usableFromInline + internal var queryLength: SizeType + + /// The length of the fragment. If zero, no fragment is present. + /// + @usableFromInline + internal var fragmentLength: SizeType + + /// The length of the first path component. If zero, the path does not contain any components (e.g. it may not have a path, or may be a 'cannot-be-a-base' URL). + /// + @usableFromInline + internal var firstPathComponentLength: SizeType + + /// The sigil, if present. The sigil comes immediately after the scheme and identifies the component following it.
+ /// + /// If `sigil == .authority`, the next component is an authority, consisting of username/password/hostname/port components. + /// If `sigil == .path` or `sigil == nil`, the next component is a path/query/fragment and no username/password/hostname/port is present. + /// + @usableFromInline + internal var sigil: Sigil? + + /// A summary of this URL's `scheme`. + /// + /// `SchemeKind` only contains information about which kind of special scheme this URL has. All non-special schemes are represented as the same value, + /// so two URLs whose `schemeKind`s compare as equal do not necessarily have the same scheme. + /// + @usableFromInline + internal var schemeKind: WebURL.SchemeKind + + /// Whether this is a 'cannot-be-a-base' URL. + /// + /// Parsing a relative URL string against 'cannot-be-a-base' URLs will fail. This is the case for non-special URLs without an authority and whose path + /// does not begin with a `/` (e.g. `mailto:somebody@somehost.com` or `javascript:alert("hello")`). + /// + @usableFromInline + internal var cannotBeABaseURL: Bool + + /// Whether this URL's query string is known to be `application/x-www-form-urlencoded`. + /// + /// Only URLs without a query, or with an empty query, are required to set this flag when they are constructed.
+ /// + @usableFromInline + internal var queryIsKnownFormEncoded: Bool + + @inlinable + internal init( + schemeLength: SizeType, + usernameLength: SizeType, + passwordLength: SizeType, + hostnameLength: SizeType, + portLength: SizeType, + pathLength: SizeType, + queryLength: SizeType, + fragmentLength: SizeType, + firstPathComponentLength: SizeType, + sigil: Sigil?, + schemeKind: WebURL.SchemeKind, + cannotBeABaseURL: Bool, + queryIsKnownFormEncoded: Bool + ) { + self.schemeLength = schemeLength + self.usernameLength = usernameLength + self.passwordLength = passwordLength + self.hostnameLength = hostnameLength + self.portLength = portLength + self.pathLength = pathLength + self.queryLength = queryLength + self.fragmentLength = fragmentLength + self.firstPathComponentLength = firstPathComponentLength + self.sigil = sigil + self.schemeKind = schemeKind + self.cannotBeABaseURL = cannotBeABaseURL + self.queryIsKnownFormEncoded = queryIsKnownFormEncoded + } +} + +@usableFromInline +internal enum Sigil { + case authority + case path +} + +extension URLStructure { + + /// The code-unit offset where the scheme starts. Always 0. + /// + @inlinable + internal var schemeStart: SizeType { + 0 + } + + /// The code-unit offset after the scheme terminator. + /// For example, the string in `codeUnits[schemeStart..? { + guard hasAuthority else { return nil } + return Range(uncheckedBounds: (authorityStart, pathStart)) + } + + /// The range of code-units which must be replaced in order to change the URL's sigil. + /// If the URL does not contain a sigil, this property returns an empty range starting at the place where the sigil would go. 
+ /// + @inlinable + internal var rangeForReplacingSigil: Range { + let start = schemeEnd + let length: SizeType + switch sigil { + case .none: length = 0 + case .authority, .path: length = 2 + } + return Range(uncheckedBounds: (start, start &+ length)) + } + + /// Returns the range of code-units which must be replaced in order to change the content of the given component. + /// If the component is not present, this method returns an empty range starting at the place where the component would go. + /// + /// - important: Replacing/inserting code-units alone may not be sufficient to produce a normalized URL string. + /// For example, inserting a `username` where there was none before may require a credentials separator (@) to be inserted, + /// and removing an authority may require the introduction of a path sigil, etc. + /// + @inlinable + internal func rangeForReplacingCodeUnits(of component: WebURL.Component) -> Range { + checkInvariants() + let start: SizeType + let length: SizeType + switch component { + case .scheme: + start = schemeStart + length = schemeLength + case .hostname: + start = hostnameStart + length = hostnameLength + case .username: + start = usernameStart + length = usernameLength + case .password: + start = passwordStart + length = passwordLength + case .port: + start = portStart + length = portLength + case .path: + start = pathStart + length = pathLength + case .query: + start = queryStart + length = queryLength + case .fragment: + start = fragmentStart + length = fragmentLength + default: + preconditionFailure("Invalid component") + } + return Range(uncheckedBounds: (start, start &+ length)) + } + + /// Returns the range of code-units containing the content of the given component. If the component is not present, this method returns `nil`. + /// The returned range may contain leading/trailing separators, depending on the component. + /// + @inlinable + internal func range(of component: WebURL.Component) -> Range? 
{ + let range = rangeForReplacingCodeUnits(of: component) + switch component { + case .scheme: + break + // Hostname may be empty. Presence is indicated by authority sigil. + case .hostname: + guard hasAuthority else { return nil } + // Other components may not be both present and empty. + // A length of 0 means "not present"/nil. + case .username: + guard usernameLength > 0 else { return nil } + case .password: + guard passwordLength > 0 else { return nil } + case .port: + guard portLength > 0 else { return nil } + case .path: + guard pathLength > 0 else { return nil } + case .query: + guard queryLength > 0 else { return nil } + case .fragment: + guard fragmentLength > 0 else { return nil } + default: + preconditionFailure("Invalid component") + } + return range + } +} + +extension URLStructure { + + /// Whether or not this URL has an authority, as denoted by the presence of an authority sigil. + /// + @inlinable + internal var hasAuthority: Bool { + if case .authority = sigil { + return true + } else { + return false + } + } + + @inlinable + internal var hasPathSigil: Bool { + if case .path = sigil { + return true + } else { + return false + } + } + + /// Whether the path described by this structure requires a path sigil when no authority is present. + /// + @inlinable + internal var pathRequiresSigil: Bool { + firstPathComponentLength == 1 && pathLength > 1 + } + + /// If the string has credentials, it must contain a '@' separating them from the hostname. If it doesn't, it mustn't. + /// + @inlinable + internal var hasCredentialSeparator: Bool { + usernameLength != 0 || passwordLength != 0 + } + + /// Whether the URL string has one or more of username/password/port. + /// + @inlinable + internal var hasCredentialsOrPort: Bool { + usernameLength != 0 || passwordLength != 0 || portLength != 0 + } + + /// > A URL cannot have a username/password/port if its host is null or the empty string, + /// > its cannot-be-a-base-URL is true, or its scheme is "file". 
+ /// + /// https://url.spec.whatwg.org/#url-miscellaneous + /// + @inlinable + internal var cannotHaveCredentialsOrPort: Bool { + schemeKind == .file || cannotBeABaseURL || hostnameLength == 0 + } +} + +extension URLStructure { + + /// Creates a new URL structure with the same information as `other`, but whose values are stored using this structure's integer type. + /// This initializer will trigger a runtime error if this structure's integer type is not capable of exactly representing the structure described by `other`. + /// + @inlinable + internal init(copying other: URLStructure) { + if let sameTypeOtherStructure = other as? Self { + self = sameTypeOtherStructure + return + } + self.init( + schemeLength: SizeType(other.schemeLength), + usernameLength: SizeType(other.usernameLength), + passwordLength: SizeType(other.passwordLength), + hostnameLength: SizeType(other.hostnameLength), + portLength: SizeType(other.portLength), + pathLength: SizeType(other.pathLength), + queryLength: SizeType(other.queryLength), + fragmentLength: SizeType(other.fragmentLength), + firstPathComponentLength: SizeType(other.firstPathComponentLength), + sigil: other.sigil, + schemeKind: other.schemeKind, + cannotBeABaseURL: other.cannotBeABaseURL, + queryIsKnownFormEncoded: other.queryIsKnownFormEncoded + ) + checkInvariants() + } + + /// An `URLStructure` whose component lengths are all 0 and flags are bogus values. + /// Since the scheme length is 0, this structure **does not describe a valid URL string**. + /// + /// This should only be used by the `StructureAndMetricsCollector`. 
+ /// + @inlinable + internal static func invalidEmptyStructure() -> URLStructure { + return URLStructure( + schemeLength: 0, + usernameLength: 0, + passwordLength: 0, + hostnameLength: 0, + portLength: 0, + pathLength: 0, + queryLength: 0, + fragmentLength: 0, + firstPathComponentLength: 0, + sigil: nil, + schemeKind: .other, + cannotBeABaseURL: false, + queryIsKnownFormEncoded: false + ) + } +} + +extension URLStructure { + + /// Whether this structure and `other` have the same component lengths, sigil, scheme kind, and cannot-be-a-base flag. + /// + /// NOTE(review): `queryIsKnownFormEncoded` is not part of this comparison. Presumably that is deliberate, as the flag describes + /// what is known about the query's contents rather than where the components are -- confirm. + /// + @usableFromInline + internal func describesSameStructure(as other: Self) -> Bool { + schemeLength == other.schemeLength && usernameLength == other.usernameLength + && passwordLength == other.passwordLength && hostnameLength == other.hostnameLength + && portLength == other.portLength && pathLength == other.pathLength + && firstPathComponentLength == other.firstPathComponentLength && queryLength == other.queryLength + && fragmentLength == other.fragmentLength && sigil == other.sigil && schemeKind == other.schemeKind + && cannotBeABaseURL == other.cannotBeABaseURL + } + + /// Performs debug-mode checks to ensure that this URL structure does not contain invalid combinations of values. + /// + /// This method does not check the _contents_ of the URL string (e.g. it does not check that `schemeKind` matches the code-units of the scheme, that the sigil + /// or any other expected separators are actually present, etc). + /// + #if DEBUG + @usableFromInline + internal func checkInvariants() { + + // No values may be negative.
+ assert(schemeLength >= 0, "Scheme has negative length") + assert(usernameLength >= 0, "Username has negative length") + assert(passwordLength >= 0, "Password has negative length") + assert(hostnameLength >= 0, "Hostname has negative length") + assert(portLength >= 0, "Port has negative length") + assert(pathLength >= 0, "Path has negative length") + assert(queryLength >= 0, "Query has negative length") + assert(fragmentLength >= 0, "Fragment has negative length") + assert(firstPathComponentLength >= 0, "First Path Component has negative length") + + assert(schemeLength > 1, "Scheme must be present, cannot be empty") + assert(passwordLength != 1, "Password is an orphaned separator, which is invalid") + assert(portLength != 1, "Port is an orphaned separator, which is invalid") + + switch sigil { + case .authority: + break + case .path: + assert(firstPathComponentLength == 1, "Path sigil present, but path does not begin with an empty component") + assert(pathLength > 1, "Path sigil present, but path is too short to need one") + fallthrough + default: + assert(usernameLength == 0, "A URL without authority cannot have a username") + assert(passwordLength == 0, "A URL without authority cannot have a password") + assert(hostnameLength == 0, "A URL without authority cannot have a hostname") + assert(portLength == 0, "A URL without authority cannot have a port") + } + + if cannotBeABaseURL { + assert(sigil == nil, "cannot-be-a-base URLs cannot have an authority or path sigil") + } + if schemeKind.isSpecial { + assert(sigil == .authority, "URLs with special schemes must have an authority") + assert(pathLength != 0, "URLs with special schemes must have a path") + assert(!cannotBeABaseURL, "URLs with special schemes are never cannot-be-a-base") + } + + if cannotHaveCredentialsOrPort { + assert(usernameLength == 0, "URL cannot have credentials or port, but has a username") + assert(passwordLength == 0, "URL cannot have credentials or port, but has a password") + 
assert(portLength == 0, "URL cannot have credentials or port, but has a port") + } + + if queryLength == 0 || queryLength == 1 { + assert(queryIsKnownFormEncoded, "Empty and nil queries must always be flagged as being form-encoded") + } + + if cannotBeABaseURL { + assert(firstPathComponentLength == 0, "cannot-be-a-base URLs do not have path components") + } else { + assert(firstPathComponentLength <= pathLength, "First path component is longer than the entire path") + if pathLength != 0 { + assert(firstPathComponentLength != 0, "First path component length not set") + } + } + } + #else + @inlinable @inline(__always) + func checkInvariants() {} + #endif +} + +extension Sigil { + + /// The number of bytes required to write the sigil's code-units. + /// + @inlinable + internal var length: Int { + return 2 + } + + /// Writes the sigil's code-units to the given buffer. The buffer must contain at least 2 bytes of space. + /// + /// - returns: The actual number of bytes written (always 2, unless the buffer is `nil`). + /// + @inlinable + internal func unsafeWrite(to buffer: inout UnsafeMutableBufferPointer) -> Int { + guard let ptr = buffer.baseAddress else { return 0 } + switch self { + case .authority: + ptr.initialize(repeating: ASCII.forwardSlash.codePoint, count: 2) + case .path: + ptr[0] = ASCII.forwardSlash.codePoint + ptr[1] = ASCII.period.codePoint + } + return 2 + } +} + + +// -------------------------------------------- +// MARK: - URLStorage +// -------------------------------------------- + + +/// A `ManagedBufferHeader` which stores a URL's structure. +/// +/// When a URL is constructed or mutated, the parser or setter function first calculates the structure and required capacity of the resulting normalized URL string. +/// +/// For mutations, `AnyURLStorage.isOptimalStorageType(_:requiredCapacity:structure:)` is consulted to check if the existing +/// header type is appropriate for the resulting string. 
If it is, the existing capacity is sufficient, and the storage is uniquely referenced, the modification occurs in-place. +/// Otherwise, `AnyURLStorage(optimalStorageForCapacity:structure:initializingCodeUnitsWith:)` is used to create storage with +/// the appropriate header type. +/// +@usableFromInline +internal protocol URLHeader: ManagedBufferHeader { + + /// Returns an `AnyURLStorage` which wraps the given storage object. + /// + /// - Important: This means the only types that may conform to `URLHeader` are those supported by `AnyURLStorage`. + /// + static func eraseToAnyURLStorage(_ storage: URLStorage) -> AnyURLStorage + + /// Creates a new header with the given structure. The header's `capacity` and `count` are not specified by this initializer, and are set through + /// the `ManagedBufferHeader` interface when the header is attached to storage and that storage is populated with code-units. + /// + /// The header must be capable of exactly reproducing the given structure. Otherwise, this initializer must trigger a runtime error. + /// + init(structure: URLStructure) + + /// Updates the URL structure stored by this header to reflect some prior change to the associated code-units. + /// + /// This method only updates the description of the URL's structure; it **does not** alter the header's `count` or `capacity`, + /// which the operations modifying the code-units are expected to keep accurate. + /// + /// The header must be capable of exactly reproducing the given structure. Otherwise, this method must trigger a runtime error. + /// + mutating func copyStructure(from newStructure: URLStructure) + + /// The structure of the URL string stored in the code-units associated with this header. + /// + var structure: URLStructure { get } +} + +/// The primary type responsible for URL storage.
+/// +/// A `URLStorage` object wraps a `ManagedArrayBuffer`, containing the normalized URL string's contiguous code-units, together +/// with a header describing the structure of the URL components within those code-units. Headers may store that description in different ways, +/// and may not support all possible URL strings; mutating functions must make sure to allocate storage with an appropriate header type for the +/// resulting URL string. The `AnyURLStorage` type is able to advise, create, and abstract over variations in header type. +/// +/// `URLStorage` has value semantics via `ManagedArrayBuffer`, with modifications to multiply-referenced storage copying on write. +/// +@usableFromInline +internal struct URLStorage { + + @usableFromInline + internal var codeUnits: ManagedArrayBuffer + + @inlinable + internal var header: Header { + get { return codeUnits.header } + _modify { yield &codeUnits.header } + } + + /// Allocates new storage with sufficient capacity to store `count` code-units, and a header describing the given `structure`. + /// The `initializer` closure is invoked to write the code-units, and must return the number of code-units initialized. + /// + /// If the header cannot exactly reproduce the given `structure`, a runtime error is triggered. + /// Use `AnyURLStorage` to allocate storage with the appropriate header for a given structure. + /// + /// - parameters: + /// - count: The number of UTF8 code-units contained in the normalized URL string that `initializer` will write to the new storage. + /// - structure: The structure of the normalized URL string that `initializer` will write to the new storage. + /// - initializer: A closure which must initialize exactly `count` code-units in the buffer pointer it is given, matching the normalized URL string + /// described by `structure`.
The closure returns the number of bytes actually written to storage, which should be + /// calculated by the closure independently as it writes the contents, which serves as a safety check to avoid exposing uninitialized storage. + /// + @inlinable + internal init( + count: Int, + structure: URLStructure, + initializingCodeUnitsWith initializer: (inout UnsafeMutableBufferPointer) -> Int + ) { + self.codeUnits = ManagedArrayBuffer(minimumCapacity: count, initialHeader: Header(structure: structure)) + assert(self.codeUnits.count == 0) + assert(self.codeUnits.header.capacity >= count) + self.codeUnits.unsafeAppend(uninitializedCapacity: count) { buffer in initializer(&buffer) } + assert(self.codeUnits.header.count == count) + } +} + +extension URLStorage { + + @inlinable + internal func withUTF8OfAllAuthorityComponents( + _ body: ( + _ authorityString: UnsafeBufferPointer?, + _ usernameLength: Int, + _ passwordLength: Int, + _ hostnameLength: Int, + _ portLength: Int + ) -> T + ) -> T { + let structure = header.structure + guard let range = structure.rangeOfAuthorityString else { return body(nil, 0, 0, 0, 0) } + // Note: ManagedArrayBuffer.withUnsafeBufferPointer(range:) is bounds-checked. + return codeUnits.withUnsafeBufferPointer(range: range) { buffer in + body(buffer, structure.usernameLength, structure.passwordLength, structure.hostnameLength, structure.portLength) + } + } +} + + +// -------------------------------------------- +// MARK: - AnyURLStorage +// -------------------------------------------- + + +/// This enum serves like an existential for `URLStorage` with a limited set of supported header types. +/// It is also able to determine the optimal header type for a `URLStructure`. 
+/// +@usableFromInline +internal enum AnyURLStorage { + case small(URLStorage>) + case large(URLStorage>) + + @inlinable + internal init(_ storage: URLStorage) { + self = T.eraseToAnyURLStorage(storage) + } +} + +extension AnyURLStorage { + + /// Allocates a new storage object, with the header type best-suited for a normalized URL string with the given size and structure. + /// The `initializer` closure is invoked to write the code-units, and must return the number of code-units initialized. + /// + /// - parameters: + /// - count: The number of UTF8 code-units contained in the normalized URL string that `initializer` will write to the new storage. + /// - structure: The structure of the normalized URL string that `initializer` will write to the new storage. + /// - initializer: A closure which must initialize exactly `count` code-units in the buffer pointer it is given, matching the normalized URL string + /// described by `structure`. The closure returns the number of bytes actually written to storage, which should be + /// calculated by the closure independently as it writes the contents, which serves as a safety check to avoid exposing uninitialized storage. + /// + @inlinable + internal init( + optimalStorageForCapacity count: Int, + structure: URLStructure, + initializingCodeUnitsWith initializer: (inout UnsafeMutableBufferPointer) -> Int + ) { + if count <= UInt8.max { + self = .small( + URLStorage>(count: count, structure: structure, initializingCodeUnitsWith: initializer) + ) + } else { + self = .large( + URLStorage>(count: count, structure: structure, initializingCodeUnitsWith: initializer) + ) + } + } + + /// Whether or not `type` is the optimal storage type for a normalized URL string of the given size and structure. + /// It should be assumed that types which return `false` cannot store a URL with the given structure at all, + /// and that attempting to do so will trigger a runtime error. 
+ /// + @inlinable + internal static func isOptimalStorageType( + _ type: URLStorage.Type, requiredCapacity: Int, structure: URLStructure + ) -> Bool { + if requiredCapacity <= UInt8.max { + return type == URLStorage>.self + } + return type == URLStorage>.self + } +} + +extension AnyURLStorage { + + @inlinable + internal var structure: URLStructure { + switch self { + case .small(let storage): return storage.header.structure + case .large(let storage): return storage.header.structure + } + } + + @inlinable + internal var schemeKind: WebURL.SchemeKind { + structure.schemeKind + } + + @inlinable + internal var cannotBeABaseURL: Bool { + structure.cannotBeABaseURL + } + + @inlinable + internal func withUTF8OfAllAuthorityComponents( + _ body: ( + _ authorityString: UnsafeBufferPointer?, + _ usernameLength: Int, + _ passwordLength: Int, + _ hostnameLength: Int, + _ portLength: Int + ) -> R + ) -> R { + switch self { + case .small(let storage): return storage.withUTF8OfAllAuthorityComponents(body) + case .large(let storage): return storage.withUTF8OfAllAuthorityComponents(body) + } + } +} + +/// The URL `a:` - essentially the smallest valid URL string. This is used to temporarily occupy an `AnyURLStorage`, +/// so that its _actual_ storage can be moved to a uniquely-referenced local variable. +/// +/// It should not be possible to observe a URL whose storage is set to this object.
+/// +@usableFromInline +internal let _tempStorage = AnyURLStorage( + URLStorage>( + count: 2, + structure: URLStructure( + schemeLength: 2, usernameLength: 0, passwordLength: 0, hostnameLength: 0, + portLength: 0, pathLength: 0, queryLength: 0, fragmentLength: 0, firstPathComponentLength: 0, + sigil: nil, schemeKind: .other, cannotBeABaseURL: true, queryIsKnownFormEncoded: true), + initializingCodeUnitsWith: { buffer in + buffer[0] = ASCII.a.codePoint + buffer[1] = ASCII.colon.codePoint + return 2 + } + ) +) + +extension AnyURLStorage { + + @inlinable + internal mutating func withUnwrappedMutableStorage( + _ small: (inout URLStorage>) -> (AnyURLStorage), + _ large: (inout URLStorage>) -> (AnyURLStorage) + ) { + // We need to go through a bit of a dance in order to get a unique reference to the storage. + // It's like if you have something stuck to one hand and try to remove it with the other hand. + // + // Basically: + // 1. Swap our storage to temporarily point to some read-only global, so our only storage reference is + // via a local variable. + // 2. Extract the URLStorage (which is a COW value type) from local variable's enum payload, and set + // the local to also point that read-only global. + // 3. Hand that extracted storage off to closure `inout`, which does what it wants and + // returns a storage object back (possibly the same storage object). + // 4. We round it all off by assigning that value as our new storage. Phew. + var localRef = self + self = _tempStorage + switch localRef { + case .large(var extracted_storage): + localRef = _tempStorage + self = large(&extracted_storage) + case .small(var extracted_storage): + localRef = _tempStorage + self = small(&extracted_storage) + } + } + + @inlinable + internal mutating func withUnwrappedMutableStorage( + _ small: (inout URLStorage>) -> (AnyURLStorage, URLSetterError?), + _ large: (inout URLStorage>) -> (AnyURLStorage, URLSetterError?) 
+ ) throws { + // As above, but allows the closure to return a URLSetterError. + var error: URLSetterError? + var localRef = self + self = _tempStorage + switch localRef { + case .large(var extracted_storage): + localRef = _tempStorage + (self, error) = large(&extracted_storage) + case .small(var extracted_storage): + localRef = _tempStorage + (self, error) = small(&extracted_storage) + } + if let error = error { + throw error + } + } +} + + +// -------------------------------------------- +// MARK: - BasicURLHeader +// -------------------------------------------- + + +/// A marker protocol for integer types supported by `AnyURLStorage` when wrapping a `URLStorage>`. +/// +@usableFromInline +internal protocol AnyURLStorageSupportedBasicHeaderSize: FixedWidthInteger { + + /// Wraps the given `storage` in the appropriate `AnyURLStorage`. + /// + static func _eraseToAnyURLStorage(_ storage: URLStorage>) -> AnyURLStorage +} + +extension Int: AnyURLStorageSupportedBasicHeaderSize { + + @inlinable + internal static func _eraseToAnyURLStorage(_ storage: URLStorage>) -> AnyURLStorage { + return .large(storage) + } +} + +extension UInt8: AnyURLStorageSupportedBasicHeaderSize { + + @inlinable + internal static func _eraseToAnyURLStorage(_ storage: URLStorage>) -> AnyURLStorage { + return .small(storage) + } +} + +/// A `ManagedBufferHeader` containing a complete `URLStructure` and size-appropriate `count` and `capacity` fields. 
+/// +@usableFromInline +internal struct BasicURLHeader { + + @usableFromInline + internal var _count: SizeType + + @usableFromInline + internal var _capacity: SizeType + + @usableFromInline + internal var _structure: URLStructure + + @inlinable + internal init(_count: SizeType, _capacity: SizeType, structure: URLStructure) { + self._count = _count + self._capacity = _capacity + self._structure = structure + } + + @inlinable + internal static func _closestAddressableCapacity(to idealCapacity: Int) -> SizeType { + if idealCapacity <= Int(SizeType.max) { + return SizeType(idealCapacity) + } else { + return SizeType.max + } + } +} + +extension BasicURLHeader: ManagedBufferHeader { + + @inlinable + internal var count: Int { + get { return Int(_count) } + set { _count = SizeType(newValue) } + } + + @inlinable + internal var capacity: Int { + return Int(_capacity) + } + + @inlinable + internal func withCapacity(minimumCapacity: Int, maximumCapacity: Int) -> Self? { + let newCapacity = Self._closestAddressableCapacity(to: maximumCapacity) + guard newCapacity >= minimumCapacity else { + return nil + } + return Self(_count: _count, _capacity: newCapacity, structure: _structure) + } +} + +extension BasicURLHeader: URLHeader where SizeType: AnyURLStorageSupportedBasicHeaderSize { + + @inlinable + internal static func eraseToAnyURLStorage(_ storage: URLStorage) -> AnyURLStorage { + return SizeType._eraseToAnyURLStorage(storage) + } + + @inlinable + internal init(structure: URLStructure) { + self = .init(_count: 0, _capacity: 0, structure: URLStructure(copying: structure)) + } + + @inlinable + internal mutating func copyStructure(from newStructure: URLStructure) { + self._structure = URLStructure(copying: newStructure) + } + + @inlinable + internal var structure: URLStructure { + return URLStructure(copying: _structure) + } +} diff --git a/Sources/WebURL/Util/ASCII+LazyTextTransformations.swift b/Sources/WebURL/Util/ASCII+LazyTextTransformations.swift new file mode 100644 index 
000000000..96be3f7d7 --- /dev/null +++ b/Sources/WebURL/Util/ASCII+LazyTextTransformations.swift @@ -0,0 +1,250 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +// -------------------------------------------- +// MARK: - Newline and tab filtering +// -------------------------------------------- + + +extension ASCII { + + /// Returns a collection which has the same contents as the given collection, but without any newline characters or horizontal tabs. + /// + /// If the only newline or tab characters are at the ends of the given collection, this method returns a trimmed `SubSequence` of the original data in order to + /// maintain the collection's performance characteristics. If the collection contains additional newlines or tabs, a lazily-filtering wrapper is returned instead. + /// + @inlinable + internal static func filterNewlinesAndTabs( + from utf8: UTF8Bytes + ) -> Either, UTF8Bytes.SubSequence> + where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 { + + let trimmedSlice = utf8.trim { isNewlineOrTab($0) } + if trimmedSlice.isEmpty == false, trimmedSlice.contains(where: { isNewlineOrTab($0) }) { + return .left(ASCII.NewlineAndTabFiltered(unchecked: trimmedSlice)) + } + return .right(trimmedSlice) + } + + /// If `true`, this character is a newline or tab (`0x0A` line feed, `0x0D` carriage return, or `0x09` horizontal tab).
+ /// + @inlinable + internal static func isNewlineOrTab(_ codeUnit: UInt8) -> Bool { + (codeUnit & 0b1111_1011 == 0b0000_1001) // horizontal tab (0x09) and carriage return (0x0D) + || codeUnit == ASCII.lineFeed.codePoint + } + + /// A collection of UTF8-encoded bytes with ASCII newline and tab characters lazily removed. + /// + @usableFromInline + internal struct NewlineAndTabFiltered where Base: Collection, Base.Element == UInt8 { + + @usableFromInline + internal let base: Base.SubSequence + + /// Creates a view over a slice whose start has already been trimmed of tabs and newlines. + /// + @inlinable + internal init(unchecked base: Base.SubSequence) { + assert(base.first.map { !ASCII.isNewlineOrTab($0) } ?? true, "slice has not been trimmed") + self.base = base + } + + /// Creates a view over a collection which may start with a tab or newline. + /// + @inlinable + internal init(_ base: Base) { + self.init(unchecked: base.drop(while: ASCII.isNewlineOrTab)) + } + } +} + +extension ASCII.NewlineAndTabFiltered: Collection { + + @usableFromInline typealias Index = Base.Index + @usableFromInline typealias Element = Base.Element + + @inlinable + internal var startIndex: Index { + base.startIndex + } + + @inlinable + internal var endIndex: Index { + base.endIndex + } + + @inlinable + internal subscript(position: Index) -> UInt8 { + base[position] + } + + @inlinable + internal subscript(bounds: Range) -> Self { + Self(unchecked: base[bounds]) + } + + @inlinable + internal func index(after i: Index) -> Index { + let next = base.index(after: i) + return base[Range(uncheckedBounds: (next, endIndex))].firstIndex { !ASCII.isNewlineOrTab($0) } ?? endIndex + } + + @inlinable + internal func formIndex(after i: inout Index) { + base.formIndex(after: &i) + i = base[Range(uncheckedBounds: (i, endIndex))].firstIndex { !ASCII.isNewlineOrTab($0) } ?? 
endIndex + } +} + +extension ASCII.NewlineAndTabFiltered: BidirectionalCollection where Base: BidirectionalCollection { + + @inlinable + internal func index(before i: Index) -> Index { + // Note that decrementing startIndex does not trap (BidirectionalCollection does not require it); + // it just keeps returning startIndex. + return base[Range(uncheckedBounds: (startIndex, i))].lastIndex { !ASCII.isNewlineOrTab($0) } ?? startIndex + } + + @inlinable + internal func formIndex(before i: inout Index) { + // Note that decrementing startIndex does not trap (BidirectionalCollection does not require it); + // it just keeps returning startIndex. + i = base[Range(uncheckedBounds: (startIndex, i))].lastIndex { !ASCII.isNewlineOrTab($0) } ?? startIndex + } +} + + +// -------------------------------------------- +// MARK: - Lowercasing +// -------------------------------------------- + + +extension ASCII { + + /// A collection of UTF8-encoded bytes with ASCII uppercase alpha characters (A-Z) lazily replaced with their lowercase counterparts. + /// Other characters are left unchanged. + /// + @usableFromInline + internal struct Lowercased where Base: Sequence, Base.Element == UInt8 { + + @usableFromInline + internal var base: Base + + @inlinable + internal init(_ base: Base) { + self.base = base + } + } +} + +extension ASCII.Lowercased: Sequence { + + @usableFromInline typealias Element = UInt8 + + @usableFromInline + internal struct Iterator: IteratorProtocol { + + @usableFromInline + internal var baseIterator: Base.Iterator + + @inlinable + internal init(baseIterator: Base.Iterator) { + self.baseIterator = baseIterator + } + + @inlinable + internal mutating func next() -> UInt8? { + baseIterator.next().flatMap { ASCII($0)?.lowercased.codePoint ?? 
$0 } + } + } + + @inlinable + internal func makeIterator() -> Iterator { + Iterator(baseIterator: base.makeIterator()) + } +} + +extension ASCII.Lowercased: Collection where Base: Collection { + + @usableFromInline typealias Index = Base.Index + + @inlinable + internal var startIndex: Index { + base.startIndex + } + + @inlinable + internal var endIndex: Index { + base.endIndex + } + + @inlinable + internal subscript(position: Index) -> UInt8 { + let byte = base[position] + return ASCII(byte)?.lowercased.codePoint ?? byte + } + + @inlinable + internal func index(after i: Index) -> Index { + base.index(after: i) + } + + @inlinable + internal func formIndex(after i: inout Index) { + base.formIndex(after: &i) + } + + @inlinable + internal func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? { + base.index(i, offsetBy: distance, limitedBy: limit) + } + + @inlinable + internal func formIndex(_ i: inout Index, offsetBy distance: Int, limitedBy limit: Index) -> Bool { + base.formIndex(&i, offsetBy: distance, limitedBy: limit) + } + + @inlinable + internal var count: Int { + base.count + } + + @inlinable + internal var isEmpty: Bool { + base.isEmpty + } + + @inlinable + internal func distance(from start: Index, to end: Index) -> Int { + base.distance(from: start, to: end) + } +} + +extension ASCII.Lowercased: BidirectionalCollection where Base: BidirectionalCollection { + + @inlinable + internal func index(before i: Index) -> Index { + base.index(before: i) + } + + @inlinable + internal func formIndex(before i: inout Index) { + base.formIndex(before: &i) + } +} + +extension ASCII.Lowercased: RandomAccessCollection where Base: RandomAccessCollection { +} diff --git a/Sources/WebURL/Util/ASCII.swift b/Sources/WebURL/Util/ASCII.swift new file mode 100644 index 000000000..da01ad9e3 --- /dev/null +++ b/Sources/WebURL/Util/ASCII.swift @@ -0,0 +1,550 @@ +// Copyright The swift-url Contributors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// An ASCII character. +/// +@usableFromInline +internal struct ASCII { + + /// The Unicode codepoint of this character (also the value of this character's UTF8 code-unit). + /// This value is validated at construction to be in the range `0..<128`. + /// + @usableFromInline + internal let codePoint: UInt8 +} + +extension ASCII { + + @inlinable @inline(__always) + internal init(_unchecked v: UInt8) { + assert(v & 0x7F == v, "Not an ASCII code point") + self.codePoint = v + } + + @inlinable @inline(__always) + internal init?(_ v: UInt8) { + guard v & 0x7F == v else { return nil } + self.init(_unchecked: v) + } + + @inlinable @inline(__always) + internal init?(flatMap v: UInt8?) { + guard let byte = v, byte & 0x7F == byte else { return nil } + self.init(_unchecked: byte) + } +} + +// Standard protocols. 
+ +extension ASCII: Comparable, Equatable, CustomStringConvertible { + + @inlinable @inline(__always) + internal static func < (lhs: ASCII, rhs: ASCII) -> Bool { + lhs.codePoint < rhs.codePoint + } + + @inlinable @inline(__always) + internal static func == (lhs: ASCII, rhs: ASCII) -> Bool { + lhs.codePoint == rhs.codePoint + } + + @inlinable + internal var description: String { + String(decoding: CollectionOfOne(codePoint), as: UTF8.self) + } +} + + +// -------------------------------------------- +// MARK: - Character table +// -------------------------------------------- + + +// swift-format-ignore +extension ASCII { + + // C0 Control Characters. + @inlinable static var null : ASCII { ASCII(_unchecked: 0x00) } + @inlinable static var startOfHeading : ASCII { ASCII(_unchecked: 0x01) } + @inlinable static var startOfText : ASCII { ASCII(_unchecked: 0x02) } + @inlinable static var endOfText : ASCII { ASCII(_unchecked: 0x03) } + @inlinable static var endOfTransmission : ASCII { ASCII(_unchecked: 0x04) } + @inlinable static var enquiry : ASCII { ASCII(_unchecked: 0x05) } + @inlinable static var acknowledge : ASCII { ASCII(_unchecked: 0x06) } + @inlinable static var bell : ASCII { ASCII(_unchecked: 0x07) } + @inlinable static var backspace : ASCII { ASCII(_unchecked: 0x08) } + @inlinable static var horizontalTab : ASCII { ASCII(_unchecked: 0x09) } + @inlinable static var lineFeed : ASCII { ASCII(_unchecked: 0x0A) } + @inlinable static var verticalTab : ASCII { ASCII(_unchecked: 0x0B) } + @inlinable static var formFeed : ASCII { ASCII(_unchecked: 0x0C) } + @inlinable static var carriageReturn : ASCII { ASCII(_unchecked: 0x0D) } + @inlinable static var shiftOut : ASCII { ASCII(_unchecked: 0x0E) } + @inlinable static var shiftIn : ASCII { ASCII(_unchecked: 0x0F) } + @inlinable static var dataLinkEscape : ASCII { ASCII(_unchecked: 0x10) } + @inlinable static var deviceControl1 : ASCII { ASCII(_unchecked: 0x11) } + @inlinable static var deviceControl2 : ASCII { 
ASCII(_unchecked: 0x12) } + @inlinable static var deviceControl3 : ASCII { ASCII(_unchecked: 0x13) } + @inlinable static var deviceControl4 : ASCII { ASCII(_unchecked: 0x14) } + @inlinable static var negativeAcknowledge : ASCII { ASCII(_unchecked: 0x15) } + @inlinable static var synchronousIdle : ASCII { ASCII(_unchecked: 0x16) } + @inlinable static var endOfTransmissionBlock: ASCII { ASCII(_unchecked: 0x17) } + @inlinable static var cancel : ASCII { ASCII(_unchecked: 0x18) } + @inlinable static var endOfMedium : ASCII { ASCII(_unchecked: 0x19) } + @inlinable static var substitute : ASCII { ASCII(_unchecked: 0x1A) } + @inlinable static var escape : ASCII { ASCII(_unchecked: 0x1B) } + @inlinable static var fileSeparator : ASCII { ASCII(_unchecked: 0x1C) } + @inlinable static var groupSeparator : ASCII { ASCII(_unchecked: 0x1D) } + @inlinable static var recordSeparator : ASCII { ASCII(_unchecked: 0x1E) } + @inlinable static var unitSeparator : ASCII { ASCII(_unchecked: 0x1F) } + // Special Characters. 
+ @inlinable static var space : ASCII { ASCII(_unchecked: 0x20) } + @inlinable static var exclamationMark : ASCII { ASCII(_unchecked: 0x21) } + @inlinable static var doubleQuotationMark: ASCII { ASCII(_unchecked: 0x22) } + @inlinable static var numberSign : ASCII { ASCII(_unchecked: 0x23) } + @inlinable static var dollarSign : ASCII { ASCII(_unchecked: 0x24) } + @inlinable static var percentSign : ASCII { ASCII(_unchecked: 0x25) } + @inlinable static var ampersand : ASCII { ASCII(_unchecked: 0x26) } + @inlinable static var apostrophe : ASCII { ASCII(_unchecked: 0x27) } + @inlinable static var leftParenthesis : ASCII { ASCII(_unchecked: 0x28) } + @inlinable static var rightParenthesis : ASCII { ASCII(_unchecked: 0x29) } + @inlinable static var asterisk : ASCII { ASCII(_unchecked: 0x2A) } + @inlinable static var plus : ASCII { ASCII(_unchecked: 0x2B) } + @inlinable static var comma : ASCII { ASCII(_unchecked: 0x2C) } + @inlinable static var minus : ASCII { ASCII(_unchecked: 0x2D) } + @inlinable static var period : ASCII { ASCII(_unchecked: 0x2E) } + @inlinable static var forwardSlash : ASCII { ASCII(_unchecked: 0x2F) } + // Numbers. + @inlinable static var n0: ASCII { ASCII(_unchecked: 0x30) } + @inlinable static var n1: ASCII { ASCII(_unchecked: 0x31) } + @inlinable static var n2: ASCII { ASCII(_unchecked: 0x32) } + @inlinable static var n3: ASCII { ASCII(_unchecked: 0x33) } + @inlinable static var n4: ASCII { ASCII(_unchecked: 0x34) } + @inlinable static var n5: ASCII { ASCII(_unchecked: 0x35) } + @inlinable static var n6: ASCII { ASCII(_unchecked: 0x36) } + @inlinable static var n7: ASCII { ASCII(_unchecked: 0x37) } + @inlinable static var n8: ASCII { ASCII(_unchecked: 0x38) } + @inlinable static var n9: ASCII { ASCII(_unchecked: 0x39) } + // Some punctuation. 
+ @inlinable static var colon : ASCII { ASCII(_unchecked: 0x3A) } + @inlinable static var semicolon : ASCII { ASCII(_unchecked: 0x3B) } + @inlinable static var lessThanSign : ASCII { ASCII(_unchecked: 0x3C) } + @inlinable static var equalSign : ASCII { ASCII(_unchecked: 0x3D) } + @inlinable static var greaterThanSign: ASCII { ASCII(_unchecked: 0x3E) } + @inlinable static var questionMark : ASCII { ASCII(_unchecked: 0x3F) } + @inlinable static var commercialAt : ASCII { ASCII(_unchecked: 0x40) } + // Upper-case letters. + @inlinable static var A: ASCII { ASCII(_unchecked: 0x41) } + @inlinable static var B: ASCII { ASCII(_unchecked: 0x42) } + @inlinable static var C: ASCII { ASCII(_unchecked: 0x43) } + @inlinable static var D: ASCII { ASCII(_unchecked: 0x44) } + @inlinable static var E: ASCII { ASCII(_unchecked: 0x45) } + @inlinable static var F: ASCII { ASCII(_unchecked: 0x46) } + @inlinable static var G: ASCII { ASCII(_unchecked: 0x47) } + @inlinable static var H: ASCII { ASCII(_unchecked: 0x48) } + @inlinable static var I: ASCII { ASCII(_unchecked: 0x49) } + @inlinable static var J: ASCII { ASCII(_unchecked: 0x4A) } + @inlinable static var K: ASCII { ASCII(_unchecked: 0x4B) } + @inlinable static var L: ASCII { ASCII(_unchecked: 0x4C) } + @inlinable static var M: ASCII { ASCII(_unchecked: 0x4D) } + @inlinable static var N: ASCII { ASCII(_unchecked: 0x4E) } + @inlinable static var O: ASCII { ASCII(_unchecked: 0x4F) } + @inlinable static var P: ASCII { ASCII(_unchecked: 0x50) } + @inlinable static var Q: ASCII { ASCII(_unchecked: 0x51) } + @inlinable static var R: ASCII { ASCII(_unchecked: 0x52) } + @inlinable static var S: ASCII { ASCII(_unchecked: 0x53) } + @inlinable static var T: ASCII { ASCII(_unchecked: 0x54) } + @inlinable static var U: ASCII { ASCII(_unchecked: 0x55) } + @inlinable static var V: ASCII { ASCII(_unchecked: 0x56) } + @inlinable static var W: ASCII { ASCII(_unchecked: 0x57) } + @inlinable static var X: ASCII { ASCII(_unchecked: 0x58) } + 
@inlinable static var Y: ASCII { ASCII(_unchecked: 0x59) } + @inlinable static var Z: ASCII { ASCII(_unchecked: 0x5A) } + // More special characters. + @inlinable static var leftSquareBracket : ASCII { ASCII(_unchecked: 0x5B) } + @inlinable static var backslash : ASCII { ASCII(_unchecked: 0x5C) } + @inlinable static var rightSquareBracket: ASCII { ASCII(_unchecked: 0x5D) } + @inlinable static var circumflexAccent : ASCII { ASCII(_unchecked: 0x5E) } + @inlinable static var underscore : ASCII { ASCII(_unchecked: 0x5F) } + @inlinable static var backtick : ASCII { ASCII(_unchecked: 0x60) } + // Lower-case letters. + @inlinable static var a: ASCII { ASCII(_unchecked: 0x61) } + @inlinable static var b: ASCII { ASCII(_unchecked: 0x62) } + @inlinable static var c: ASCII { ASCII(_unchecked: 0x63) } + @inlinable static var d: ASCII { ASCII(_unchecked: 0x64) } + @inlinable static var e: ASCII { ASCII(_unchecked: 0x65) } + @inlinable static var f: ASCII { ASCII(_unchecked: 0x66) } + @inlinable static var g: ASCII { ASCII(_unchecked: 0x67) } + @inlinable static var h: ASCII { ASCII(_unchecked: 0x68) } + @inlinable static var i: ASCII { ASCII(_unchecked: 0x69) } + @inlinable static var j: ASCII { ASCII(_unchecked: 0x6A) } + @inlinable static var k: ASCII { ASCII(_unchecked: 0x6B) } + @inlinable static var l: ASCII { ASCII(_unchecked: 0x6C) } + @inlinable static var m: ASCII { ASCII(_unchecked: 0x6D) } + @inlinable static var n: ASCII { ASCII(_unchecked: 0x6E) } + @inlinable static var o: ASCII { ASCII(_unchecked: 0x6F) } + @inlinable static var p: ASCII { ASCII(_unchecked: 0x70) } + @inlinable static var q: ASCII { ASCII(_unchecked: 0x71) } + @inlinable static var r: ASCII { ASCII(_unchecked: 0x72) } + @inlinable static var s: ASCII { ASCII(_unchecked: 0x73) } + @inlinable static var t: ASCII { ASCII(_unchecked: 0x74) } + @inlinable static var u: ASCII { ASCII(_unchecked: 0x75) } + @inlinable static var v: ASCII { ASCII(_unchecked: 0x76) } + @inlinable static var w: ASCII { 
ASCII(_unchecked: 0x77) } + @inlinable static var x: ASCII { ASCII(_unchecked: 0x78) } + @inlinable static var y: ASCII { ASCII(_unchecked: 0x79) } + @inlinable static var z: ASCII { ASCII(_unchecked: 0x7A) } + // More special characters. + @inlinable static var leftCurlyBracket : ASCII { ASCII(_unchecked: 0x7B) } + @inlinable static var verticalBar : ASCII { ASCII(_unchecked: 0x7C) } + @inlinable static var rightCurlyBracket: ASCII { ASCII(_unchecked: 0x7D) } + @inlinable static var tilde : ASCII { ASCII(_unchecked: 0x7E) } + @inlinable static var delete : ASCII { ASCII(_unchecked: 0x7F) } +} + + +// -------------------------------------------- +// MARK: - Character classes +// -------------------------------------------- + + +extension ASCII { + + @usableFromInline + internal struct ranges { + + @inlinable + internal static var c0Control: Range { + ASCII(_unchecked: 0x00).. { + ASCII(_unchecked: 0x30).. { + ASCII(_unchecked: 0x41).. { + ASCII(_unchecked: 0x61).. ASCII { + let table: StaticString = "0123456789ABCDEF" + return table.withUTF8Buffer { table in + ASCII(_unchecked: table[Int(number & 0x0F)]) + } + } + + /// Returns the lowercase hex digit corresponding to the low nibble of `number`. + /// + @inlinable + internal static func lowercaseHexDigit(of number: UInt8) -> ASCII { + let table: StaticString = "0123456789abcdef" + return table.withUTF8Buffer { table in + ASCII(_unchecked: table[Int(number & 0x0F)]) + } + } + + /// If `number` is in the range `0..<10`, returns the decimal digit corresponding to the value of `number`. + /// + @inlinable + internal static func decimalDigit(of number: UInt8) -> ASCII? { + return number < 10 ? uppercaseHexDigit(of: number) : nil + } + + /// Prints the decimal representation of `number` to the memory location given by `stringBuffer`. + /// A maximum of 3 bytes will be written. + /// + /// - returns: The number of bytes written to `stringBuffer`. 
+ /// + @usableFromInline + internal static func writeDecimalString(for number: UInt8, to stringBuffer: UnsafeMutableRawPointer) -> UInt8 { + + var count: UInt8 = 0 + var remaining = number + do { + let digit: UInt8 + (digit, remaining) = remaining.quotientAndRemainder(dividingBy: 100) + if digit != 0 { + stringBuffer.storeBytes( + of: ASCII.decimalDigit(of: UInt8(truncatingIfNeeded: digit))!.codePoint, + toByteOffset: 0, + as: UInt8.self + ) + count += 1 + } + } + do { + let digit: UInt8 + (digit, remaining) = remaining.quotientAndRemainder(dividingBy: 10) + if count != 0 || digit != 0 { + stringBuffer.storeBytes( + of: ASCII.decimalDigit(of: UInt8(truncatingIfNeeded: digit))!.codePoint, + toByteOffset: Int(count), + as: UInt8.self + ) + count += 1 + } + } + stringBuffer.storeBytes( + of: ASCII.decimalDigit(of: UInt8(truncatingIfNeeded: remaining))!.codePoint, + toByteOffset: Int(count), + as: UInt8.self + ) + count += 1 + return count + } + + /// Prints the decimal representation of `number` to the memory location given by `stringBuffer`. + /// A maximum of 5 bytes will be written. + /// + /// - returns: The number of bytes written to `stringBuffer`. 
+ /// + @usableFromInline + internal static func writeDecimalString(for number: UInt16, to stringBuffer: UnsafeMutableRawPointer) -> UInt8 { + + var count: UInt8 = 0 + var remaining = number + do { + let digit: UInt16 + (digit, remaining) = remaining.quotientAndRemainder(dividingBy: 10000) + if digit != 0 { + stringBuffer.storeBytes( + of: ASCII.decimalDigit(of: UInt8(truncatingIfNeeded: digit))!.codePoint, + toByteOffset: 0, + as: UInt8.self + ) + count += 1 + } + } + do { + let digit: UInt16 + (digit, remaining) = remaining.quotientAndRemainder(dividingBy: 1000) + if count != 0 || digit != 0 { + stringBuffer.storeBytes( + of: ASCII.decimalDigit(of: UInt8(truncatingIfNeeded: digit))!.codePoint, + toByteOffset: Int(count), + as: UInt8.self + ) + count += 1 + } + } + do { + let digit: UInt16 + (digit, remaining) = remaining.quotientAndRemainder(dividingBy: 100) + if count != 0 || digit != 0 { + stringBuffer.storeBytes( + of: ASCII.decimalDigit(of: UInt8(truncatingIfNeeded: digit))!.codePoint, + toByteOffset: Int(count), + as: UInt8.self + ) + count += 1 + } + } + do { + let digit: UInt16 + (digit, remaining) = remaining.quotientAndRemainder(dividingBy: 10) + if count != 0 || digit != 0 { + stringBuffer.storeBytes( + of: ASCII.decimalDigit(of: UInt8(truncatingIfNeeded: digit))!.codePoint, + toByteOffset: Int(count), + as: UInt8.self + ) + count += 1 + } + } + stringBuffer.storeBytes( + of: ASCII.decimalDigit(of: UInt8(truncatingIfNeeded: remaining))!.codePoint, + toByteOffset: Int(count), + as: UInt8.self + ) + count += 1 + return count + } + + /// Parses a 16-bit unsigned integer from a decimal representation contained in the given UTF-8 code-units. + /// + /// If parsing fails, it means the code-units contained a character which was not a decimal digit, or that the number overflows a 16-bit integer. + /// + @inlinable @inline(never) + internal static func parseDecimalU16( + from utf8: UTF8Bytes + ) -> UInt16? 
where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + var value: UInt16 = 0 + var idx = utf8.startIndex + while idx < utf8.endIndex, let digit = ASCII(utf8[idx])?.decimalNumberValue { + var (overflowM, overflowA) = (false, false) + (value, overflowM) = value.multipliedReportingOverflow(by: 10) + (value, overflowA) = value.addingReportingOverflow(UInt16(digit)) + if overflowM || overflowA { + return nil + } + idx = utf8.index(after: idx) + } + return idx < utf8.endIndex ? nil : value + } + + /// Prints the hex representation of `number` to the memory location given by `stringBuffer`. + /// A maximum of `B.bitWidth / 4` bytes will be written (e.g. 2 bytes for an 8-bit integer, 4 bytes for a 16-bit integer, etc). + /// + /// The hex representation is written without any leading zeroes, and in lowercase. + /// + /// - returns: The number of bytes written to `stringBuffer`. + /// + @inlinable + internal static func writeHexString( + for number: B, to stringBuffer: UnsafeMutableRawPointer + ) -> UInt8 where B: FixedWidthInteger & UnsignedInteger { + + var count: UInt8 = 0 + for nibbleIdx in 1..<(B.bitWidth / 4) { + let digit = number &>> (B.bitWidth - (nibbleIdx * 4)) + if count != 0 || digit != 0 { + stringBuffer.storeBytes( + of: ASCII.lowercaseHexDigit(of: UInt8(truncatingIfNeeded: digit)).codePoint, + toByteOffset: Int(count), + as: UInt8.self + ) + count += 1 + } + } + stringBuffer.storeBytes( + of: ASCII.lowercaseHexDigit(of: UInt8(truncatingIfNeeded: number)).codePoint, + toByteOffset: Int(count), + as: UInt8.self + ) + count += 1 + return count + } +} + + +// -------------------------------------------- +// MARK: - Misc +// -------------------------------------------- + + +extension ASCII { + + /// If this is an uppercase alpha character, returns its lowercase counterpart. Otherwise, returns `self`. 
+ /// + @inlinable + internal var lowercased: ASCII { + guard ASCII.ranges.uppercaseAlpha.contains(self) else { return self } + return ASCII(_unchecked: codePoint | 0b00100000) + } + + /// A sequence of all possible ASCII characters. + /// + internal static var allCharacters: AnySequence { + AnySequence( + sequence(first: ASCII(_unchecked: 0x00)) { character in + ASCII(character.codePoint + 1) + } + ) + } +} diff --git a/Sources/WebURL/Util/Collection+longestRange.swift b/Sources/WebURL/Util/Collection+longestRange.swift new file mode 100644 index 000000000..4e7327646 --- /dev/null +++ b/Sources/WebURL/Util/Collection+longestRange.swift @@ -0,0 +1,73 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extension Collection { + + /// Returns the longest subrange of elements satisfying the given predicate. + /// + /// In the case of a tie, the range closest to the start of the Collection is returned. + /// If no elements match the predicate, the returned range is empty and the returned length is 0. + /// + /// - parameters: + /// - predicate: The condition which elements should match. + /// - returns: A tuple containing the longest subrange matching the predicate, + /// as well as how many elements are contained within that range. 
+ /// + @inlinable + internal func longestSubrange(satisfying predicate: (Element) throws -> Bool) rethrows + -> (subrange: Range, length: Int) + { + var idx = startIndex + var longest: (Range, length: Int) = (idx.. longest.length { longest = (current.start.. longest.length { + longest = (current.start.. (subrange: Range, length: Int) { + return longestSubrange { $0 == value } + } +} diff --git a/Sources/WebURL/Util/Collection+trim.swift b/Sources/WebURL/Util/Collection+trim.swift new file mode 100644 index 000000000..97d924bd4 --- /dev/null +++ b/Sources/WebURL/Util/Collection+trim.swift @@ -0,0 +1,43 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extension BidirectionalCollection { + + /// Returns a `SubSequence` formed by discarding all elements at the start and end of the Collection + /// which satisfy the given predicate. + /// + /// e.g. `[2, 10, 11, 15, 20, 21, 100].trim(where: { $0.isMultiple(of: 2) })` == `[11, 15, 20, 21]` + /// + /// - parameters: + /// - predicate: A closure which determines if the element should be omitted from the resulting slice. + /// + @inlinable + internal func trim(where predicate: (Element) throws -> Bool) rethrows -> SubSequence { + var sliceStart = startIndex + var sliceEnd = endIndex + // Consume elements from the front. 
+ while sliceStart != sliceEnd, try predicate(self[sliceStart]) { + sliceStart = index(after: sliceStart) + } + // Consume elements from the back only if the element at the "before" index matches the predicate. + while sliceStart != sliceEnd { + let idxBeforeSliceEnd = index(before: sliceEnd) + guard try predicate(self[idxBeforeSliceEnd]) else { + return self[sliceStart.. { + case left(Left) + case right(Right) +} + +extension Either { + + @inlinable + internal func map( + left transformLeft: (Left) -> NewLeft, + right transformRight: (Right) -> NewRight + ) -> Either { + switch self { + case .left(let value): return .left(transformLeft(value)) + case .right(let value): return .right(transformRight(value)) + } + } +} + +extension Either where Left == Right { + + /// Returns the value held by this container. + /// + @inlinable + internal func get() -> Left { + switch self { + case .left(let value): return value + case .right(let value): return value + } + } +} diff --git a/Sources/WebURL/Util/ManagedArrayBuffer.swift b/Sources/WebURL/Util/ManagedArrayBuffer.swift new file mode 100644 index 000000000..9bbc5549f --- /dev/null +++ b/Sources/WebURL/Util/ManagedArrayBuffer.swift @@ -0,0 +1,546 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +// -------------------------------------------- +// MARK: - AltManagedBufferReference +// -------------------------------------------- + + +/// A header for a managed buffer which includes `count` and `capacity` information. +/// These values are set by `ManagedArrayBuffer` and should never be modified outside of that. +/// +@usableFromInline +internal protocol ManagedBufferHeader { + + /// The number of initialized elements that are stored in the allocation attached to this header. + /// + /// It is imperative that this value be kept up-to-date at all times. + /// When `AltManagedBuffer` is deinitialized, elements in the range `0.. Self? +} + +/// An alternative `ManagedBuffer` interface, with the following differences: +/// +/// - It is a `struct` with reference semantics, rather than a `class`. +/// - The initial `header` must be given as a value at initialisation time. +/// - The `header` must store the buffer's `count`and `capacity`: +/// - The `capacity` is automatically set to the correct value when attaching the header to storage. +/// - Elements in the range `0.. { + + @usableFromInline + internal final class _Storage: ManagedBuffer { + + @inlinable + internal static func newBuffer(minimumCapacity: Int, initialHeader: Header) -> Self { + let buffer = Self.create(minimumCapacity: minimumCapacity) { unsafeBuffer in + let actualCapacity = unsafeBuffer.capacity + guard var newHdr = initialHeader.withCapacity(minimumCapacity: minimumCapacity, maximumCapacity: actualCapacity) + else { + preconditionFailure("Failed to create header with required capacity") + } + precondition((minimumCapacity...actualCapacity).contains(newHdr.capacity), "Header stored incorrect capacity") + // The header's `count` must be set to 0 (as the uninitialized buffer does not contain anything). + newHdr.count = 0 + return newHdr + } + return unsafeDowncast(buffer, to: Self.self) + } + + @inlinable + deinit { + // Swift does not specialize generic classes (often? maybe? 
not sure, but it's flaky). + // That means this deinit will never be eliminated, and everything we do here will be + // unspecialized generic code (which is **extremely** expensive). + // https://bugs.swift.org/browse/SR-13221 + if Swift._isPOD(Element.self) == false { + _ = withUnsafeMutablePointers { headerPtr, elemsPtr in + elemsPtr.deinitialize(count: headerPtr.pointee.count) + } + } + } + } + + @usableFromInline + internal var _wrapped: _Storage + + /// Creates a new, uninitialized buffer with the given header. + /// Note that the header's `capacity` is automatically set to the actual, allocated capacity, and the header's `count` is set to `0`. + /// + @inlinable + internal init(minimumCapacity: Int, initialHeader: Header) { + self._wrapped = _Storage.newBuffer(minimumCapacity: minimumCapacity, initialHeader: initialHeader) + } + + /// The stored `Header` instance. + /// + @inlinable + internal var header: Header { + get { return _wrapped.header } + _modify { yield &_wrapped.header } + } + + /// The number of elements that have been initialized in this buffer. + /// + @inlinable + internal var count: Int { + header.count + } + + /// The number of elements that can be stored in this buffer. + /// + @inlinable + internal var capacity: Int { + header.capacity + } + + /// Call `body` with an `UnsafeMutablePointer` to the stored `Header`. + /// + /// - Note: This pointer is valid only for the duration of the call to `body`. + /// + @inlinable + internal func withUnsafeMutablePointerToHeader( + _ body: (UnsafeMutablePointer
) throws -> R + ) rethrows -> R { + try _wrapped.withUnsafeMutablePointerToHeader(body) + } + + /// Call `body` with an `UnsafeMutablePointer` to the `Element` storage. + /// + /// - Note: This pointer is valid only for the duration of the call to `body`. + /// + @inlinable + internal func withUnsafeMutablePointerToElements( + _ body: (UnsafeMutablePointer) throws -> R + ) rethrows -> R { + try _wrapped.withUnsafeMutablePointerToElements(body) + } + + /// Call `body` with `UnsafeMutablePointer`s to the stored `Header` and raw `Element` storage. + /// + /// - Note: These pointers are valid only for the duration of the call to `body`. + /// + @inlinable + internal func withUnsafeMutablePointers( + _ body: (UnsafeMutablePointer
, UnsafeMutablePointer) throws -> R + ) rethrows -> R { + try _wrapped.withUnsafeMutablePointers(body) + } + + /// Whether or not this buffer is known to be uniquely-referenced. + /// + @inlinable @inline(__always) + internal mutating func isKnownUniqueReference() -> Bool { + return isKnownUniquelyReferenced(&_wrapped) + } + + /// Moves the contents of this buffer to a new buffer with the given capacity. + /// + /// The header is copied, although its `capacity` will be adjusted to reflect the new buffer's capacity. Afterwards, this buffer's `count` will be 0. + /// + /// - precondition: The given capacity must be sufficient to store all of this buffer's contents. + /// + @inlinable + internal func moveToNewBuffer(minimumCapacity: Int) -> Self { + let numElements = count + precondition(minimumCapacity >= numElements) + let newBuffer = Self.init(minimumCapacity: minimumCapacity, initialHeader: header) + assert(newBuffer.count == 0) + newBuffer.withUnsafeMutablePointers { destHeader, destElems in + self.withUnsafeMutablePointers { srcHeader, srcElems in + destElems.moveInitialize(from: srcElems, count: numElements) + srcHeader.pointee.count = 0 + } + destHeader.pointee.count = numElements + } + assert(newBuffer.count == numElements) + assert(count == 0) + return newBuffer + } + + /// Copies the contents of this buffer to a new buffer with the given capacity. + /// + /// The header is copied, although its `capacity` will be adjusted to reflect the new buffer's capacity. This buffer remains unchanged. + /// + /// - precondition: The given capacity must be sufficient to store all of this buffer's contents. 
+ /// + @inlinable + internal func copyToNewBuffer(minimumCapacity: Int) -> Self { + let numElements = count + precondition(minimumCapacity >= numElements) + let newBuffer = Self.init(minimumCapacity: minimumCapacity, initialHeader: header) + assert(newBuffer.count == 0) + newBuffer.withUnsafeMutablePointers { destHeader, destElems in + self.withUnsafeMutablePointerToElements { srcElems in + destElems.initialize(from: srcElems, count: numElements) + } + destHeader.pointee.count = numElements + } + assert(newBuffer.count == numElements) + assert(count == numElements) + return newBuffer + } +} + + +// -------------------------------------------- +// MARK: - ManagedArrayBuffer +// -------------------------------------------- + + +/// A wrapper for an `AltManagedBufferReference` which aims to provide similar convenience methods and semantics to `Array`. +/// +/// In particular: +/// - This is a Copy-on-Write value type. +/// - The buffer's header is exposed as a property, mutations to which will trigger the buffer to copy to new storage if not a unique reference. +/// - The buffer's elements are exposed as a `RandomAccessCollection` for reading and a `MutableCollection` for writing. Again, mutations +/// will trigger a copy if not a unique reference. +/// - Indices are bounds-checked. +/// +/// Some features of `Array` are not supported: +/// - No `RangeReplaceableCollection` conformance. However, this type does provide the `replaceSubrange` and `append` methods. +/// - No predictive growth strategy (although capacity may be allocated in advance using `reserveCapacity`). +/// - No shrinking of storage, although a fresh allocation produced by copy-on-write during a no-op operation such as `reserveCapacity(0)` +/// will occupy the smallest possible space. +/// +@usableFromInline +internal struct ManagedArrayBuffer { + + @usableFromInline + internal var _storage: AltManagedBufferReference + + /// Creates a new `ManagedArrayBuffer` with the given minimum capacity and header. 
+ /// + /// The new header's `count` is automatically set to `0`, and its `capacity` is set appropriately for the allocated storage. + /// + @inlinable + internal init(minimumCapacity: Int, initialHeader: Header) { + self._storage = .init(minimumCapacity: minimumCapacity, initialHeader: initialHeader) + } + + @inlinable @inline(__always) + internal mutating func ensureUnique() { + if !_storage.isKnownUniqueReference() { + _storage = _storage.copyToNewBuffer(minimumCapacity: count) + } + assert(_storage.isKnownUniqueReference()) + } + + /// The stored `Header` instance. + /// + @inlinable @inline(__always) + internal var header: Header { + get { + return _storage.header + } + _modify { + ensureUnique() + let preModifyCapacity = _storage.header.capacity + yield &_storage.header + assert(_storage.header.capacity == preModifyCapacity, "Invalid change of capacity") + } + } +} + +extension ManagedArrayBuffer { + + /// Ensures that this buffer has sufficient capacity to store at least the specified number of elements. + /// + /// If the buffer already has sufficient capacity, calling this function will also ensure that it has a unique reference to its storage. + /// + @inlinable + internal mutating func reserveCapacity(_ minimumCapacity: Int) { + let isUnique = _storage.isKnownUniqueReference() + if _slowPath(!isUnique || _storage.capacity < minimumCapacity) { + let newCapacity = Swift.max(minimumCapacity, _storage.count) + if isUnique { + _storage = _storage.moveToNewBuffer(minimumCapacity: newCapacity) + } else { + _storage = _storage.copyToNewBuffer(minimumCapacity: newCapacity) + } + } + precondition(_storage.capacity >= minimumCapacity) + precondition(_storage.isKnownUniqueReference()) + } + + /// Appends space for the given number of objects, but leaves the initialization of that space to the given closure. + /// + /// - important: The closure must initialize **exactly** `uninitializedCapacity` elements, else a runtime error will be triggered. 
+ /// - returns: The collection's new `endIndex`. + /// + @discardableResult @inlinable + internal mutating func unsafeAppend( + uninitializedCapacity: Int, initializingWith initializer: (inout UnsafeMutableBufferPointer) -> Int + ) -> Index { + + precondition(uninitializedCapacity >= 0, "Cannot append a negative number of elements") + let oldCount = self.count + let newCount = oldCount + uninitializedCapacity + reserveCapacity(newCount) + assert(_storage.isKnownUniqueReference(), "reserveCapacity should have made this unique") + + _storage.withUnsafeMutablePointerToElements { elements in + var uninitializedBuffer = UnsafeMutableBufferPointer(start: elements + oldCount, count: uninitializedCapacity) + let n = initializer(&uninitializedBuffer) + precondition(n == uninitializedCapacity) + } + _storage.header.count = newCount + return newCount + } + + @usableFromInline + internal struct _StorageHolder: BufferContainer { + + @usableFromInline + internal var bufferRef: AltManagedBufferReference + + @inlinable + internal init(bufferRef: AltManagedBufferReference) { + self.bufferRef = bufferRef + } + + @inlinable + func withUnsafeMutablePointerToElements(_ body: (UnsafeMutablePointer) throws -> R) rethrows -> R { + return try bufferRef.withUnsafeMutablePointerToElements(body) + } + } + + /// Replaces the given subrange with uninitialized space for a given number of objects, but leaves the initialization of that space to the given closure. + /// + /// - important: The closure must initialize **exactly** `uninitializedCapacity` elements, else a runtime error will be triggered. + /// - returns: The indices of the initialized elements. 
+ /// + @discardableResult @inlinable + internal mutating func unsafeReplaceSubrange( + _ subrange: Range, + withUninitializedCapacity newSubrangeCount: Int, + initializingWith initializer: (inout UnsafeMutableBufferPointer) -> Int + ) -> Range { + + precondition(subrange.lowerBound >= startIndex && subrange.upperBound <= endIndex, "Range is out of bounds") + precondition(newSubrangeCount >= 0, "Cannot replace subrange with a negative number of elements") + let isUnique = _storage.isKnownUniqueReference() + let result = _storage.withUnsafeMutablePointerToElements { elems in + return replaceElements( + in: UnsafeMutableBufferPointer(start: elems, count: _storage.capacity), + initializedCount: _storage.count, + isUnique: isUnique, + subrange: subrange, + withElements: newSubrangeCount, + initializedWith: initializer, + storageConstructor: { _StorageHolder(bufferRef: .init(minimumCapacity: $0, initialHeader: _storage.header)) } + ) + } + // Update the count of our existing storage. Its contents may have been moved out. + self._storage.header.count = result.bufferCount + // Adopt any new storage that was allocated. + if var newStorage = result.newStorage?.bufferRef { + newStorage.header.count = result.newStorageCount + self._storage = newStorage + } + return subrange.lowerBound..<(subrange.lowerBound + result.insertedCount) + } + + /// Invokes `body` with a pointer to the mutable elements in the given range. + /// + /// `body` returns the new number of elements in the range, `n`, which must be less than or equal to the existing number of elements. + /// When `body` completes, the `n` elements at the start of the buffer must be initialized, and elements from `n` until the end of the given buffer + /// must be deinitialized. 
+ /// + @discardableResult @inlinable + internal mutating func unsafeTruncate( + _ subrange: Range, _ body: (inout UnsafeMutableBufferPointer) -> Int + ) -> Index { + precondition(subrange.lowerBound >= startIndex && subrange.upperBound <= endIndex, "Range is out of bounds") + var removedElements = 0 + withUnsafeMutableBufferPointer { buffer in + var slice = UnsafeMutableBufferPointer(rebasing: buffer[subrange]) + let newSliceCount = body(&slice) + precondition(newSliceCount <= slice.count, "unsafeTruncate cannot initialize more content than it had space for") + // Space in the range newSliceCount.. Index { + i &+ 1 + } + + @inlinable + internal func index(before i: Index) -> Index { + i &- 1 + } + + @inlinable + internal func withContiguousStorageIfAvailable(_ body: (UnsafeBufferPointer) throws -> R) rethrows -> R? { + try withUnsafeBufferPointer(body) + } + + @inlinable + internal mutating func withContiguousMutableStorageIfAvailable( + _ body: (inout UnsafeMutableBufferPointer) throws -> R + ) rethrows -> R? { + try withUnsafeMutableBufferPointer(body) + } +} + +extension ManagedArrayBuffer: MutableCollection { + + @inlinable + internal subscript(position: Index) -> Element { + get { + precondition(position >= startIndex && position < endIndex, "Index out of bounds") + return _storage.withUnsafeMutablePointerToElements { $0.advanced(by: position).pointee } + } + set { + precondition(position >= startIndex && position < endIndex, "Index out of bounds") + ensureUnique() + return _storage.withUnsafeMutablePointerToElements { + $0.advanced(by: position).pointee = newValue + } + } + } +} + +// RRC-lite. 
+ +extension ManagedArrayBuffer { + + @discardableResult @inlinable + internal mutating func replaceSubrange( + _ subrange: Range, with newElements: C + ) -> Range where C: Collection, Self.Element == C.Element { + unsafeReplaceSubrange(subrange, withUninitializedCapacity: newElements.count) { buffer in + buffer.fastInitialize(from: newElements) + } + } + + @discardableResult @inlinable + internal mutating func append( + contentsOf newElements: S + ) -> Range where S: Sequence, Self.Element == S.Element { + + let preAppendEnd = endIndex + + // TODO: [performance]: Use withContiguousStorageIfAvailable + var result: (S.Iterator, Int)? + unsafeAppend(uninitializedCapacity: newElements.underestimatedCount) { ptr in + result = ptr.initialize(from: newElements) + return result.unsafelyUnwrapped.1 + } + while let remaining = result?.0.next() { + append(remaining) + } + return Range(uncheckedBounds: (preAppendEnd, endIndex)) + } + + @discardableResult @inlinable + internal mutating func append(_ element: Element) -> Index { + append(contentsOf: CollectionOfOne(element)).lowerBound + } + + @discardableResult @inlinable + internal mutating func removeSubrange(_ subrange: Range) -> Index { + unsafeTruncate(subrange) { buffer in + buffer.baseAddress?.deinitialize(count: buffer.count) + return 0 + } + } +} + +// Extensions. 
+ +extension ManagedArrayBuffer { + + @inlinable + internal func withUnsafeBufferPointer( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R { + try _storage.withUnsafeMutablePointerToElements { + try body(UnsafeBufferPointer(start: $0, count: count)) + } + } + + @inlinable + internal mutating func withUnsafeMutableBufferPointer( + _ body: (inout UnsafeMutableBufferPointer) throws -> R + ) rethrows -> R { + ensureUnique() + return try _storage.withUnsafeMutablePointerToElements { + var ptr = UnsafeMutableBufferPointer(start: $0, count: count) + return try body(&ptr) + } + } + + @inlinable + internal func withUnsafeBufferPointer( + range: Range, _ block: (UnsafeBufferPointer) throws -> R + ) rethrows -> R { + precondition(range.startIndex >= startIndex && range.endIndex <= endIndex, "Range is out of bounds") + return try _storage.withUnsafeMutablePointerToElements { elements in + let slice = UnsafeBufferPointer(start: elements + range.startIndex, count: range.count) + return try block(slice) + } + } +} diff --git a/Sources/WebURL/Util/Pointers.swift b/Sources/WebURL/Util/Pointers.swift new file mode 100644 index 000000000..6676a9515 --- /dev/null +++ b/Sources/WebURL/Util/Pointers.swift @@ -0,0 +1,356 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// --------------------------------------------
// MARK: - Unaligned loads
// --------------------------------------------


extension UnsafeRawPointer {

  /// Returns a new instance of the given type, constructed from the raw memory at the specified offset.
  ///
  /// The memory at this pointer plus offset must be initialized to `T` or another type that is layout compatible
  /// with `T`. It does not need to be aligned for access to `T`.
  ///
  /// - parameters:
  ///   - offset: The offset, in bytes, from this pointer at which the value starts. Defaults to 0.
  ///   - as:     The integer type to load.
  ///
  /// - returns: The integer whose bytes were copied from the source memory, in native byte order.
  ///
  @inlinable @inline(__always)
  internal func loadUnaligned<T>(fromByteOffset offset: Int = 0, as: T.Type) -> T where T: FixedWidthInteger {
    var val: T = 0
    withUnsafeMutableBytes(of: &val) {
      // The destination is exactly `size` bytes wide, so copy `size` (not `stride`) bytes.
      $0.copyMemory(from: UnsafeRawBufferPointer(start: self + offset, count: MemoryLayout<T>.size))
    }
    return val
  }
}


// --------------------------------------------
// MARK: - Fast initialize
// --------------------------------------------


extension UnsafeMutableBufferPointer {

  /// Initializes the buffer’s memory with the given elements.
  ///
  /// When calling the initialize(from:) method on a buffer b, the memory referenced by b must be uninitialized
  /// or the Element type must be a trivial type. After the call, the memory referenced by this buffer up to,
  /// but not including, the returned index is initialized.
  /// The buffer must contain sufficient memory to accommodate source.underestimatedCount.
  ///
  /// The returned index is the position of the element in the buffer one past the last element written.
  /// If source contains no elements, the returned index is equal to the buffer’s startIndex.
  /// If source contains an equal or greater number of elements than the buffer can hold, the returned index
  /// is equal to the buffer’s endIndex.
  ///
  /// This is like the standard library's `initialize(from:)` method, except that it doesn't return an iterator
  /// of remaining elements from the source, and thus is able to be significantly faster for sources which
  /// implement `withContiguousStorageIfAvailable`.
  ///
  @inlinable
  internal func fastInitialize<S>(from source: S) -> Int where S: Sequence, S.Element == Element {
    // UMBP.initialize(from:) is slow with slices. https://bugs.swift.org/browse/SR-14491
    let contiguousResult = source.withContiguousStorageIfAvailable { srcBuffer -> Int in
      guard let srcAddress = srcBuffer.baseAddress else { return 0 }
      // Never write more elements than this buffer can hold.
      let elementsToWrite = Swift.min(count, srcBuffer.count)
      self.baseAddress?.initialize(from: srcAddress, count: elementsToWrite)
      return elementsToWrite
    }
    if let elementsWritten = contiguousResult {
      return elementsWritten
    }
    // No contiguous storage: fall back to the standard library and discard the returned iterator.
    return initialize(from: source).1
  }
}


// --------------------------------------------
// MARK: - Pointers to tuple elements
// --------------------------------------------


extension UnsafeRawBufferPointer {

  /// Returns a typed pointer to the memory referenced by this buffer, assuming that the memory is already bound
  /// to the specified type.
  ///
  /// This is equivalent to calling `UnsafeRawPointer.assumingMemoryBound` on this buffer's base address,
  /// and dividing this buffer's `count` by the `stride` of the given type. Be sure to do lots of research
  /// on the above method before even thinking about using this.
  ///
  @inlinable @inline(__always)
  internal func _assumingMemoryBound<T>(to: T.Type) -> UnsafeBufferPointer<T> {
    guard let base = baseAddress else {
      return .init(start: nil, count: 0)
    }
    // Question: If we 'assumingMemoryBound' the base address, can we just make a buffer with the correct 'count'
    //           and treat all of it as typed/bound?
    //
    // Answer:   Yes. Unlike 'bindMemory', which calls a Builtin function [1] with the pointer address and number of
    //           elements and communicates to the compiler the entire _region_ of memory being bound,
    //           'assumingMemoryBound' does nothing [2] - it doesn't call any Builtins, and simply constructs
    //           a typed pointer from an untyped one.
    //
    //           That's what makes it so dangerous: as it doesn't actually communicate anything to the compiler
    //           about how the memory is being accessed, incorrect use can cause type-based alias analysis to
    //           miscompile. As the name suggests, we assume the compiler already knows - i.e. that the entire
    //           region has already been bound.
    //
    // [1]: https://github.com/apple/swift/blob/a0098c0174199b76473636af50699e21b688110c/stdlib/public/core/UnsafeRawBufferPointer.swift.gyb#L692
    // [2]: https://github.com/apple/swift/blob/a0098c0174199b76473636af50699e21b688110c/stdlib/public/core/UnsafeRawPointer.swift#L335
    return .init(start: base.assumingMemoryBound(to: to), count: count / MemoryLayout<T>.stride)
  }
}

extension UnsafeMutableRawBufferPointer {

  /// Returns a typed pointer to the memory referenced by this buffer, assuming that the memory is already bound
  /// to the specified type.
  ///
  /// This is equivalent to calling `UnsafeMutableRawPointer.assumingMemoryBound` on this buffer's base address,
  /// and dividing this buffer's `count` by the `stride` of the given type. Be sure to do lots of research
  /// on the above method before even thinking about using this.
  ///
  @inlinable @inline(__always)
  internal func _assumingMemoryBound<T>(to: T.Type) -> UnsafeMutableBufferPointer<T> {
    guard let base = baseAddress else {
      return .init(start: nil, count: 0)
    }
    return .init(start: base.assumingMemoryBound(to: to), count: count / MemoryLayout<T>.stride)
  }
}

// Note on element counts in the functions below:
// a homogeneous tuple of N elements occupies `(N - 1) * stride + size` bytes, so dividing its byte count by the
// stride yields N - 1 whenever `T` has tail padding (size < stride). The buffers are therefore given the tuple's
// arity as their count, rather than the count derived from the number of bytes.

// Arity 4:

/// Calls `body` with a mutable buffer pointer to the 4 elements of `tuple`.
///
/// The pointer is only valid for the duration of the call.
///
@inlinable @inline(__always)
internal func withUnsafeMutableBufferPointerToElements<T, Result>(
  tuple: inout (T, T, T, T), _ body: (inout UnsafeMutableBufferPointer<T>) -> Result
) -> Result {
  return withUnsafeMutableBytes(of: &tuple) { rawTuple in
    let base = rawTuple._assumingMemoryBound(to: T.self).baseAddress
    var elements = UnsafeMutableBufferPointer(start: base, count: base == nil ? 0 : 4)
    return body(&elements)
  }
}

// Arity 8:

/// Calls `body` with a read-only buffer pointer to the 8 elements of `tuple`.
///
/// The pointer is only valid for the duration of the call.
///
@inlinable @inline(__always)
internal func withUnsafeBufferPointerToElements<T, Result>(
  tuple: (T, T, T, T, T, T, T, T), _ body: (UnsafeBufferPointer<T>) -> Result
) -> Result {
  return withUnsafeBytes(of: tuple) { rawTuple in
    let base = rawTuple._assumingMemoryBound(to: T.self).baseAddress
    return body(UnsafeBufferPointer(start: base, count: base == nil ? 0 : 8))
  }
}

/// Calls `body` with a mutable buffer pointer to the 8 elements of `tuple`.
///
/// The pointer is only valid for the duration of the call.
///
@inlinable @inline(__always)
internal func withUnsafeMutableBufferPointerToElements<T, Result>(
  tuple: inout (T, T, T, T, T, T, T, T), _ body: (inout UnsafeMutableBufferPointer<T>) -> Result
) -> Result {
  return withUnsafeMutableBytes(of: &tuple) { rawTuple in
    let base = rawTuple._assumingMemoryBound(to: T.self).baseAddress
    var elements = UnsafeMutableBufferPointer(start: base, count: base == nil ? 0 : 8)
    return body(&elements)
  }
}


// --------------------------------------------
// MARK: - Reducing arithmetic overflow traps
// --------------------------------------------
// The implementation of UnsafeBufferPointer uses arithmetic which traps on overflow in its indexing operations
// (e.g. index(after:)). This isn't a part of memory safety - UnsafeBufferPointer is, as the name suggests, unsafe.
// The thing that makes it unsafe is that it lacks bounds-checking in release mode, so you can tell it to read from
// any nonsense offset (too large, negative, whatever), and it'll just do it and think everything was fine.
+// +// Collection doesn't make any guarantees about what happens when you use a Collection incorrectly; +// incrementing an invalid index could trap, or it could always return startIndex, endIndex, or anything. +// The same goes for subscripting - a Collection of Ints could trap, or return 0 or -1 if you ask for an out-of-bounds +// element. Generic Collection code has to do things like checking an index is less than endIndex before incrementing - +// otherwise it will invoke _unspecified_ behaviour (not the same as UB in C) and may give you nonsense results. +// +// The one thing a Collection should never do (or any type in Swift, Collection or not), is violate memory safety. +// --> **Even if you use it incorrectly** <--. That's the difference with C-style undefined behaviour. +// The exception to this rule is UnsafeBufferPointer - as explained above, if you ask it to read from an invalid index, +// it will happily do so. Incorrect usage _will_ invoke C-style undefined behaviour and violate memory safety. +// +// This tweaked implementation of UnsafeBufferPointer makes some changes to the stdlib's implementation, +// but is no more or less safe (you can't be "more or less safe" than something else; something is safe or it isn't): +// +// - Indexing operations do not overflow. As explained above, there is no requirement to trap. +// UnsafeBufferPointer won't bounds-check the index anyway, so trap or not, reading from an index you incremented +// too far will violate memory safety. Trapping on overflow doesn't make incorrect code safe. +// +// See discussion at: +// https://forums.swift.org/t/is-it-okay-to-ignore-overflow-when-incrementing-decrementing-collection-indexes/47416 +// +// - It is self-slicing. This helps to ensure that `Slice` doesn't mess up any of the low-level performance tweaks. +// It also allows us to implement `_copyContents` without having to create a custom iterator, which is nice. 
+// +// -------------------------------------------- + + +extension UnsafeBufferPointer { + + @inlinable + internal var withoutTrappingOnIndexOverflow: NoOverflowUnsafeBufferPointer { + NoOverflowUnsafeBufferPointer(self) + } +} + +@usableFromInline +internal struct NoOverflowUnsafeBufferPointer { + + @usableFromInline + internal var baseAddress: UnsafePointer? + + @usableFromInline + internal var bounds: Range + + @inlinable + internal init(baseAddress: UnsafePointer?, count: Int) { + assert(count >= 0) + assert(count == 0 || baseAddress != nil) + self.baseAddress = baseAddress + self.bounds = Range(uncheckedBounds: (0, count)) + } + + @inlinable + internal init(_ buffer: UnsafeBufferPointer) { + self.init(baseAddress: buffer.baseAddress, count: buffer.count) + } + + @inlinable + internal init(slicing base: Self, bounds: Range) { + base._failEarlyRangeCheck(bounds, bounds: base.bounds) + self.baseAddress = base.baseAddress + self.bounds = bounds + } +} + +extension NoOverflowUnsafeBufferPointer: RandomAccessCollection { + + @inlinable + internal var startIndex: Int { + _assumeNonNegative(bounds.lowerBound) + } + + @inlinable + internal var endIndex: Int { + _assumeNonNegative(bounds.upperBound) + } + + @inlinable + internal var count: Int { + endIndex &- startIndex + } + + @inlinable + internal var isEmpty: Bool { + // startIndex will never be greater than endIndex, but by writing it this way (rather than startIndex != endIndex), + // we communicate that when 'isEmpty == false', startIndex is definitely < endIndex. 
This means: + // - startIndex + 1 won't overflow + // - The range startIndex..= endIndex + } + + @inlinable + internal subscript(position: Int) -> Element { + _failEarlyRangeCheck(position, bounds: bounds) + return baseAddress.unsafelyUnwrapped[position] + } + + @inlinable + internal subscript(sliceBounds: Range) -> NoOverflowUnsafeBufferPointer { + NoOverflowUnsafeBufferPointer(slicing: self, bounds: sliceBounds) + } + + @inlinable + internal func _failEarlyRangeCheck(_ index: Int, bounds: Range) { + assert(index >= bounds.lowerBound) + assert(index < bounds.upperBound) + } + + @inlinable + internal func _failEarlyRangeCheck(_ range: Range, bounds: Range) { + assert(range.lowerBound >= bounds.lowerBound) + assert(range.upperBound <= bounds.upperBound) + } + + @inlinable + internal func _copyContents( + initializing destination: UnsafeMutableBufferPointer + ) -> (Iterator, UnsafeMutableBufferPointer.Index) { + guard !isEmpty && !destination.isEmpty else { return (makeIterator(), 0) } + let src = self.baseAddress.unsafelyUnwrapped + bounds.lowerBound + let dst = destination.baseAddress.unsafelyUnwrapped + let n = Swift.min(destination.count, self.count) + dst.initialize(from: src, count: n) + return (self[Range(uncheckedBounds: (bounds.lowerBound &+ n, bounds.upperBound))].makeIterator(), n) + } + + @inlinable + internal var indices: Range { + bounds + } + + @inlinable @inline(__always) + internal func index(after i: Int) -> Int { + i &+ 1 + } + + @inlinable @inline(__always) + internal func formIndex(after i: inout Int) { + i &+= 1 + } + + @inlinable + internal func index(_ i: Int, offsetBy distance: Int) -> Int { + i &+ distance + } + + @inlinable + internal func formIndex(_ i: inout Int, offsetBy distance: Int) { + i &+= distance + } + + @inlinable + internal func index(_ i: Int, offsetBy n: Int, limitedBy limit: Int) -> Int? { + let l = limit &- i + if n > 0 ? 
l >= 0 && l < n : l <= 0 && n < l { + return nil + } + return i &+ n + } + + @inlinable @inline(__always) + internal func index(before i: Int) -> Int { + i &- 1 + } + + @inlinable @inline(__always) + internal func formIndex(before i: inout Int) { + i &-= 1 + } + + @inlinable + internal func distance(from start: Int, to end: Int) -> Int { + end &- start + } + + @inlinable + internal func withContiguousStorageIfAvailable( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R? { + guard let baseAddress = baseAddress else { return try body(UnsafeBufferPointer(start: nil, count: 0)) } + return try body(UnsafeBufferPointer(start: baseAddress + startIndex, count: _assumeNonNegative(count))) + } +} diff --git a/Sources/WebURL/Util/UnsafeBuffer+ReplaceSubrange.swift b/Sources/WebURL/Util/UnsafeBuffer+ReplaceSubrange.swift new file mode 100644 index 000000000..16928a3c0 --- /dev/null +++ b/Sources/WebURL/Util/UnsafeBuffer+ReplaceSubrange.swift @@ -0,0 +1,295 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +// -------------------------------------------- +// This file contains a generic implementation of 'replaceSubrange' for contiguous buffers, +// including those only accessible indirectly such as `ManagedBuffer` subclasses. +// +// It includes optimizations for in-place replacement as well as consuming the original storage when additional +// capacity is required. 
The implementation is adapted from the standard library's source code, where it forms +// the basis of Array's implementation of replaceSubrange. +// +// I've tried to split the functionality in to small, well-defined functions. Unfortunately, the main entrypoint +// is still a big function with lots of parameters which I've found difficult to clean up by defining appropriate +// abstractions. It looks uglier than it is. +// -------------------------------------------- + + +/// An object which contains a unique, mutable buffer. +/// This protocol is a required level of indirection so that `replaceElements` can allocate, fill, and return objects which provide their buffers indirectly. +/// +@usableFromInline +internal protocol BufferContainer { + associatedtype Element + func withUnsafeMutablePointerToElements(_ body: (UnsafeMutablePointer) throws -> R) rethrows -> R +} + +/// Given a `buffer`, representing an entire allocation in which `0..( + in buffer: UnsafeMutableBufferPointer, + initializedCount: Int, + subrange: Range, + with newElements: C, + isUnique: Bool, + storageConstructor: (_ minimumCapacity: Int) -> T +) -> (bufferCount: Int, insertedCount: Int, newStorage: T?, newStorageCount: Int) +where C: Collection, T.Element == C.Element { + replaceElements( + in: buffer, + initializedCount: initializedCount, + isUnique: isUnique, + subrange: subrange, + withElements: newElements.count, + initializedWith: { return $0.fastInitialize(from: newElements) }, + storageConstructor: storageConstructor + ) +} + +@inlinable +internal func replaceElements( + in buffer: UnsafeMutableBufferPointer, + initializedCount: Int, + isUnique: Bool, + subrange: Range, + withElements newElementsCount: Int, + initializedWith initializer: (inout UnsafeMutableBufferPointer) -> Int, + storageConstructor: (_ minimumCapacity: Int) -> T +) -> (bufferCount: Int, insertedCount: Int, newStorage: T?, newStorageCount: Int) { + + precondition(subrange.lowerBound >= 0, "subrange start is 
negative") + precondition(subrange.upperBound <= initializedCount, "subrange extends past the end") + + let insertCount = newElementsCount + let finalCount = initializedCount - subrange.count + insertCount + + if isUnique && finalCount <= buffer.count { + let newCount = replaceSubrange_inplace( + buffer: buffer, initializedCount: initializedCount, + subrange: subrange, newElementCount: insertCount + ) { ptr, expectedCount in + var rangePtr = UnsafeMutableBufferPointer(start: ptr, count: expectedCount) + let n = initializer(&rangePtr) + precondition(n == expectedCount, "initializer failed to initialize entire capacity") + } + return (bufferCount: newCount, insertedCount: insertCount, newStorage: nil, newStorageCount: 0) + } + let newStorage = storageConstructor(finalCount) + let srcBuffer = UnsafeMutableBufferPointer(rebasing: buffer.prefix(initializedCount)) + let newCount = newStorage.withUnsafeMutablePointerToElements { newBufferPtr -> Int in + let newBuffer = UnsafeMutableBufferPointer(start: newBufferPtr, count: finalCount) + if isUnique { + return newBuffer.moveInitialize(from: srcBuffer, replacingSubrange: subrange, withElements: insertCount) { + ptr, expectedCount in + var rangePtr = UnsafeMutableBufferPointer(start: ptr, count: expectedCount) + let n = initializer(&rangePtr) + precondition(n == expectedCount, "initializer failed to initialize entire capacity") + } + } + return newBuffer.initialize( + from: UnsafeBufferPointer(srcBuffer), replacingSubrange: subrange, withElements: insertCount + ) { ptr, expectedCount in + var rangePtr = UnsafeMutableBufferPointer(start: ptr, count: expectedCount) + let n = initializer(&rangePtr) + precondition(n == expectedCount, "initializer failed to initialize entire capacity") + } + } + assert(newCount == finalCount) + return ( + bufferCount: isUnique ? 
0 : initializedCount, insertedCount: insertCount, newStorage: newStorage, + newStorageCount: finalCount + ) +} + +/// Given a buffer, whose elements from `0..( + buffer: UnsafeMutableBufferPointer, + initializedCount: Int, + subrange: Range, + newElementCount: Int, + _ initializeNewElements: + ((UnsafeMutablePointer, _ count: Int) -> Void) = { ptr, count in + precondition(count == 0) + } +) -> Int { + + let oldCount = initializedCount + let growth = newElementCount - subrange.count + let finalCount = oldCount + growth + precondition(finalCount <= buffer.count, "Insufficient capacity for replaceSubrange_inplace") + + guard let elements = buffer.baseAddress else { return 0 } + switch growth { + case _ where growth > 0: + (elements + subrange.lowerBound + newElementCount) + .moveInitialize(from: elements + subrange.upperBound, count: oldCount - subrange.upperBound) + (elements + subrange.lowerBound).deinitialize(count: subrange.count) + initializeNewElements(elements + subrange.lowerBound, newElementCount) + + case _ where growth == 0: + (elements + subrange.lowerBound).deinitialize(count: subrange.count) + initializeNewElements(elements + subrange.lowerBound, newElementCount) + + case _ where growth < 0: fallthrough + default: + (elements + subrange.lowerBound).deinitialize(count: subrange.count) + initializeNewElements(elements + subrange.lowerBound, newElementCount) + (elements + subrange.lowerBound + newElementCount) + .moveInitialize(from: elements + subrange.upperBound, count: oldCount - subrange.upperBound) + } + return finalCount +} + + +// -------------------------------------------- +// MARK: - Out-of-place replacements +// -------------------------------------------- + + +extension UnsafeMutableBufferPointer { + + /// Initializes the contents of this buffer by moving the contents of `oldContents`, with the exception of `subrange`, whose + /// old contents are deinitialized and replaced by a region of size `newCount`, initialized by the given closure. 
+ /// + /// - parameters: + /// - oldContents: The buffer whose contents should be moved in to this buffer. + /// - subrange: The region of `buffer` which should be replaced. + /// - newCount: The number of elements to replace `subrange` with. + /// - initializeNewElements: A closure, which **must** initialize `newCount` elements starting at the given pointer. + /// + /// - returns: The total number of elements that were initialized. + /// + @inlinable + internal func moveInitialize( + from oldContents: UnsafeMutableBufferPointer, + replacingSubrange subrange: Range, + withElements newCount: Int, // Number of new elements to insert + _ initializeNewElements: + ((UnsafeMutablePointer, _ count: Int) -> Void) = { ptr, count in + precondition(count == 0) + } + ) -> Int { + guard let sourceStart = oldContents.baseAddress else { return 0 } + + precondition(subrange.lowerBound >= 0 && subrange.upperBound <= oldContents.count, "Invalid subrange") + let finalCount = oldContents.count - subrange.count + newCount + precondition(finalCount <= self.count, "Insufficient capacity") + + var head = self.baseAddress! + // Move the head items + head.moveInitialize(from: sourceStart, count: subrange.lowerBound) + head += subrange.lowerBound + // Destroy unused source items + (sourceStart + subrange.lowerBound).deinitialize(count: subrange.count) + // Initialize the gap. + initializeNewElements(head, newCount) + head += newCount + // Move the tail items + head.moveInitialize(from: sourceStart + subrange.upperBound, count: oldContents.count - subrange.upperBound) + return finalCount + } + + /// Initializes the contents of this buffer by copying the contents of `oldContents`, with the exception of `subrange`, which is + /// replaced by a region of size `newCount`, initialized by the given closure. + /// + /// - parameters: + /// - oldContents: The buffer whose contents should be copied in to this buffer. + /// - subrange: The region of `buffer` which should be replaced. 
+ /// - newCount: The number of elements to replace `subrange` with. + /// - initializeNewElements: A closure, which **must** initialize `newCount` elements starting at the given pointer. + /// + /// - returns: The total number of elements that were initialized. + /// + @inlinable + internal func initialize( + from oldContents: UnsafeBufferPointer, + replacingSubrange subrange: Range, + withElements newCount: Int, // Number of new elements to insert + _ initializeNewElements: + ((UnsafeMutablePointer, _ count: Int) -> Void) = { ptr, count in + precondition(count == 0) + } + ) -> Int { + guard let sourceStart = oldContents.baseAddress else { return 0 } + + precondition(subrange.lowerBound >= 0 && subrange.upperBound <= oldContents.count, "Invalid subrange") + let finalCount = oldContents.count - subrange.count + newCount + precondition(finalCount <= self.count, "Insufficient capacity") + + var head = self.baseAddress! + // Copy the head items. + head.initialize(from: sourceStart, count: subrange.lowerBound) + head += subrange.lowerBound + // Initialize the gap. + initializeNewElements(head, newCount) + head += newCount + // Copy the tail items. + head.initialize(from: sourceStart + subrange.upperBound, count: oldContents.count - subrange.upperBound) + return finalCount + } +} diff --git a/Sources/WebURL/WebURL+FormParameters.swift b/Sources/WebURL/WebURL+FormParameters.swift new file mode 100644 index 000000000..f212e3adf --- /dev/null +++ b/Sources/WebURL/WebURL+FormParameters.swift @@ -0,0 +1,665 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extension WebURL { + + /// A mutable view of the `application/x-www-form-urlencoded` key-value pairs in this URL's `query`. + /// + public var formParams: FormEncodedQueryParameters { + get { + FormEncodedQueryParameters(storage: storage) + } + _modify { + var view = FormEncodedQueryParameters(storage: storage) + storage = _tempStorage + defer { storage = view.storage } + yield &view + } + set { + if newValue.storage.structure.queryIsKnownFormEncoded { + storage.withUnwrappedMutableStorage( + { small in small.setQuery(toKnownFormEncoded: newValue.storage.utf8.query) }, + { large in large.setQuery(toKnownFormEncoded: newValue.storage.utf8.query) } + ) + } else { + let formEncoded = newValue.formEncodedQueryBytes + storage.withUnwrappedMutableStorage( + { small in small.setQuery(toKnownFormEncoded: formEncoded) }, + { large in large.setQuery(toKnownFormEncoded: formEncoded) } + ) + } + } + } + + /// A view of the `application/x-www-form-urlencoded` key-value pairs in a URL's `query`. + /// + /// The `formParams` view allows you to conveniently get and set the values for particular keys by accessing them as members. + /// For keys which cannot be written as members, the `get` and `set` functions provide equivalent functionality. + /// The keys and values will be automatically encoded and decoded. + /// + /// ```swift + /// var url = WebURL("http://example.com/currency/convert?from=EUR&to=USD")! 
+ /// assert(url.formParams.from == "EUR") + /// + /// url.formParams.from = "GBP" + /// assert(url.serialized == "http://example.com/currency/convert?from=GBP&to=USD") + /// + /// url.formParams.amount = "20" + /// assert(url.serialized == "http://example.com/currency/convert?from=GBP&to=USD&amount=20") + /// + /// url.formParams.to = "💵" + /// assert(url.serialized == "http://example.com/currency/convert?from=GBP&to=%F0%9F%92%B5&amount=20") + /// ``` + /// + /// Additionally, you can iterate over all of the key-value pairs using the `.allKeyValuePairs` property: + /// + /// ```swift + /// for (key, value) in url.formParams.allKeyValuePairs { + /// // ("from", "GBP") + /// // ("to", "💵") + /// // ("amount", "20") + /// } + /// ``` + /// + /// Key lookup (via `.contains`, `.get`, `.set`, etc) is not Unicode-aware. This means that the Unicode codepoints in the provided key must match + /// exactly with those in the query string after percent-decoding. This matches the behaviour of the `URLSearchParams` class defined in the URL standard. + /// + /// In the following example, the character "ñ" is not found when searching using a canonically-equivalent set of codepoints. + /// However, the `allKeyValuePairs` property provides the key using Swift's built-in `String` type, which does have Unicode-aware comparison: + /// + /// ```swift + /// let url = WebURL("http://example.com?jalape\u{006E}\u{0303}os=2")! + /// url.serialized // "http://example.com/?jalapen%CC%83os=2" + /// url.formParams.get("jalape\u{006E}\u{0303}os") // "2" + /// url.formParams.get("jalape\u{00F1}os") // nil + /// url.formParams.allKeyValuePairs.first(where: { $0.0 == "jalape\u{00F1}os" }) // ("jalapeños", "2") + /// ``` + /// + /// Also note that modifying any part of the query through this view will re-encode the _entire_ query as `application/x-www-form-urlencoded`. + /// Again, this matches the behaviour of `URLSearchParams` in the URL standard. 
+ /// + @dynamicMemberLookup + public struct FormEncodedQueryParameters { + + @usableFromInline + internal var storage: AnyURLStorage + + internal init(storage: AnyURLStorage) { + self.storage = storage + } + } +} + +extension WebURL.FormEncodedQueryParameters { + + internal var formEncodedQueryBytes: ContiguousArray? { + guard let queryUTF8 = storage.utf8.query else { + return nil + } + var result = ContiguousArray() + result.reserveCapacity(queryUTF8.count + 1) + for kvp in RawKeyValuePairs(utf8: queryUTF8) { + result.append(contentsOf: queryUTF8[kvp.key].lazy.percentDecodedUTF8(from: \.form).percentEncoded(as: \.form)) + result.append(ASCII.equalSign.codePoint) + result.append(contentsOf: queryUTF8[kvp.value].lazy.percentDecodedUTF8(from: \.form).percentEncoded(as: \.form)) + result.append(ASCII.ampersand.codePoint) + } + _ = result.popLast() + // Non-empty queries may become empty once form-encoded (e.g. "&&&&"). + // These should result in 'nil' queries. + return result.isEmpty ? nil : result + } + + @usableFromInline + internal mutating func reencodeQueryIfNeeded() { + guard !storage.structure.queryIsKnownFormEncoded else { return } + let reencodedQuery = formEncodedQueryBytes + storage.withUnwrappedMutableStorage( + { small in small.setQuery(toKnownFormEncoded: reencodedQuery) }, + { large in large.setQuery(toKnownFormEncoded: reencodedQuery) } + ) + assert(storage.structure.queryIsKnownFormEncoded) + } +} + + +// -------------------------------------------- +// MARK: - Reading +// -------------------------------------------- + + +extension WebURL.FormEncodedQueryParameters { + + /// A `Sequence` allowing iteration over all form-encoded key-value pairs contained in this URL's query. + /// + public var allKeyValuePairs: KeyValuePairs { + KeyValuePairs(params: self) + } + + /// A `Sequence` allowing iteration over all form-encoded key-value pairs contained in a URL's query. 
+ /// + public struct KeyValuePairs: Sequence { + + internal var rawKVPs: RawKeyValuePairs? + + internal init(params: WebURL.FormEncodedQueryParameters) { + self.rawKVPs = params.storage.utf8.query.map { RawKeyValuePairs(utf8: $0) } + } + + public func makeIterator() -> Iterator { + Iterator(rawIter: rawKVPs?.makeIterator()) + } + + public struct Iterator: IteratorProtocol { + + internal var rawIter: RawKeyValuePairs.Iterator? + + internal init(rawIter: RawKeyValuePairs.Iterator?) { + self.rawIter = rawIter + } + + public mutating func next() -> (String, String)? { + guard let nextKVP = rawIter?.next() else { + return nil + } + let queryUTF8 = rawIter!.remaining.base + return (queryUTF8[nextKVP.key].urlFormDecodedString, queryUTF8[nextKVP.value].urlFormDecodedString) + } + } + + /// Whether or not this sequence contains any key-value pairs. + /// + public var isEmpty: Bool { + var iter = makeIterator() + return iter.next() == nil + } + } + + /// A sequence which allows iterating the key-value pairs within a collection of UTF8 bytes. + /// + /// The sequence assumes the "&" and "=" delimiters have _not_ been encoded, but otherwise does not assume the contents of the keys or values + /// to be encoded in any particular way. + /// + @usableFromInline + internal struct RawKeyValuePairs: Sequence where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + /// - Note: `pair` includes the trailing ampersand delimiter, unless the key-value pair ends at the end of the query string. 
+ @usableFromInline + internal typealias Ranges = ( + pair: Range, key: Range, value: Range + ) + + @usableFromInline + internal var utf8: UTF8Bytes + + @inlinable + internal init(utf8: UTF8Bytes) { + self.utf8 = utf8 + } + + @inlinable + internal func makeIterator() -> Iterator { + return Iterator(remaining: utf8[...]) + } + + @usableFromInline + internal struct Iterator: IteratorProtocol { + + @usableFromInline + internal var remaining: UTF8Bytes.SubSequence + + @inlinable + internal init(remaining: UTF8Bytes.SubSequence) { + self.remaining = remaining + } + + @inlinable + internal mutating func next() -> Ranges? { + + guard remaining.isEmpty == false else { + return nil + } + var nextKVP: UTF8Bytes.SubSequence + repeat { + if let nextKVPEnd = remaining.firstIndex(of: ASCII.ampersand.codePoint) { + nextKVP = remaining[..( + encodedKey: UTF8Bytes, encodedValue: UTF8Bytes? + ) -> AnyURLStorage where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + + let oldQueryRange = header.structure.rangeForReplacingCodeUnits(of: .query).dropFirst() + + // If we have a new value to set, find the first KVP which matches the key. + let rangeToRemoveMatchesFrom: Range + var _rangeOfFirstValue: Range? + if let encodedValue = encodedValue { + guard + let firstMatch = WebURL.FormEncodedQueryParameters.RawKeyValuePairs( + utf8: codeUnits[oldQueryRange] + ).first(where: { codeUnits[$0.key].elementsEqual(encodedKey) }) + else { + return appendFormParamPairs(fromEncoded: CollectionOfOne((encodedKey, encodedValue))) + } + _rangeOfFirstValue = firstMatch.value + rangeToRemoveMatchesFrom = firstMatch.pair.upperBound.. Bool { + lhs.swiftModel == rhs.swiftModel + } + + public func hash(into hasher: inout Hasher) { + swiftModel.hash(into: &hasher) + } +} + + +// -------------------------------------------- +// MARK: - Components +// -------------------------------------------- + + +// Note: Documentation comments for these properties is copied from Node.js (MIT-licensed). 
+// It is not the primary interface for WebURL, so it isn't worth copying the whole thing or writing our own, +// but it is API and should at least have a brief comment. +// Node docs: https://nodejs.org/api/url.html#url_class_url + +extension WebURL.JSModel { + + /// Gets and sets the serialized URL. + /// + public var href: String { + get { + swiftModel.serialized + } + set { + if let newURL = WebURL(newValue) { + self = newURL.jsModel + } + } + } + + /// Gets the read-only serialization of the URL's origin. + /// + public var origin: String { + swiftModel.origin.serialized + } + + /// Gets and sets the username portion of the URL. + /// + public var username: String { + get { + swiftModel.username ?? "" + } + set { + try? swiftModel.setUsername(newValue) + } + } + + /// Gets and sets the password portion of the URL. + /// + public var password: String { + get { + swiftModel.password ?? "" + } + set { + try? swiftModel.setPassword(newValue) + } + } + + // Setters for the following components are a bit more complex. + // In the standard, they tend to go through the URL parser, which filters tabs and newlines + // (but doesn't trim ASCII C0 or spaces), and allows trailing data that just gets silently ignored. + // + // The Swift model setters do not filter tabs or newlines, nor do they silently drop any part of the given value, + // and they may choose to represent non-present values as 'nil' rather than empty strings, + // but in all other respects they should behave the same. + + /// Gets and sets the protocol portion of the URL. + /// + /// - Note: This property is called `protocol` in Javascript. + /// + public var scheme: String { + get { + swiftModel.scheme + ":" + } + set { + let trimmedAndFiltered: ASCII.NewlineAndTabFiltered + if let terminatorIdx = newValue.firstIndex(of: ":") { + trimmedAndFiltered = ASCII.NewlineAndTabFiltered(newValue[.. not an error => remove existing port (newPort == nil). 
+ } else { + guard let parsedPort = UInt16(String(decoding: portString, as: UTF8.self)) else { + // Invalid number (e.g. overflow) => error => keep existing port (abort setter). + return + } + newPort = parsedPort + } + swiftModel.port = newPort.map { Int($0) } + } + } + + /// Gets and sets the path portion of the URL. + /// + public var pathname: String { + get { + swiftModel.path + } + set { + try? swiftModel.utf8.setPath(ASCII.NewlineAndTabFiltered(newValue.utf8)) + } + } + + /// Gets and sets the serialized query portion of the URL. + /// + public var search: String { + get { + let swiftValue = swiftModel.query ?? "" + if swiftValue.isEmpty { + return swiftValue + } + return "?" + swiftValue + } + set { + guard newValue.isEmpty == false else { + swiftModel.query = nil + return + } + var newQuery = newValue[...] + if newValue.first?.asciiValue == ASCII.questionMark.codePoint { + newQuery = newValue.dropFirst() + } + swiftModel.utf8.setQuery(ASCII.NewlineAndTabFiltered(newQuery.utf8)) + } + } + + /// Gets and sets the fragment portion of the URL. + /// + public var hash: String { + get { + let swiftValue = swiftModel.fragment ?? "" + if swiftValue.isEmpty { + return swiftValue + } + return "#" + swiftValue + } + set { + guard newValue.isEmpty == false else { + swiftModel.fragment = nil + return + } + var newFragment = newValue[...] + if newValue.first?.asciiValue == ASCII.numberSign.codePoint { + newFragment = newValue.dropFirst() + } + swiftModel.utf8.setFragment(ASCII.NewlineAndTabFiltered(newFragment.utf8)) + } + } +} diff --git a/Sources/WebURL/WebURL+Origin.swift b/Sources/WebURL/WebURL+Origin.swift new file mode 100644 index 000000000..3c54debc2 --- /dev/null +++ b/Sources/WebURL/WebURL+Origin.swift @@ -0,0 +1,174 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extension WebURL { + + /// Origins are the fundamental currency of the Web's security model. + /// + /// Two actors in the Web platform that share an origin are assumed to trust each other and to have the same authority. + /// Actors with differing origins are considered potentially hostile versus each other, and are isolated from each other to varying degrees. + /// + /// The origin is not an attribute or component of a URL; it is a value which may be computed from a URL. + /// + /// The only URLs for which meaningful origins may be computed are: + /// - Those with the http, https, ftp, ws, or wss schemes (i.e. the "special" schemes, excluding file), and + /// - Those with the "blob" scheme, which do not have an authority (cannot-be-a-base), and whose path is another URL. + /// + /// Computing an origin using any other URL results in an _opaque origin_, which is defined to be an "internal value, with no serialization it can be recreated from, + /// [...] and for which the only meaningful operation is testing for equality." ([HTML Standard][HTML-origin]). + /// + /// The URL standard requires every computation of an opaque origin to result in a _new_ value; and the HTML standard builds on that by computing + /// new opaque origins at specific times for specific elements or browsing context, when it desires more specific behaviour for trust/security domains. + /// + /// This type deviates slightly from the URL standard in that it separates computing an origin using a URL from establishing a new trust/security domain. 
+ /// Opaque origins are instead considered to be _undefined_ security domains - it does not matter if you call `url.origin` again or store + /// a previously-computed origin in your application state; an opaque origin will never compare as being "same origin" with anything, even itself. + /// This behaviour is in some respects analogous to a floating-point NaN value. + /// + /// Instead, if an application wishes to establish a trust/security domain, it should do so explicitly by using an augmented origin type, for instance: + /// + /// ``` + /// enum ApplicationSpecificOrigin<T> { + /// case derivedFromURL(WebURL.Origin) // security domain is 'obvious' due to URL scheme known by the standard. + /// case applicationDefined(T) // A security domain which has been established by application-specific logic. + /// case undefinedOpaque // opaque origin, application unable to determine a security domain. + /// } + /// ``` + /// This can also be useful to define application-specific origins for "file" URLs, which the [URL standard][URL-origin] leaves as "an exercise for the reader". + /// + /// It is also recommended to read [RFC-6454 ("The Web Origin Concept")][RFC-6454] for a holistic understanding of the origin-based security model. + /// + /// [HTML-origin]: https://html.spec.whatwg.org/multipage/origin.html#concept-origin-opaque + /// [URL-origin]: https://url.spec.whatwg.org/#origin + /// [RFC-6454]: https://tools.ietf.org/html/rfc6454 + /// + public struct Origin { + + fileprivate enum Kind { + + /// An "opaque origin", which is an internal value. This type deviates from the standard by assigning these no identity, rather than assigning + /// a unique identity upon creation. This should not matter for security, as the change in behaviour only leads to _more_ isolation, never less. + case opaque + + /// A tuple of (scheme, host, port, null/domain). Stored pre-serialized. + /// According to the URL standard, the 'domain' should always be null. 
+ case tuple(String) + } + fileprivate var kind: Kind + } +} + +extension WebURL { + + /// The origin of this URL. + /// + /// Origins are the fundamental currency of the Web's security model. + /// Two actors in the Web platform that share an origin are assumed to trust each other and to have the same authority. + /// Actors with differing origins are considered potentially hostile versus each other, and are isolated from each other to varying degrees. + /// + public var origin: Origin { + switch schemeKind { + case .http, .https, .ws, .wss, .ftp: + let serializedTuple = "\(scheme)://\(hostname!)\(port.map { ":\($0)" } ?? "")" + return Origin(kind: .tuple(serializedTuple)) + case .other where cannotBeABase && utf8.scheme.elementsEqual("blob".utf8): + return WebURL(path)?.origin ?? Origin(kind: .opaque) + default: + return Origin(kind: .opaque) + } + } +} + + +// -------------------------------------------- +// MARK: - Standard protocols +// -------------------------------------------- + + +extension WebURL.Origin: Equatable, Hashable { + + /// Whether this origin is considered "same origin" with respect to another origin. + /// + /// Note that this always returns `false` for opaque origins. + /// + public static func == (lhs: WebURL.Origin, rhs: WebURL.Origin) -> Bool { + switch (lhs.kind, rhs.kind) { + // Opaque origins are like floating-point NaNs; not same-origin WRT each other. 
+ case (.opaque, _): return false + case (_, .opaque): return false + case (.tuple(let lhs), .tuple(let rhs)): return lhs == rhs + } + } + + public func hash(into hasher: inout Hasher) { + switch kind { + case .opaque: + hasher.combine(UInt8(0)) + case .tuple(let serialization): + hasher.combine(UInt8(1)) + hasher.combine(serialization) + } + } +} + +extension WebURL.Origin: CustomStringConvertible { + + public var description: String { + return serialized + } +} + + +// -------------------------------------------- +// MARK: - Properties +// -------------------------------------------- + + +extension WebURL.Origin { + + /// If `true`, this is an opaque origin with no meaningful value for use as a trust or security domain. + /// + /// Note that, analogous to floating-point NaN values, opaque origins are **not considered same-origin with themselves**. + /// This also means that opaque origins compare as _not equal_ using the `==` operator, and should not be stored in hash-tables + /// such as `Set` or `Dictionary`, as they will _always_ insert into the table and degrade its performance: + /// + /// ```swift + /// let myURL = WebURL("foo://exampleHost:4567/")! + /// myURL.origin.isOpaque // true. WebURL is unable to define a security domain for "foo" URLs. + /// + /// myURL.origin == myURL.origin // false! + /// + /// var seenOrigins: Set = [myURL.origin] + /// seenOrigins.contains(myURL.origin) // false! + /// seenOrigins.insert(myURL.origin) // always inserts! lots of hash collisions! + /// ``` + /// + public var isOpaque: Bool { + if case .opaque = kind { return true } + return false + } + + /// The string representation of this origin. + /// + /// The serialization of an origin is defined in the [HTML specification][HTTP]. 
+ /// + /// [HTTP]: https://html.spec.whatwg.org/multipage/origin.html#ascii-serialisation-of-an-origin + /// + public var serialized: String { + guard case .tuple(let serialization) = kind else { + return "null" + } + return serialization + } +} diff --git a/Sources/WebURL/WebURL+PathComponents.swift b/Sources/WebURL/WebURL+PathComponents.swift new file mode 100644 index 000000000..a6f0ef73d --- /dev/null +++ b/Sources/WebURL/WebURL+PathComponents.swift @@ -0,0 +1,951 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extension WebURL { + + /// A mutable view of this URL's path components. + /// + /// Use of this view requires that the URL is hierarchical (`cannotBeABase` is `false`). + /// Using this view with a non-hierarchical URL will trigger a runtime error. + /// + public var pathComponents: PathComponents { + get { + precondition(!cannotBeABase, "cannot-be-a-base URLs do not have path components") + return PathComponents(storage: storage) + } + _modify { + precondition(!cannotBeABase, "cannot-be-a-base URLs do not have path components") + var view = PathComponents(storage: storage) + storage = _tempStorage + defer { storage = view.storage } + yield &view + } + set { + precondition(!cannotBeABase, "cannot-be-a-base URLs do not have path components") + try! utf8.setPath(newValue.storage.utf8.path) + } + } + + /// A view of the components in a hierarchical URL's path. 
+ /// + /// This collection provides efficient, bidirectional, read-write access to the URL's path components. + /// Components are percent-decoded when they are returned and percent-encoded when they are replaced. + /// + /// ```swift + /// var url = WebURL("http://example.com/swift/packages/%F0%9F%A6%86%20tracker")! + /// url.pathComponents.first! // "swift" + /// url.pathComponents.last! // "🦆 tracker" + /// + /// url.pathComponents.removeLast() + /// url.pathComponents.append("swift-url") + /// print(url) // Prints "http://example.com/swift/packages/swift-url" + /// ``` + /// + /// Path components extend from their leading slash until the leading slash of the next component (or the end of the path). That means that a URL whose + /// path is "/" contains a single, empty path component, and paths which end with a "/" (also referred to as directory paths) end with an empty component. + /// When appending to a directory path (through `append` or any other function which replaces path components), this empty component is dropped + /// so that the result does not contain excessive empties. To create a directory path, append an empty component or call `ensureDirectoryPath`. + /// + /// ```swift + /// var url = WebURL("file:///")! + /// url.pathComponents.last! // "" + /// url.pathComponents.count // 1 + /// + /// url.pathComponents.append("usr") // file:///usr + /// url.pathComponents.count // 1, because the trailing empty component was dropped. + /// + /// url.pathComponents += ["bin", "swift"] // file:///usr/bin/swift + /// url.pathComponents.last! // "swift" + /// url.pathComponents.count // 3 + /// + /// url.pathComponents.ensureDirectoryPath() // file:///usr/bin/swift/ + /// url.pathComponents.last! // "" + /// url.pathComponents.count // 4 + /// ``` + /// + /// Modifying the URL, such as by setting its `path` or any other properties, invalidates all previously obtained path component indices. 
+ /// Functions which modify the path components return new indices which may be used to maintain position across modifications. + /// + /// It is best to avoid making assumptions about how this collection's `count` is affected by a modification. In addition to the dropping of trailing empty + /// components described above, URLs with particular schemes are forbidden from ever having empty paths; attempting to remove all of the path components + /// from such a URL will result in a path with a single, empty component, just like setting the empty string to the URL's `path` property. + /// + /// This view does not support non-hierarchical URLs (`cannotBeABase` is `true`), and triggers a runtime error if it is accessed on such a URL. + /// Almost all URLs are hierarchical (in particular, URLs with special schemes, such as http, https, and file, are always hierarchical). + /// Non-hierarchical URLs can be recognized by the lack of slashes immediately following their scheme. Examples of such URLs are: + /// + /// - `mailto:bob@example.com` + /// - `javascript:alert("hello");` + /// - `data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==` + /// + /// See the `WebURL.cannotBeABase` property for more information about these URLs. + /// + public struct PathComponents { + + @usableFromInline + internal var storage: AnyURLStorage + + internal init(storage: AnyURLStorage) { + self.storage = storage + } + } +} + + +// -------------------------------------------- +// MARK: - Reading +// -------------------------------------------- + + +extension WebURL.PathComponents { + + /// The position of a path component within a URL string. + /// + public struct Index: Equatable, Comparable { + + /// The range of the component in the overall URL string's code-units. + /// Note that this includes the leading "/", so paths which end in a "/" include a trailing empty component before reaching endIndex. 
+ /// + @usableFromInline + internal var range: Range + + @inlinable + internal init(codeUnitRange: Range) { + self.range = codeUnitRange + } + + @inlinable + public static func < (lhs: Index, rhs: Index) -> Bool { + lhs.range.lowerBound < rhs.range.lowerBound + } + } +} + +extension WebURL.UTF8View { + + /// The UTF-8 code-units containing the given path component. + /// + public func pathComponent(_ component: WebURL.PathComponents.Index) -> SubSequence { + // These bounds checks are only for semantics, not for memory safety; the slicing subscript handles that. + assert(component.range.lowerBound >= path.startIndex && component.range.lowerBound <= path.endIndex) + assert(self[component.range.lowerBound] == ASCII.forwardSlash.codePoint) + return self[component.range.dropFirst()] + } +} + +extension WebURL.PathComponents: BidirectionalCollection { + + public var startIndex: Index { + storage.pathComponentsStartIndex + } + + public var endIndex: Index { + storage.pathComponentsEndIndex + } + + public subscript(position: Index) -> String { + storage.utf8.pathComponent(position).percentDecodedString + } + + public func distance(from start: Index, to end: Index) -> Int { + guard start <= end else { + return -1 * distance(from: end, to: start) + } + return storage.utf8[start.range.lowerBound.. Index { + var copy = i + formIndex(after: ©) + return copy + } + + public func formIndex(after i: inout Index) { + let newEnd = storage.endOfPathComponent(startingAt: i.range.upperBound) ?? i.range.upperBound + i.range = i.range.upperBound.. 
Index { + var copy = i + formIndex(before: ©) + return copy + } + + public func formIndex(before i: inout Index) { + guard let newStart = storage.startOfPathComponent(endingAt: i.range.lowerBound) else { return } + i.range = newStart..( + _ bounds: Range, with newComponents: Components + ) -> Range where Components: Collection, Components.Element: StringProtocol { + // TODO: [performance]: Create a specialized StringProtocol -> UTF8View projection wrapper. + replaceComponents(bounds, withUTF8: newComponents.lazy.map { $0.utf8 }) + } + + @inlinable + @discardableResult + internal mutating func replaceComponents( + _ range: Range, withUTF8 newComponents: Components + ) -> Range + where Components: Collection, Components.Element: Collection, Components.Element.Element == UInt8 { + + var newSubrange: Range? + storage.withUnwrappedMutableStorage( + { small -> AnyURLStorage in + let result = small.replacePathComponents(range, with: newComponents) + newSubrange = result.1 + return result.0 + }, + { large -> AnyURLStorage in + let result = large.replacePathComponents(range, with: newComponents) + newSubrange = result.1 + return result.0 + } + ) + return newSubrange! + } +} + +// Insert, Append, Remove, Replace. + +extension WebURL.PathComponents { + + /// Inserts the elements of a collection into the path at the specified position. + /// + /// The new components are inserted before the component currently at the specified index, and their contents will be percent-encoded, if necessary. + /// If any of the new components are "." or ".." (or their percent-encoded versions, "%2E" or "%2E%2E", case-insensitive), those components are ignored. + /// + /// The following example inserts an array of path components in the middle of a path: + /// + /// ```swift + /// var url = WebURL("file:///usr/swift")! 
+ /// url.pathComponents.insert( + /// contentsOf: ["local", "bin"], at: url.pathComponents.index(after: url.pathComponents.startIndex) + /// ) + /// print(url) // Prints "file:///usr/local/bin/swift" + /// ``` + /// + /// If you pass the path's `endIndex` property as the `position` parameter, the new elements are appended to the path. + /// Calling the `append(contentsOf:)` method instead is preferred. If inserting at the end of a path whose last component is empty (i.e. a directory path), + /// the trailing empty component will be dropped and replaced by the first inserted component. + /// + /// Calling this method invalidates any existing indices for this URL. + /// + /// - parameters: + /// - newComponents: The new components to insert into the path. + /// - position: The position at which to insert the new components. `position` must be a valid path component index. + /// - returns: A new range of indices corresponding to the location of the new components in the path. + /// + @inlinable + @discardableResult + public mutating func insert( + contentsOf newComponents: Components, at position: Index + ) -> Range where Components: Collection, Components.Element: StringProtocol { + replaceSubrange(position..( + contentsOf newComponents: Components + ) -> Range where Components: Collection, Components.Element: StringProtocol { + insert(contentsOf: newComponents, at: endIndex) + } + + /// Adds the elements of a collection to the end of this path. + /// + /// The contents of the appended components will be percent-encoded, if necessary. + /// If any of the new components are "." or ".." (or their percent-encoded versions, "%2E" or "%2E%2E", case-insensitive), those components are ignored. + /// + /// If appending to a path whose last component is empty (i.e. a directory path), + /// the trailing empty component will be dropped and replaced by the first inserted component. + /// + /// The following example builds a path by appending path components. 
Note that the first `+=` does not change the `count`, as the URL initially + /// has a directory path. + /// + /// ```swift + /// var url = WebURL("file:///")! + /// url.pathComponents.last! // "" + /// url.pathComponents.count // 1 + /// + /// url.pathComponents += ["tmp"] + /// url.pathComponents.last! // "tmp" + /// url.pathComponents.count // 1 + /// + /// url.pathComponents += ["my_app", "data.json"] + /// url.pathComponents.last! // "data.json" + /// url.pathComponents.count // 3 + /// + /// print(url) // Prints "file:///tmp/my_app/data.json" + /// ``` + /// + /// Calling this method invalidates any existing indices for this URL. + /// + @inlinable + public static func += ( + lhs: inout WebURL.PathComponents, rhs: Components + ) where Components: Collection, Components.Element: StringProtocol { + lhs.append(contentsOf: rhs) + } + + /// Removes the specified subrange of components from the path. + /// + /// ```swift + /// var url = WebURL("http://example.com/projects/swift/swift-url/")! + /// url.pathComponents.removeSubrange( + /// url.pathComponents.index(after: url.pathComponents.startIndex)..) -> Index { + replaceComponents(bounds, withUTF8: EmptyCollection>()).upperBound + } + + /// Replaces the path component at the specified position. + /// + /// The contents of the new component will be percent-encoded, if necessary. + /// + /// ```swift + /// var url = WebURL("file:///usr/bin/swift")! + /// url.pathComponents.replaceComponent( + /// at: url.pathComponents.index(after: url.pathComponents.startIndex), + /// with: "lib" + /// ) + /// print(url) // Prints "file:///usr/lib/swift" + /// ``` + /// + /// If the new component is "." or ".." (or their percent-encoded versions, "%2E" or "%2E%2E", case-insensitive), the component at `position` is removed - + /// as if calling `replaceSubrange` with an empty collection. + /// + /// Calling this method invalidates any existing indices for this URL. 
+ /// + /// - parameters: + /// - position: The position of the component to replace. `position` must be a valid path component index. + /// - newComponent: The value to set the component to. + /// - returns: A new range of indices encompassing the replaced component. + /// + @inlinable + @discardableResult + public mutating func replaceComponent( + at position: Index, with newComponent: Component + ) -> Range where Component: StringProtocol { + precondition(position != endIndex, "Cannot replace component at endIndex") + return replaceSubrange(position..( + _ newComponent: Component, at position: Index + ) -> Range where Component: StringProtocol { + insert(contentsOf: CollectionOfOne(newComponent), at: position) + } + + /// Adds a component to the end of this path. + /// + /// The contents of the appended component will be percent-encoded, if necessary. + /// If the new component is "." or ".." (or their percent-encoded versions, "%2E" or "%2E%2E", case-insensitive), it will be ignored. + /// + /// If appending to a path whose last component is empty (i.e. a directory path), + /// the trailing empty component will be dropped and replaced by the new component. + /// + /// The following example builds a path by appending components. Note that the first `append` does not change the `count`, as the URL initially + /// has a directory path. + /// + /// ```swift + /// var url = WebURL("file:///")! + /// url.pathComponents.last! // "" + /// url.pathComponents.count // 1 + /// + /// url.pathComponents.append("tmp") + /// url.pathComponents.last! // "tmp" + /// url.pathComponents.count // 1 + /// + /// url.pathComponents.append("data.json") + /// url.pathComponents.last! // "data.json" + /// url.pathComponents.count // 2 + /// + /// print(url) // Prints "file:///tmp/data.json" + /// ``` + /// + /// Calling this method invalidates any existing indices for this URL. + /// + /// - parameter newComponent: The new component to add to end of the path. 
+ /// - returns: A new range of indices corresponding to the location of the new component in the path. + /// + @inlinable + @discardableResult + public mutating func append( + _ newComponent: Component + ) -> Range where Component: StringProtocol { + append(contentsOf: CollectionOfOne(newComponent)) + } + + /// Removes the component at the given index from the path. + /// + /// ```swift + /// var url = WebURL("http://example.com/projects/swift/swift-url/Sources/")! + /// url.pathComponents.remove( + /// at: url.pathComponents.index(after: url.pathComponents.startIndex) + /// ) + /// print(url) // Prints "http://example.com/projects/swift-url/Sources/" + /// ``` + /// + /// URLs with particular schemes are forbidden from ever having empty paths; attempting to remove all of the path components + /// from such a URL will result in a path with a single, empty component, just like setting the empty string to the URL's `path` property. + /// + /// ```swift + /// var url = WebURL("http://example.com/foo")! + /// url.pathComponents.remove(at: url.pathComponents.startIndex) + /// print(url) // Prints "http://example.com/" + /// ``` + /// + /// Calling this method invalidates any existing indices for this URL. + /// + /// - parameter position: The index of the component to remove. `position` must be a valid path component index. + /// - returns: The index corresponding to the component following `position`, after modification. + /// + @discardableResult + public mutating func remove(at position: Index) -> Index { + precondition(position != endIndex, "Cannot remove component at endIndex") + return removeSubrange(position..= 0, "Cannot remove a negative number of path components") + removeSubrange(index(endIndex, offsetBy: -k, limitedBy: startIndex)!.. 
Index { + let range = append("") + assert(range.upperBound == endIndex) + return range.lowerBound + } +} + + +// -------------------------------------------- +// MARK: - URLStorage + PathComponents +// -------------------------------------------- + + +extension AnyURLStorage { + + @inlinable + internal var pathComponentsStartIndex: WebURL.PathComponents.Index { + switch self { + case .small(let storage): return storage.pathComponentsStartIndex + case .large(let storage): return storage.pathComponentsStartIndex + } + } + + @inlinable + internal var pathComponentsEndIndex: WebURL.PathComponents.Index { + switch self { + case .small(let storage): return storage.pathComponentsEndIndex + case .large(let storage): return storage.pathComponentsEndIndex + } + } + + @inlinable + internal func endOfPathComponent(startingAt componentStartOffset: Int) -> Int? { + switch self { + case .small(let storage): return storage.endOfPathComponent(startingAt: componentStartOffset) + case .large(let storage): return storage.endOfPathComponent(startingAt: componentStartOffset) + } + } + + @inlinable + internal func startOfPathComponent(endingAt componentEndOffset: Int) -> Int? { + switch self { + case .small(let storage): return storage.startOfPathComponent(endingAt: componentEndOffset) + case .large(let storage): return storage.startOfPathComponent(endingAt: componentEndOffset) + } + } +} + +extension URLStorage { + + @inlinable + internal var pathComponentsStartIndex: WebURL.PathComponents.Index { + let pathStart = header.structure.rangeForReplacingCodeUnits(of: .path).lowerBound + return WebURL.PathComponents.Index(codeUnitRange: pathStart.. Int? { + let pathRange = header.structure.rangeForReplacingCodeUnits(of: .path) + guard !pathRange.isEmpty, pathRange.contains(componentStartOffset) else { return nil } + assert(codeUnits[componentStartOffset] == ASCII.forwardSlash.codePoint, "UTF8 position not aligned to a component") + return codeUnits[componentStartOffset + 1.. Int? 
{ + let pathRange = header.structure.rangeForReplacingCodeUnits(of: .path) + guard !pathRange.isEmpty, componentEndOffset > pathRange.lowerBound else { return nil } + if pathRange.contains(componentEndOffset) { + assert(codeUnits[componentEndOffset] == ASCII.forwardSlash.codePoint, "UTF8 position not aligned to a component") + } else { + assert(componentEndOffset == pathRange.upperBound, "UTF8 position is not within the path") + } + return codeUnits[pathRange.lowerBound.. (AnyURLStorage, Range) { + + let oldStructure = header.structure + let oldPathRange = oldStructure.rangeForReplacingCodeUnits(of: .path) + precondition(!oldStructure.cannotBeABaseURL, "Cannot replace components of a cannot-be-a-base URL") + + // We can only set an empty path if this is a non-special scheme with authority ("foo://host?query"). + // Everything else (special, path-only) requires at least a lone "/". + let replaced: AnyURLStorage + if !oldStructure.schemeKind.isSpecial, oldStructure.hasAuthority { + var newStructure = oldStructure + newStructure.pathLength = 0 + newStructure.firstPathComponentLength = 0 + replaced = removeSubrange(oldPathRange, newStructure: newStructure).newStorage + } else { + var commands = [ReplaceSubrangeOperation]() + var newStructure = oldStructure + if case .path = oldStructure.sigil { + commands.append(.remove(subrange: oldStructure.rangeForReplacingSigil)) + newStructure.sigil = .none + } + newStructure.pathLength = 1 + newStructure.firstPathComponentLength = 1 + commands.append( + .replace( + subrange: oldPathRange, withCount: 1, + writer: { buffer in + buffer.baseAddress.unsafelyUnwrapped.initialize(to: ASCII.forwardSlash.codePoint) + return 1 + }) + ) + replaced = multiReplaceSubrange(commands, newStructure: newStructure) + } + let newPathStart = replaced.structure.pathStart + let newPathEnd = replaced.structure.pathStart &+ replaced.structure.pathLength + let newLowerBound = WebURL.PathComponents.Index(codeUnitRange: newPathStart..( + _ replacedIndices: 
Range, + with components: Components + ) -> (AnyURLStorage, Range) + where Components: Collection, Components.Element: Collection, Components.Element.Element == UInt8 { + + let oldStructure = header.structure + let oldPathRange = oldStructure.rangeForReplacingCodeUnits(of: .path) + precondition(!oldStructure.cannotBeABaseURL, "Cannot replace components of a cannot-be-a-base URL") + + // If 'firstNewComponentLength' is nil, we infer that the components are empty (i.e. removal operation). + let components = components.lazy.filter { utf8 in + !PathComponentParser.isSingleDotPathSegment(utf8) && !PathComponentParser.isDoubleDotPathSegment(utf8) + } + let firstNewComponentLength = components.first.map { 1 + $0.lazy.percentEncoded(as: \.pathComponent).count } + + // If inserting elements at the end of a path which ends in a trailing slash, widen the replacement range + // so we drop the trailing empty component. This means appending "foo" to "/" results in "/foo" rather than "//foo", + // and "foo" to "/usr/" results in "/usr/foo" rather than "/usr//foo". + var replacedIndices = replacedIndices + if replacedIndices.lowerBound.range.lowerBound == oldPathRange.upperBound, + firstNewComponentLength != nil, _hasDirectoryPath + { + let newLowerBound = replacedIndices.lowerBound.range.lowerBound - 1..1 component, and the first component is empty. + let newPathFirstComponentLength: Int + let newPathRequiresSigil: Bool + if replacedRange.lowerBound == oldPathRange.lowerBound { + // Modifying the front of the path. + if let firstInsertedComponentLength = firstNewComponentLength { + // Inserting/replacing. The first component will be from the new components. 
+ let firstComponentEmpty = (firstInsertedComponentLength == 1) + let hasComponentsAfter = + (replacedRange.upperBound != oldPathRange.upperBound) || (insertedPathLength != firstInsertedComponentLength) + newPathRequiresSigil = firstComponentEmpty && hasComponentsAfter + newPathFirstComponentLength = firstInsertedComponentLength + } else { + // Removing. The first component will be at replacedIndices.upperBound. + let firstComponentEmpty = (replacedIndices.upperBound.range.count == 1) + assert(replacedRange.upperBound != oldPathRange.upperBound, "Full path removals should have been handled above") + newPathRequiresSigil = firstComponentEmpty + newPathFirstComponentLength = replacedIndices.upperBound.range.count + } + } else { + // Modifying the middle/end of the path. The current first component will be maintained. + let oldStartIndex = pathComponentsStartIndex + let firstComponentEmpty = (oldStartIndex.range.count == 1) + let hasComponentsAfter: Bool + if firstNewComponentLength != nil { + // Inserting/replacing. There will certainly be components after the first one. + hasComponentsAfter = true + } else { + // Removing. Unless the entire rest of the path is removed, there will be components remaining. 
+ hasComponentsAfter = (replacedRange != oldStartIndex.range.upperBound..= 1 && codeUnits[pathRange.upperBound - 1] == ASCII.forwardSlash.codePoint + } + + @inlinable + internal mutating func _normalizeWindowsDriveLetterIfPresent() { + guard case .file = header.structure.schemeKind else { return } + let path = codeUnits[header.structure.rangeForReplacingCodeUnits(of: .path)].dropFirst() + if PathComponentParser.isWindowsDriveLetter(path.prefix(2)), + path.count == 2 || path.dropFirst(2).first == ASCII.forwardSlash.codePoint + { + codeUnits[path.startIndex + 1] = ASCII.colon.codePoint + } + } +} diff --git a/Sources/WebURL/WebURL+Scheme.swift b/Sources/WebURL/WebURL+Scheme.swift new file mode 100644 index 000000000..ffa221ddf --- /dev/null +++ b/Sources/WebURL/WebURL+Scheme.swift @@ -0,0 +1,214 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extension WebURL { + + /// A value representing a scheme's kind. + /// + /// A URL's `scheme` (or "protocol") describes how to communicate with the resource's location. + /// Some schemes ("http", "https", "ws", "wss", "ftp", and "file") are referred to as being "special". + /// + /// Note that this type intentionally does not conform to `Equatable`. + /// Two URLs with the same `SchemeKind` may have different schemes if the scheme is not special. 
+  ///
+  @usableFromInline
+  enum SchemeKind {
+    case ftp
+    case file
+    case http
+    case https
+    case ws
+    case wss
+    case other
+  }
+}
+
+extension WebURL.SchemeKind {
+
+  /// Determines the `SchemeKind` for the given scheme content.
+  ///
+  /// This initializer does not determine whether a given scheme string is valid or not; it only detects certain known schemes and returns `.other` for everything
+  /// else. Note that the ":" terminator must not be included in the content; "http" will be recognized, but "http:" won't. This initializer is case-insensitive.
+  ///
+  /// Content which is empty, or which is longer than 5 bytes, is always `.other`.
+  ///
+  /// - parameters:
+  ///   - schemeContent: The scheme content, as a sequence of UTF8-encoded bytes.
+  ///
+  @inlinable
+  internal init<UTF8Bytes>(parsing schemeContent: UTF8Bytes) where UTF8Bytes: Sequence, UTF8Bytes.Element == UInt8 {
+
+    // Fast path: the content is already contiguous in memory, so it can be examined in-place.
+    if let contiguouslyParsed = schemeContent.withContiguousStorageIfAvailable({ buffer -> Self in
+      // The length must be converted exactly. A truncating conversion would make a 258-byte scheme appear to be
+      // 2 bytes long, so content such as "ws<256 more bytes>" would be misidentified as `.ws`.
+      guard let count = UInt8(exactly: buffer.count), count != 0 else { return .other }
+      return WebURL.SchemeKind(ptr: UnsafeRawPointer(buffer.baseAddress.unsafelyUnwrapped), count: count)
+    }) {
+      self = contiguouslyParsed
+      return
+    }
+
+    // Slow path: copy at most 5 bytes in to a zeroed, 8-byte scratch buffer and examine that instead.
+    var buffer = 0 as UInt64
+    self = withUnsafeMutableBytes(of: &buffer) { buffer -> Self in
+      var iter = schemeContent.makeIterator()
+
+      // No known scheme is shorter than 2 bytes.
+      guard let byte0 = iter.next(), let byte1 = iter.next() else {
+        return .other
+      }
+      buffer[0] = byte0
+      buffer[1] = byte1
+      guard let byte2 = iter.next() else {
+        return WebURL.SchemeKind(ptr: UnsafeRawPointer(buffer.baseAddress.unsafelyUnwrapped), count: 2)
+      }
+      buffer[2] = byte2
+      guard let byte3 = iter.next() else {
+        return WebURL.SchemeKind(ptr: UnsafeRawPointer(buffer.baseAddress.unsafelyUnwrapped), count: 3)
+      }
+      buffer[3] = byte3
+      guard let byte4 = iter.next() else {
+        return WebURL.SchemeKind(ptr: UnsafeRawPointer(buffer.baseAddress.unsafelyUnwrapped), count: 4)
+      }
+      buffer[4] = byte4
+      // No known scheme is longer than 5 bytes.
+      guard iter.next() == nil else {
+        return .other
+      }
+      return WebURL.SchemeKind(ptr: UnsafeRawPointer(buffer.baseAddress.unsafelyUnwrapped), count: 5)
+    }
+  }
+
+  // Note: 'count' is a separate parameter because UnsafeRawBufferPointer.count includes a force-unwrap,
+  //       which can have a significant performance impact: https://bugs.swift.org/browse/SR-14422
+  /// Determines the `SchemeKind` for the `count` bytes of scheme content starting at `ptr`.
+  ///
+  /// Only counts of 2, 3, 4 and 5 can match a known scheme; every other count results in `.other` without reading from `ptr`.
+  /// For those counts, exactly `count` bytes are read, so `ptr` must be valid for reading that many bytes. No alignment is required.
+  /// The comparison is ASCII case-insensitive.
+  ///
+  @inlinable
+  internal init(ptr: UnsafeRawPointer, count: UInt8) {
+    // Zeroing the 6th bit of each byte (i.e. AND-ing with 11011111) normalizes the code-unit to uppercase ASCII.
+    switch count {
+    case 2:
+      var s = ptr.loadUnaligned(as: UInt16.self)
+      s &= 0b11011111_11011111
+      self = (s == Self._ws) ? .ws : .other
+    case 3:
+      // Only 3 bytes may be read, so load the first 2 as an integer and copy the 3rd byte in separately.
+      // On big-endian machines, we need to swap-widen-swap:
+      // [F, T] ->(swap)-> [T, F] ->(widen)-> [0, 0, T, F] ->(swap)-> [F, T, 0, 0].
+      var s = UInt32(ptr.loadUnaligned(as: UInt16.self).littleEndian).littleEndian
+      withUnsafeMutableBytes(of: &s) { $0[2] = ptr.load(fromByteOffset: 2, as: UInt8.self) }
+      s &= 0b11011111_11011111_11011111_11011111
+      self = (s == Self._wss) ? .wss : (s == Self._ftp) ? .ftp : .other
+    case 4:
+      var s = ptr.loadUnaligned(as: UInt32.self)
+      s &= 0b11011111_11011111_11011111_11011111
+      self = (s == Self._http) ? .http : (s == Self._file) ? .file : .other
+    case 5:
+      // "https" is the only 5-byte scheme: check for "http" as above, then examine the final byte by itself.
+      var s = ptr.loadUnaligned(as: UInt32.self)
+      s &= 0b11011111_11011111_11011111_11011111
+      self =
+        ((s == Self._http) && ptr.load(fromByteOffset: 4, as: UInt8.self) & 0b11011111 == ASCII.S.codePoint)
+        ? .https : .other
+    default:
+      self = .other
+    }
+  }
+
+  // On little-endian machines, the shifting will arrange these in reverse order (e.g. "PTTH" in memory),
+  // and .init(bigEndian:) will swap them back so they will have the same bytes, in the same order, as the code-units.
+  /// The code-units "WS", as a 16-bit integer with the same in-memory byte order as the text.
+  @inlinable @inline(__always)
+  internal static var _ws: UInt16 {
+    UInt16(bigEndian: UInt16(ASCII.W.codePoint) &<< 8 | UInt16(ASCII.S.codePoint))
+  }
+  /// The code-units "WSS" followed by a zero byte, as a 32-bit integer with the same in-memory byte order as the text.
+  @inlinable @inline(__always)
+  internal static var _wss: UInt32 {
+    UInt32(
+      bigEndian: UInt32(ASCII.W.codePoint) &<< 24 | UInt32(ASCII.S.codePoint) &<< 16 | UInt32(ASCII.S.codePoint) &<< 8
+    )
+  }
+  /// The code-units "FTP" followed by a zero byte, as a 32-bit integer with the same in-memory byte order as the text.
+  @inlinable @inline(__always)
+  internal static var _ftp: UInt32 {
+    UInt32(
+      bigEndian: UInt32(ASCII.F.codePoint) &<< 24 | UInt32(ASCII.T.codePoint) &<< 16 | UInt32(ASCII.P.codePoint) &<< 8
+    )
+  }
+  /// The code-units "HTTP", as a 32-bit integer with the same in-memory byte order as the text.
+  @inlinable @inline(__always)
+  internal static var _http: UInt32 {
+    UInt32(
+      bigEndian: UInt32(ASCII.H.codePoint) &<< 24 | UInt32(ASCII.T.codePoint) &<< 16 | UInt32(ASCII.T.codePoint) &<< 8
+        | UInt32(ASCII.P.codePoint)
+    )
+  }
+  /// The code-units "FILE", as a 32-bit integer with the same in-memory byte order as the text.
+  @inlinable @inline(__always)
+  internal static var _file: UInt32 {
+    UInt32(
+      bigEndian: UInt32(ASCII.F.codePoint) &<< 24 | UInt32(ASCII.I.codePoint) &<< 16 | UInt32(ASCII.L.codePoint) &<< 8
+        | UInt32(ASCII.E.codePoint)
+    )
+  }
+}
+
+extension WebURL.SchemeKind {
+
+  /// Whether or not this scheme is considered "special".
+  ///
+  /// URLs with special schemes may have additional constraints or normalisation rules.
+  /// Every kind except `.other` is special.
+  ///
+  @inlinable
+  internal var isSpecial: Bool {
+    if case .other = self { return false }
+    return true
+  }
+
+  /// This scheme's default port number, if it has one.
+  ///
+  /// Only some special schemes have known default port numbers.
+  ///
+  @inlinable
+  internal var defaultPort: UInt16? {
+    switch self {
+    case .http, .ws: return 80
+    case .https, .wss: return 443
+    case .ftp: return 21
+    case .file, .other: return nil
+    }
+  }
+
+  /// Returns whether or not the given sequence of bytes are a UTF8-encoded string representation of this scheme's default port number.
+  /// If this scheme does not have a default port number, this method returns `false`.
+  ///
+  /// The bytes must match the decimal digits of the port number exactly: "80" is the default port of an "http" URL,
+  /// but "080", "80 ", and the empty string are not.
+  ///
+  /// Note that the port string's leading ":" separator must not be included.
+  ///
+  /// - parameters:
+  ///   - utf8: The port string, as a sequence of UTF8-encoded bytes.
+  ///
+  @inlinable
+  internal func isDefaultPort<UTF8Bytes>(
+    utf8: UTF8Bytes
+  ) -> Bool where UTF8Bytes: Sequence, UTF8Bytes.Element == UInt8 {
+
+    // Pack up to 4 bytes in to an integer (first byte most-significant), so the string can be compared in one step.
+    var buffer: UInt32 = 0
+    var bytesConsumed = 0 as UInt8
+    var iter = utf8.makeIterator()
+    // The length must be tested *before* asking the iterator for another byte. Testing it afterwards would pull
+    // a 5th byte out of the iterator and discard it, hiding it from the trailing-bytes check below.
+    while bytesConsumed < 4, let nextByte = iter.next() {
+      buffer &<<= 8
+      buffer |= UInt32(nextByte)
+      bytesConsumed &+= 1
+    }
+    // Anything longer than 4 bytes cannot be a default port.
+    guard iter.next() == nil else {
+      return false
+    }
+
+    // The number of bytes is compared as well as their packed value, because leading NUL bytes
+    // do not alter the packed value and would otherwise be accepted.
+    switch self {
+    case .http, .ws:
+      return bytesConsumed == 2 && buffer == (UInt32(ASCII.n8.codePoint) << 8 | UInt32(ASCII.n0.codePoint))
+    case .https, .wss:
+      return bytesConsumed == 3
+        && buffer == (UInt32(ASCII.n4.codePoint) << 16 | UInt32(ASCII.n4.codePoint) << 8 | UInt32(ASCII.n3.codePoint))
+    case .ftp:
+      return bytesConsumed == 2 && buffer == (UInt32(ASCII.n2.codePoint) << 8 | UInt32(ASCII.n1.codePoint))
+    case .file, .other:
+      return false
+    }
+  }
+}
diff --git a/Sources/WebURL/WebURL+UTF8View.swift b/Sources/WebURL/WebURL+UTF8View.swift
new file mode 100644
index 000000000..ec719d41c
--- /dev/null
+++ b/Sources/WebURL/WebURL+UTF8View.swift
@@ -0,0 +1,382 @@
+// Copyright The swift-url Contributors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+extension WebURL {
+
+  /// A view of the UTF-8 code-units in a serialized URL.
+  ///
+  /// This view provides efficient random-access as well as read-write access to the code-units of a serialized URL string,
+  /// including information about where each URL component is situated. The code-units are guaranteed to only contain ASCII code-points.
+  ///
+  /// Component properties (such as `scheme`, `path`, and `query`) and setter methods (such as `setQuery`), have the same semantics and
+  /// behaviour as the corresponding methods on `WebURL`.
+  ///
+  public struct UTF8View {
+
+    @usableFromInline
+    internal var storage: AnyURLStorage
+
+    @inlinable
+    internal init(_ storage: AnyURLStorage) {
+      self.storage = storage
+    }
+  }
+
+  /// A mutable view of the UTF-8 code-units of this URL's serialization.
+  ///
+  @inlinable
+  public var utf8: UTF8View {
+    get { storage.utf8 }
+    _modify { yield &storage.utf8 }
+    set { storage.utf8 = newValue }
+  }
+}
+
+extension AnyURLStorage {
+
+  /// This storage, wrapped in a `WebURL.UTF8View`.
+  ///
+  @inlinable
+  internal var utf8: WebURL.UTF8View {
+    get {
+      WebURL.UTF8View(self)
+    }
+    _modify {
+      // While the view is being modified, `self` is swapped for a placeholder value, and the view's storage
+      // is written back when the modification ends (including if it ends by the caller throwing an error).
+      // NOTE(review): presumably the placeholder is so the view holds the only reference to the storage
+      // during the mutation - confirm against the declaration of `_tempStorage`.
+      var view = WebURL.UTF8View(self)
+      self = _tempStorage
+      defer { self = view.storage }
+      yield &view
+    }
+    set {
+      self = newValue.storage
+    }
+  }
+}
+
+
+// --------------------------------------------
+// MARK: - RandomAccessCollection
+// --------------------------------------------
+
+
+extension WebURL.UTF8View: RandomAccessCollection {
+
+  public typealias Index = Int
+  public typealias Element = UInt8
+
+  @inlinable
+  public var startIndex: Index {
+    0
+  }
+
+  @inlinable
+  public var endIndex: Index {
+    switch storage {
+    case .small(let small): return small.codeUnits.count
+    case .large(let large): return large.codeUnits.count
+    }
+  }
+
+  @inlinable
+  public subscript(position: Index) -> Element {
+    // bounds-checking is performed by `ManagedArrayBuffer`.
+    switch storage {
+    case .small(let small): return small.codeUnits[position]
+    case .large(let large): return large.codeUnits[position]
+    }
+  }
+
+  // The index operations below use wrapping arithmetic and do not validate their results;
+  // an index is only checked once it is used to access an element (see the subscript above).
+
+  @inlinable
+  public func index(after i: Index) -> Index {
+    i &+ 1
+  }
+
+  @inlinable
+  public func formIndex(after i: inout Index) {
+    i &+= 1
+  }
+
+  @inlinable
+  public func index(before i: Index) -> Index {
+    i &- 1
+  }
+
+  @inlinable
+  public func formIndex(before i: inout Index) {
+    i &-= 1
+  }
+
+  @inlinable
+  public func index(_ i: Index, offsetBy distance: Index) -> Index {
+    i &+ distance
+  }
+
+  @inlinable
+  public func formIndex(_ i: inout Index, offsetBy distance: Index) {
+    i &+= distance
+  }
+
+  @inlinable
+  public var count: Int {
+    endIndex
+  }
+
+  @inlinable
+  public func distance(from start: Index, to end: Index) -> Int {
+    end &- start
+  }
+
+  // Contiguous storage is always available, so this never returns `nil`.
+  @inlinable @inline(__always)
+  public func withContiguousStorageIfAvailable<R>(_ body: (UnsafeBufferPointer<UInt8>) throws -> R) rethrows -> R? {
+    try withUnsafeBufferPointer(body)
+  }
+
+  /// Invokes `body` with a pointer to the contiguous UTF-8 code-units of the serialized URL string.
+  ///
+  /// - important: The provided pointer is valid only for the duration of `body`. Do not store or return the pointer for later use.
+  /// - complexity: O(*1*)
+  /// - parameters:
+  ///   - body: A closure which processes the content of the serialized URL.
+  ///
+  @inlinable @inline(__always)
+  public func withUnsafeBufferPointer<R>(_ body: (UnsafeBufferPointer<UInt8>) throws -> R) rethrows -> R {
+    switch storage {
+    case .small(let small): return try small.codeUnits.withUnsafeBufferPointer(body)
+    case .large(let large): return try large.codeUnits.withUnsafeBufferPointer(body)
+    }
+  }
+}
+
+extension Slice where Base == WebURL.UTF8View {
+
+  /// Invokes `body` with a pointer to the contiguous UTF-8 code-units of this portion of the serialized URL string.
+  ///
+  /// - important: The provided pointer is valid only for the duration of `body`. Do not store or return the pointer for later use.
+ /// - complexity: O(*1*) + /// - parameters: + /// - body: A closure which processes the content of the serialized URL. + /// + @inlinable @inline(__always) + public func withUnsafeBufferPointer(_ body: (UnsafeBufferPointer) throws -> R) rethrows -> R { + switch base.storage { + case .small(let small): return try small.codeUnits.withUnsafeBufferPointer(range: startIndex..` tag on the page given by this URL. + /// + /// It should be noted that this method accepts protocol-relative URLs, which are able to direct to a different hostname, as well as absolute URL strings, + /// which do not copy any information from their base URLs. + /// + @inlinable @inline(__always) + public func resolve( + _ utf8: UTF8Bytes + ) -> WebURL? where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 { + urlFromBytes(utf8, baseURL: WebURL(storage: storage)) + } +} + + +// -------------------------------------------- +// MARK: - Components +// -------------------------------------------- + + +extension WebURL.UTF8View { + + /// The UTF-8 code-units containing this URL's `scheme`. + /// + /// - seealso: `WebURL.scheme` + /// + public var scheme: SubSequence { + guard let range = storage.structure.range(of: .scheme), range.count > 1 else { + preconditionFailure("URL does not have a scheme, or scheme is empty") + } + return self[range.dropLast()] + } + + /// Replaces this URL's `scheme` with the given UTF-8 code-units. + /// + @inlinable + public mutating func setScheme( + _ newScheme: UTF8Bytes + ) throws where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + try storage.withUnwrappedMutableStorage( + { small in small.setScheme(to: newScheme) }, + { large in large.setScheme(to: newScheme) } + ) + } + + /// The UTF-8 code-units containing this URL's `username`, if present. + /// + /// - seealso: `WebURL.username` + /// + public var username: SubSequence? 
{ + storage.structure.range(of: .username).map { self[$0] } + } + + /// Replaces this URL's `username` with the given UTF-8 code-units. + /// + /// Any code-points which are not valid for use in the URL's user-info section will be percent-encoded. + /// + @inlinable + public mutating func setUsername( + _ newUsername: UTF8Bytes? + ) throws where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + try storage.withUnwrappedMutableStorage( + { small in small.setUsername(to: newUsername) }, + { large in large.setUsername(to: newUsername) } + ) + } + + /// The UTF-8 code-units containing this URL's `password`, if present. + /// + /// - seealso: `WebURL.password` + /// + public var password: SubSequence? { + guard let range = storage.structure.range(of: .password) else { return nil } + assert(range.count > 1) + return self[range.dropFirst()] + } + + /// Replaces this URL's `password` with the given UTF-8 code-units. + /// + /// Any code-points which are not valid for use in the URL's user-info section will be percent-encoded. + /// + @inlinable + public mutating func setPassword( + _ newPassword: UTF8Bytes? + ) throws where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + try storage.withUnwrappedMutableStorage( + { small in small.setPassword(to: newPassword) }, + { large in large.setPassword(to: newPassword) } + ) + } + + /// The UTF-8 code-units containing this URL's `hostname`, if present. + /// + /// - seealso: `WebURL.hostname` + /// + public var hostname: SubSequence? { + storage.structure.range(of: .hostname).map { self[$0] } + } + + /// Replaces this URL's `hostname` with the given UTF-8 code-units. + /// + /// Unlike other setters, not all code-points which are invalid for use in hostnames will be percent-encoded. + /// If the new content contains a [forbidden host code-point][URL-fhcp], the operation will fail. 
+ /// + /// [URL-fhcp]: https://url.spec.whatwg.org/#forbidden-host-code-point + /// + @inlinable + public mutating func setHostname( + _ newHostname: UTF8Bytes? + ) throws where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 { + try storage.withUnwrappedMutableStorage( + { small in small.setHostname(to: newHostname) }, + { large in large.setHostname(to: newHostname) } + ) + } + + /// The UTF-8 code-units containing this URL's `port`, if present. + /// + /// - seealso: `WebURL.port` + /// + public var port: SubSequence? { + guard let range = storage.structure.range(of: .port) else { return nil } + assert(range.count > 1) + return self[range.dropFirst()] + } + + /// The UTF-8 code-units containing this URL's `path`. + /// + /// - seealso: `WebURL.path` + /// + public var path: SubSequence { + self[storage.structure.rangeForReplacingCodeUnits(of: .path)] + } + + /// Replaces this URL's `path` with the given UTF-8 code-units. + /// + /// The given path string will be lexically simplified, and any code-points in the path's components that are not valid for use will be percent-encoded. + /// + @inlinable + public mutating func setPath( + _ newPath: UTF8Bytes + ) throws where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 { + try storage.withUnwrappedMutableStorage( + { small in small.setPath(to: newPath) }, + { large in large.setPath(to: newPath) } + ) + } + + /// The UTF-8 code-units containing this URL's `query`, if present. + /// + /// - seealso: `WebURL.query` + /// + public var query: SubSequence? { + guard let range = storage.structure.range(of: .query) else { return nil } + assert(!range.isEmpty) + return self[range.dropFirst()] + } + + /// Replaces this URL's `query` with the given UTF-8 code-units. + /// + /// Any code-points which are not valid for use in the URL's query will be percent-encoded. + /// Note that the set of code-points which are valid depends on the URL's `scheme`. 
+ /// + @inlinable + public mutating func setQuery( + _ newQuery: UTF8Bytes? + ) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + storage.withUnwrappedMutableStorage( + { small in small.setQuery(to: newQuery) }, + { large in large.setQuery(to: newQuery) } + ) + } + + /// The UTF-8 code-units containing this URL's `fragment`, if present. + /// + /// - seealso: `WebURL.fragment` + /// + public var fragment: SubSequence? { + guard let range = storage.structure.range(of: .fragment) else { return nil } + assert(!range.isEmpty) + return self[range.dropFirst()] + } + + /// Replaces this URL's `fragment` with the given UTF-8 code-units. + /// + /// Any code-points which are not valid for use in the URL's fragment will be percent-encoded. + /// + @inlinable + public mutating func setFragment( + _ newFragment: UTF8Bytes? + ) where UTF8Bytes: Collection, UTF8Bytes.Element == UInt8 { + storage.withUnwrappedMutableStorage( + { small in small.setFragment(to: newFragment) }, + { large in large.setFragment(to: newFragment) } + ) + } +} diff --git a/Sources/WebURL/WebURL.swift b/Sources/WebURL/WebURL.swift new file mode 100644 index 000000000..062999e77 --- /dev/null +++ b/Sources/WebURL/WebURL.swift @@ -0,0 +1,415 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// A Uniform Resource Locator (URL) is a universal identifier, which often describes the location of a resource. 
+///
+/// URL parsing and serialization is compatible with the [WHATWG URL Standard][URL-spec].
+///
+/// The `WebURL` API is designed to meet the needs and expectations of Swift developers, expanding on the JavaScript API described in the standard
+/// to add path-component manipulation, host objects, and more. Some of the component values have been tweaked to not include their leading or
+/// trailing delimiters, and component setters are a little stricter and more predictable, but in all other respects they should have the same behaviour.
+///
+/// For more information on the differences between this API and the JavaScript `URL` class, see the `WebURL.JSModel` type.
+///
+/// [URL-spec]: https://url.spec.whatwg.org/
+///
+public struct WebURL {
+
+  @usableFromInline
+  internal var storage: AnyURLStorage
+
+  @inlinable
+  internal init(storage: AnyURLStorage) {
+    self.storage = storage
+  }
+
+  /// Constructs a URL by parsing the given string.
+  ///
+  /// This parser is compatible with the [WHATWG URL Standard][URL-spec]; this means that whitespace characters may be removed from the given string,
+  /// other characters may be percent-encoded based on which component they belong to, IP addresses rewritten in canonical notation,
+  /// and paths lexically simplified, among other transformations defined by the standard.
+  ///
+  /// The result is `nil` if the string could not be parsed as a URL.
+  ///
+  /// [URL-spec]: https://url.spec.whatwg.org/
+  ///
+  @inlinable @inline(__always)
+  public init?<S>(_ string: S) where S: StringProtocol, S.UTF8View: BidirectionalCollection {
+    self.init(utf8: string.utf8)
+  }
+
+  /// Constructs a URL by parsing the given string, which is provided as a collection of UTF-8 code-units.
+  ///
+  /// This parser is compatible with the [WHATWG URL Standard][URL-spec]; this means that whitespace characters may be removed from the given string,
+  /// other characters may be percent-encoded based on which component they belong to, IP addresses rewritten in canonical notation,
+  /// and paths lexically simplified, among other transformations defined by the standard.
+  ///
+  /// The result is `nil` if the code-units could not be parsed as a URL.
+  ///
+  /// [URL-spec]: https://url.spec.whatwg.org/
+  ///
+  @inlinable @inline(__always)
+  public init?<UTF8Bytes>(utf8: UTF8Bytes) where UTF8Bytes: BidirectionalCollection, UTF8Bytes.Element == UInt8 {
+    guard let url = urlFromBytes(utf8, baseURL: nil) else { return nil }
+    self = url
+  }
+
+  /// Parses the given string with this URL as its base.
+  ///
+  /// This function supports a wide range of relative URL strings, producing the same result as an HTML `<a>` tag on the page given by this URL.
+  ///
+  /// ```swift
+  /// let base = WebURL("http://example.com/karl/index.html")!
+  ///
+  /// base.resolve("photos/img.jpg?size=200x200")! // "http://example.com/karl/photos/img.jpg?size=200x200"
+  /// base.resolve("/mary/lambs/1/fleece.txt")! // "http://example.com/mary/lambs/1/fleece.txt"
+  /// ```
+  ///
+  /// It should be noted that this method accepts protocol-relative URLs, which are able to direct to a different hostname, as well as absolute URL strings,
+  /// which do not copy any information from their base URLs.
+  ///
+  @inlinable @inline(__always)
+  public func resolve<S>(_ string: S) -> WebURL? where S: StringProtocol, S.UTF8View: BidirectionalCollection {
+    utf8.resolve(string.utf8)
+  }
+}
+
+
+// --------------------------------------------
+// MARK: - Standard protocols
+// --------------------------------------------
+
+
+extension WebURL: Equatable, Hashable, Comparable {
+
+  /// Two URLs are equal if their serializations contain the same code-units.
+  ///
+  public static func == (lhs: Self, rhs: Self) -> Bool {
+    lhs.utf8.withUnsafeBufferPointer { lhsBuffer in
+      rhs.utf8.withUnsafeBufferPointer { rhsBuffer in
+        // Buffers with the same address and length must have the same contents, so the comparison can be skipped.
+        (lhsBuffer.baseAddress == rhsBuffer.baseAddress && lhsBuffer.count == rhsBuffer.count)
+          || lhsBuffer.elementsEqual(rhsBuffer)
+      }
+    }
+  }
+
+  /// Hashes the code-units of this URL's serialization.
+  ///
+  public func hash(into hasher: inout Hasher) {
+    utf8.withUnsafeBufferPointer { buffer in
+      hasher.combine(bytes: UnsafeRawBufferPointer(buffer))
+    }
+  }
+
+  /// URLs are ordered by lexicographically comparing the code-units of their serializations.
+  ///
+  public static func < (lhs: Self, rhs: Self) -> Bool {
+    lhs.utf8.withUnsafeBufferPointer { lhsBuffer in
+      rhs.utf8.withUnsafeBufferPointer { rhsBuffer in
+        lhsBuffer.lexicographicallyPrecedes(rhsBuffer)
+      }
+    }
+  }
+}
+
+extension WebURL: CustomStringConvertible, LosslessStringConvertible {
+
+  public var description: String {
+    serialized
+  }
+}
+
+extension WebURL: Codable {
+
+  /// Decodes a URL from a single string value.
+  ///
+  /// - throws: `DecodingError.dataCorrupted` if the decoded string cannot be parsed as a URL.
+  ///
+  public init(from decoder: Decoder) throws {
+    let container = try decoder.singleValueContainer()
+    guard let decoded = WebURL(try container.decode(String.self)) else {
+      throw DecodingError.dataCorruptedError(in: container, debugDescription: "Invalid URL")
+    }
+    self = decoded
+  }
+
+  /// Encodes this URL's serialization as a single string value.
+  ///
+  public func encode(to encoder: Encoder) throws {
+    var container = encoder.singleValueContainer()
+    try container.encode(serialized)
+  }
+}
+
+
+// --------------------------------------------
+// MARK: - Properties
+// --------------------------------------------
+
+
+extension WebURL {
+
+  // Required by the parser.
+  @inlinable
+  internal var schemeKind: WebURL.SchemeKind {
+    storage.schemeKind
+  }
+}
+
+extension WebURL {
+
+  /// The string representation of this URL.
+ /// + public var serialized: String { + String(decoding: utf8, as: UTF8.self) + } + + /// The string representation of this URL, excluding the URL's fragment. + /// + public var serializedExcludingFragment: String { + utf8.withUnsafeBufferPointer { String(decoding: $0[..(_ newScheme: S) throws where S: StringProtocol { + try utf8.setScheme(newScheme.utf8) + } + + /// Replaces this URL's `username` with the given string. + /// + /// Any code-points which are not valid for use in the URL's user-info section will be percent-encoded. + /// Setting this component may fail if the URL does not allow credentials. + /// + /// - seealso: `username` + /// + @inlinable + public mutating func setUsername(_ newUsername: S?) throws where S: StringProtocol { + try utf8.setUsername(newUsername?.utf8) + } + + /// Replaces this URL's `password` with the given string. + /// + /// Any code-points which are not valid for use in the URL's user-info section will be percent-encoded. + /// Setting this component may fail if the URL does not allow credentials. + /// + /// - seealso: `password` + /// + @inlinable + public mutating func setPassword(_ newPassword: S?) throws where S: StringProtocol { + try utf8.setPassword(newPassword?.utf8) + } + + /// Replaces this URL's `hostname` with the given string. + /// + /// When setting this component, the new contents will be parsed and normalized (e.g. domains will be percent-decoded and lowercased, and IP addresses + /// will be rewritten in their canonical form). Unlike setting other components, not all code-points which are invalid for use in hostnames will be percent-encoded. + /// If the new content contains a [forbidden host code-point][URL-fhcp], the operation will fail. + /// + /// [URL-fhcp]: https://url.spec.whatwg.org/#forbidden-host-code-point + /// + /// - seealso: `hostname` + /// + @inlinable + public mutating func setHostname(_ newHostname: S?) 
throws + where S: StringProtocol, S.UTF8View: BidirectionalCollection { + try utf8.setHostname(newHostname?.utf8) + } + + /// Replaces this URL's `port`. + /// + /// Setting this component may fail if the new value is out of range, or if the URL does not support port numbers. + /// If the URL has a "special" scheme, setting the port to its known default value will remove the port. + /// + /// - seealso: `port` + /// + public mutating func setPort(_ newPort: Int?) throws { + guard let newPort = newPort else { + try storage.withUnwrappedMutableStorage( + { small in small.setPort(to: nil) }, + { large in large.setPort(to: nil) } + ) + return + } + guard let uint16Port = UInt16(exactly: newPort) else { + throw URLSetterError.portValueOutOfBounds + } + try storage.withUnwrappedMutableStorage( + { small in small.setPort(to: uint16Port) }, + { large in large.setPort(to: uint16Port) } + ) + } + + /// Replaces this URL's `path` with the given string. + /// + /// When setting this component, the given path string will be lexically simplified, and any code-points in the path's components that are not valid + /// for use will be percent-encoded. Setting this component will fail if the URL is non-hierarchical (see `WebURL.cannotBeABase` for more information). + /// + /// - seealso: `path` + /// + @inlinable + public mutating func setPath(_ newPath: S) throws where S: StringProtocol, S.UTF8View: BidirectionalCollection { + try utf8.setPath(newPath.utf8) + } + + /// Replaces this URL's `query` with the given string. + /// + /// When setting this property, any code-points which are not valid for use in the URL's query will be percent-encoded. + /// Note that the set of code-points which are valid depends on the URL's `scheme`. + /// + /// - seealso: `query` + /// + @inlinable + public mutating func setQuery(_ newQuery: S?) where S: StringProtocol { + utf8.setQuery(newQuery?.utf8) + } + + /// Replaces this URL's `fragment` with the given string. 
+ /// + /// When setting this property, any code-points which are not valid for use in the URL's fragment will be percent-encoded. + /// + /// - seealso: `fragment` + /// + @inlinable + public mutating func setFragment(_ newFragment: S?) where S: StringProtocol { + utf8.setFragment(newFragment?.utf8) + } +} diff --git a/Sources/WebURLTestSupport/IPAddressUtils.swift b/Sources/WebURLTestSupport/IPAddressUtils.swift new file mode 100644 index 000000000..b71e79b75 --- /dev/null +++ b/Sources/WebURLTestSupport/IPAddressUtils.swift @@ -0,0 +1,298 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import WebURL + +extension IPv4Address { + public enum Utils {} +} + +extension IPv4Address.Utils { + + // Random addresses. + + /// Returns a random 32-bit number, which may be interpreted as an IPv4 network address. + /// + public static func randomAddress() -> UInt32 { + var rng = SystemRandomNumberGenerator() + return randomAddress(using: &rng) + } + + /// Returns a random 32-bit number, which may be interpreted as an IPv4 network address. + /// + public static func randomAddress(using rng: inout RNG) -> UInt32 { + return .random(in: 0 ... .max, using: &rng) + } + + // Random strings. 
+
+  /// The shorthand layouts in which an IPv4 address may be written.
+  ///
+  /// - `a`:    a single piece holding the entire 32-bit value.
+  /// - `ab`:   an 8-bit piece followed by a piece holding the remaining 24 bits.
+  /// - `abc`:  two 8-bit pieces followed by a piece holding the remaining 16 bits.
+  /// - `abcd`: four 8-bit pieces.
+  ///
+  public enum Format: CaseIterable {
+    case a
+    case ab
+    case abc
+    case abcd
+  }
+
+  /// The radix in which a single piece is written: `0`-prefixed octal, plain decimal, or `0x`-prefixed hex.
+  ///
+  public enum PieceRadix: CaseIterable {
+    case octal
+    case decimal
+    case hex
+  }
+
+  /// Generates a random IP address string representing the given address.
+  ///
+  /// - parameters:
+  ///   - address:        The address to serialize
+  ///   - allowedFormats: The set of allowed shorthand formats which may be produced.
+  ///                     One of these will be selected at random.
+  ///   - allowedRadixes: The set of allowed radixes for pieces of the chosen format.
+  ///                     The radix for each piece will be selected at random from this list.
+  ///
+  /// Example representations of the address `123456789`:
+  /// ```
+  /// 7.0x5b.0xcd.0x15 (dec/hex/hex/hex)
+  /// 07.0x5b.52501 (oct/hex/dec)
+  /// 7.0133.0146425 (dec/oct/oct)
+  /// 7.6016277 (dec/dec)
+  /// 07.0133.0315.025 (oct/oct/oct/oct)
+  /// 7.0x5b.205.21 (dec/hex/dec/dec)
+  /// 0726746425 (oct)
+  /// ```
+  ///
+  public static func randomString(
+    address: UInt32,
+    allowedFormats: [Format] = Format.allCases,
+    allowedRadixes: [PieceRadix] = PieceRadix.allCases
+  ) -> String {
+    var rng = SystemRandomNumberGenerator()
+    return randomString(
+      address: address, allowedFormats: allowedFormats, allowedRadixes: allowedRadixes, using: &rng
+    )
+  }
+
+  /// Generates a random IP address string representing the given address.
+  ///
+  /// The format is chosen first, then one radix is chosen per piece, left to right; all choices are drawn from `rng`.
+  ///
+  /// - precondition: `allowedFormats` and `allowedRadixes` must not be empty.
+  /// - seealso: `randomString(address:allowedFormats:allowedRadixes:)`
+  ///
+  public static func randomString<RNG: RandomNumberGenerator>(
+    address: UInt32,
+    allowedFormats: [Format] = Format.allCases,
+    allowedRadixes: [PieceRadix] = PieceRadix.allCases,
+    using rng: inout RNG
+  ) -> String {
+
+    // Every format needs at least one radix, so an empty list can never produce a string.
+    // Trap up-front with a message rather than on an anonymous force-unwrap below.
+    precondition(!allowedFormats.isEmpty, "allowedFormats must contain at least one format")
+    precondition(!allowedRadixes.isEmpty, "allowedRadixes must contain at least one radix")
+
+    func formatPiece<B: BinaryInteger>(piece: B, radix: PieceRadix) -> String {
+      switch radix {
+      case .octal: return "0" + String(piece, radix: 8)
+      case .decimal: return String(piece, radix: 10)
+      case .hex: return "0x" + String(piece, radix: 16)
+      }
+    }
+    // The force-unwraps below cannot fail: both arrays were checked to be non-empty above.
+    // swift-format-ignore
+    switch allowedFormats.randomElement(using: &rng)! {
+    case .a:
+      let a = address
+      return formatPiece(piece: a, radix: allowedRadixes.randomElement(using: &rng)!)
+    case .ab:
+      let a = UInt8((address & 0b11111111_00000000_00000000_00000000) >> 24)
+      let b = UInt32((address & 0b00000000_11111111_11111111_11111111))
+      return formatPiece(piece: a, radix: allowedRadixes.randomElement(using: &rng)!) + "." +
+             formatPiece(piece: b, radix: allowedRadixes.randomElement(using: &rng)!)
+    case .abc:
+      let a = UInt8((address & 0b11111111_00000000_00000000_00000000) >> 24)
+      let b = UInt8((address & 0b00000000_11111111_00000000_00000000) >> 16)
+      let c = UInt16((address & 0b00000000_00000000_11111111_11111111))
+      return formatPiece(piece: a, radix: allowedRadixes.randomElement(using: &rng)!) + "." +
+             formatPiece(piece: b, radix: allowedRadixes.randomElement(using: &rng)!) + "." +
+             formatPiece(piece: c, radix: allowedRadixes.randomElement(using: &rng)!)
+    case .abcd:
+      let a = UInt8((address & 0b11111111_00000000_00000000_00000000) >> 24)
+      let b = UInt8((address & 0b00000000_11111111_00000000_00000000) >> 16)
+      let c = UInt8((address & 0b00000000_00000000_11111111_00000000) >> 8)
+      let d = UInt8((address & 0b00000000_00000000_00000000_11111111))
+      return formatPiece(piece: a, radix: allowedRadixes.randomElement(using: &rng)!) + "." +
+             formatPiece(piece: b, radix: allowedRadixes.randomElement(using: &rng)!) + "." +
+             formatPiece(piece: c, radix: allowedRadixes.randomElement(using: &rng)!) + "." +
+             formatPiece(piece: d, radix: allowedRadixes.randomElement(using: &rng)!)
+    }
+  }
+}
+
+// MARK: - IPv6
+
+extension IPv6Address {
+  public enum Utils {}
+}
+
+extension IPv6Address.Utils {
+
+  // Random addresses.
+
+  /// Generates a random 128-bit number, which may be interpreted as an IPv6 network address.
+  ///
+  /// At random, a series of 16-bit pieces are set to 0, in order to prompt a compressed serialization format.
+  /// There is no bias to produce more addresses in the IPv4 range.
+  ///
+  public static func randomAddress() -> IPv6Address.Pieces {
+    var rng = SystemRandomNumberGenerator()
+    let injectCompression = Bool.random(using: &rng)
+    return randomAddress(limitedToIPv4: false, injectCompression: injectCompression, using: &rng)
+  }
+
+  /// Generates a random 128-bit number, which may be interpreted as an IPv6 network address.
+  ///
+  /// Every random choice, including which pieces are zeroed when `injectCompression` is `true`, is drawn from `rng`,
+  /// so a deterministic generator produces a reproducible address.
+  ///
+  /// - parameters:
+  ///   - limitedToIPv4:     If `true`, the resulting addresses are limited to the IPv4 range.
+  ///                        `injectCompression` is not consulted in this case.
+  ///   - injectCompression: If `true`, a random series of 16-bit pieces are set to 0, in order to prompt a compressed serialization format.
+  ///   - rng:               The `RandomNumberGenerator` to use.
+  ///
+  public static func randomAddress<RNG: RandomNumberGenerator>(
+    limitedToIPv4: Bool,
+    injectCompression: Bool,
+    using rng: inout RNG
+  ) -> IPv6Address.Pieces {
+
+    switch limitedToIPv4 {
+    case true:
+      var randomBytes = (UInt64(0), UInt64.random(in: 0...UInt64(UInt32.max), using: &rng).bigEndian)
+      return withUnsafeBytes(of: &randomBytes) { $0.load(as: IPv6Address.Pieces.self) }
+
+    case false:
+      var randomBytes = (UInt64.random(in: 0 ... .max, using: &rng), UInt64.random(in: 0 ... .max, using: &rng))
+      return withUnsafeMutableBytes(of: &randomBytes) { rawPtr in
+        if injectCompression {
+          // Compress a random range by setting bytes to 0.
+          // The range should be aligned to a 16-bit piece, be a multiple of 16 bits in length,
+          // and be at least 2 pieces long to ensure it results in a compressed serialization format.
+          // The bounds must come from the caller's generator, not the system one,
+          // otherwise a seeded `rng` would not reproduce the same address.
+          let compressStart = Int.random(in: 0..<6, using: &rng)
+          let compressEnd = Int.random(in: (compressStart + 2)..<9, using: &rng)
+          for x in (compressStart * 2)..<(compressEnd * 2) {
+            rawPtr[x] = 0
+          }
+        }
+        return rawPtr.load(as: IPv6Address.Pieces.self)
+      }
+    }
+  }
+
+  // Random strings.
+
+  /// Generates a random IPv6 address and random serialization.
+  ///
+  /// The goal is for this function to return pretty-much all of the addresses our IPv6Address parser supports.
+ /// As a rough guide, assuming the RNG gives random `Bool`s with 50:50 chance and all `UInt64`s are generated with equal probability: + /// + /// - 50% chance to get an address in the IPv4 range + /// - 25% chance that it gets formatted as an IPv4 address ("::192.168.0.1") + /// - 25% chance that it gets formatted as an IPv6 address ("::c0a8:1") + /// - 50% chance to get an address beyond the IPv4 range + /// - 25% chance that some region of it is compressed + /// - 12.5% chance that it is actually formatted as a compressed address ("ff08::c80a") + /// - 12.5% chance that it contains a string of zeroes ("ff08:0:0:0:0:0:0:c80a") + /// - 25% chance that it is not compressed + /// + /// This may be skewed a bit - some generated addresses will already be compressed, regardless of our + /// randomised toggle to inject compressable pieces. + /// + public static func randomString() -> (IPv6Address.Pieces, String) { + var rng = SystemRandomNumberGenerator() + let address = randomAddress( + limitedToIPv4: Bool.random(using: &rng), + injectCompression: Bool.random(using: &rng), + using: &rng + ) + let string = randomString( + address: address, + allowIPv4Addresses: true, + mayCompress: true, + using: &rng + ) + return (address, string) + } + + /// Generates a random IPv6 address string. + /// + /// The format of the resulting String is configured by the `allowIPv4Addresses` and `mayCompress` flags. These flags only describe + /// the allowed formats, and the actual format will be randomly selected given that configuration. + /// + /// - parameters: + /// - address: The address to serialize. + /// - allowIPv4Addresses: If `true`, and if `address` represents a value that could be written as an IPv4 address, the resulting String + /// may contain an embedded IPv4 address. + /// - mayCompress: If `true`, and if `address` has a string of zero-valued `UInt16` pieces, the resulting String may compress + /// them using the "::" notation. 
+ /// - using: The random number generator to use. + /// + /// + public static func randomString( + address: IPv6Address.Pieces, + allowIPv4Addresses: Bool, + mayCompress: Bool, + using rng: inout RNG + ) -> String { + + // If the address can be represented as an IPv4 address, randomly decide to format it that way. + + let ipv4Address = withUnsafeBytes(of: address) { rawAddress -> UInt32? in + let ptr = rawAddress.bindMemory(to: UInt64.self) + guard ptr[0] == 0, ptr[1].bigEndian <= UInt64(UInt32.max) else { + return nil + } + return UInt32(exactly: ptr[1].bigEndian) + } + + if let ipv4Address = ipv4Address, allowIPv4Addresses && Bool.random(using: &rng) { + let ipv4Piece = IPv4Address.Utils.randomString( + address: ipv4Address, + allowedFormats: [.abcd], allowedRadixes: [.decimal], + using: &rng + ) + return "::" + ipv4Piece + } + + // Otherwise, serialise as an IPv6 address. Randomly compressing pieces which can be compressed or not. + + var addressString = "" + + withUnsafeBytes(of: address) { addressBytes in + let pieces = addressBytes.bindMemory(to: UInt16.self) + let compressedRange: (subrange: Range, length: Int) + if mayCompress && Bool.random(using: &rng) { + compressedRange = pieces._longestSubrange(equalTo: 0) + } else { + compressedRange = (subrange: 0..<0, length: 0) + } + + for i in pieces.startIndex.. 0 { + if !addressString.isEmpty { + addressString.removeLast() + } + addressString += "::" + } + for i in compressedRange.subrange.endIndex..= 0) + sections[sections.count - 1].reporters.append(contentsOf: repeatElement(nil, count: count)) + } + + /// Performs a test. The given closure is invoked with a mutable `Reporter` object and test index. + /// Refer to `Reporter`'s API to find out how to make testable assertions that will be logged in the test report. 
+ /// + public mutating func performTest(_ test: (inout Reporter) throws -> Void) { + var reporter = Reporter() + do { + try test(&reporter) + } catch { + reporter.uncaughtError(error) + } + sections[sections.count - 1].reporters.append(reporter) + } + + /// Whether or not this report contains any unexpected test results (unexpected passes or failures). + /// + public var hasUnexpectedResults: Bool { + return sections.contains { section in + section.reporters.contains { reporter in + reporter?.actualResult != reporter?.expectedResult + } + } + } +} + +extension SimpleTestReport.Reporter { + + /// Captures the given test artefact for inclusion in the test report. + /// + public mutating func capture(key: String, _ object: Any) { + capturedData.append((key, object)) + } + + public mutating func fail(_ key: String? = nil) { + actualResult = .fail + key.map { failureKeys.append($0) } + } + + public mutating func expectEqual(_ lhs: T, _ rhs: T, _ key: String? = nil) { + if lhs != rhs { fail(key) } + } + + public mutating func expectTrue(_ lhs: Bool, _ key: String? = nil) { + if lhs == false { fail(key) } + } + + public mutating func expectFalse(_ lhs: Bool, _ key: String? = nil) { + if lhs == true { fail(key) } + } + + mutating fileprivate func uncaughtError(_ error: Error) { + capture(key: "__uncaught_error", error) + fail() + } +} + +extension SimpleTestReport { + + public func generateReport() -> String { + + // Gather cumulative stats. 
+ var tests_count = 0 + var tests_xPass_aFail = 0 + var tests_xPass_aPass = 0 + var tests_xFail_aFail = 0 + var tests_xFail_aPass = 0 + var tests_skipped = 0 + + for reporter in sections.lazy.map({ $0.reporters }).joined() { + defer { tests_count += 1 } + guard let reporter = reporter else { + tests_skipped += 1 + continue + } + switch (reporter.expectedResult, reporter.actualResult) { + case (.pass, .pass): tests_xPass_aPass += 1 + case (.fail, .fail): tests_xFail_aFail += 1 + case (.pass, .fail): tests_xPass_aFail += 1 + case (.fail, .pass): tests_xFail_aPass += 1 + } + } + + var output = "" + print( + """ + --------------------------------------------- + --------------------------------------------- + \(tests_xPass_aFail + tests_xFail_aPass) tests failed (out of \(tests_count)). + --------------------------------------------- + Pass: \(tests_xPass_aPass + tests_xFail_aPass) (\(tests_xPass_aPass) expected) + Fail: \(tests_xFail_aFail + tests_xPass_aFail) (\(tests_xFail_aFail) expected) + \(tests_skipped) Tests skipped. + --------------------------------------------- + + """, to: &output) + + func printLine() { + print(String(repeating: "=", count: 30), to: &output) + } + + var testNumber = 0 + for section in sections { + // Only print the section name if it contains an unexpected result. + var hasPrintedName = false + func printSectionNameIfNeeded() { + if !hasPrintedName { + if let sectionName = section.name { + printLine() + print("### \(sectionName) ###", to: &output) + printLine() + print("", to: &output) + } + hasPrintedName = true + } + } + + for reporter in section.reporters { + defer { testNumber += 1 } + guard let reporter = reporter, reporter.actualResult != reporter.expectedResult else { continue } + printSectionNameIfNeeded() + + print("[\(testNumber)]:", to: &output) + print("", to: &output) + print("Expected: \(reporter.expectedResult). 
Actual: \(reporter.actualResult)", to: &output) + + if !reporter.failureKeys.isEmpty { + print("", to: &output) + print("Failed checks:", to: &output) + reporter.failureKeys.forEach { + print("- \($0)", to: &output) + } + } + if !reporter.capturedData.isEmpty { + print("", to: &output) + print("Captured data:", to: &output) + reporter.capturedData.forEach { + let (key, value) = $0 + print("- \(key):", to: &output) + print(value, to: &output) + print("", to: &output) + } + } + + print("", to: &output) + } + } + return output + } +} diff --git a/Sources/WebURLTestSupport/URLValues.swift b/Sources/WebURLTestSupport/URLValues.swift new file mode 100644 index 000000000..20ba1ae7f --- /dev/null +++ b/Sources/WebURLTestSupport/URLValues.swift @@ -0,0 +1,157 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import WebURL + +/// A property exposed by the WHATWG URL model. +/// +public enum URLModelProperty: String, CaseIterable, Equatable, Hashable, CodingKey { + case href = "href" + case origin = "origin" + case `protocol` = "protocol" + case username = "username" + case password = "password" + case host = "host" + case hostname = "hostname" + case port = "port" + case pathname = "pathname" + case search = "search" + case hash = "hash" + + public var name: String { + stringValue + } +} + +/// A storage type for the properties exposed in the WHATWG URL model. 
+/// +public struct URLValues: Equatable, Hashable { + private var href: String + private var `protocol`: String + private var username: String + private var password: String + private var host: String + private var hostname: String + private var port: String + private var pathname: String + private var search: String + private var hash: String + // Unfortunately, the WPT constructor tests often omit the origin 😐. + // "The origin key may be missing. In that case, the API’s origin attribute is not tested." + public var origin: String? + + public subscript(property: URLModelProperty) -> String? { + get { + switch property { + case .href: return href + case .origin: return origin + case .protocol: return self.protocol + case .username: return username + case .password: return password + case .host: return host + case .hostname: return hostname + case .port: return port + case .pathname: return pathname + case .search: return search + case .hash: return hash + } + } + } + + public init( + href: String, origin: String?, protocol: String, username: String, password: String, host: String, + hostname: String, port: String, pathname: String, search: String, hash: String + ) { + self.href = href + self.origin = origin + self.protocol = `protocol` + self.username = username + self.password = password + self.host = host + self.hostname = hostname + self.port = port + self.pathname = pathname + self.search = search + self.hash = hash + } +} + +extension URLValues: CustomStringConvertible { + + public var description: String { + return """ + { + .href: \(href) + .origin: \(origin ?? 
"") + .protocol: \(`protocol`) + .username: \(username) + .password: \(password) + .host: \(host) + .hostname: \(hostname) + .port: \(port) + .pathname: \(pathname) + .search: \(search) + .hash: \(hash) + } + """ + } +} + + +extension WebURL.JSModel { + public var urlValues: URLValues { + return .init( + href: href, origin: origin, protocol: scheme, + username: username, password: password, + host: host, hostname: hostname, port: port, + pathname: pathname, search: search, hash: hash + ) + } +} + + +extension URLValues { + + public static func diff(_ lhs: URLValues?, _ rhs: URLValues?) -> [URLModelProperty] { + switch (lhs, rhs) { + case (.none, .none): return [] + case (.some, .none), (.none, .some): return URLModelProperty.allCases + case (.some(let lhs), .some(let rhs)): return rhs.allMismatchingURLProperties(comparedWith: lhs) + } + } + + /// The properties which must always be present, and always be tested. + private static var minimumPropertiesToDiff: [URLModelProperty] { + return [ + .href, + .protocol, + .username, .password, + .hostname, .port, .host, + .pathname, .search, .hash, + ] + } + + func allMismatchingURLProperties(comparedWith other: URLValues) -> [URLModelProperty] { + var results = [URLModelProperty]() + for property in Self.minimumPropertiesToDiff { + if self[property] != other[property] { + results.append(property) + } + } + if let origin = self.origin, let otherOrigin = other.origin, origin != otherOrigin { + results.append(.origin) + } + return results + } +} diff --git a/Sources/WebURLTestSupport/WPTConstructorTest+WebURLReportHarness.swift b/Sources/WebURLTestSupport/WPTConstructorTest+WebURLReportHarness.swift new file mode 100644 index 000000000..cf35766fd --- /dev/null +++ b/Sources/WebURLTestSupport/WPTConstructorTest+WebURLReportHarness.swift @@ -0,0 +1,81 @@ +// Copyright The swift-url Contributors. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import WebURL
+
+extension WPTConstructorTest {
+
+  /// A harness for running a series of `WPTConstructorTest`s with the `WebURL` parser and accumulating the results in a `SimpleTestReport`.
+  ///
+  public struct WebURLReportHarness {
+    /// The report to which comments (as sections) and test results are added.
+    public private(set) var report = SimpleTestReport()
+    /// The number of file entries processed so far; both comments and test cases are counted.
+    public private(set) var entriesSeen = 0
+    /// The test numbers (see `WPTConstructorTest.Result.testNumber`) whose expected result is a failure.
+    public let expectedFailures: Set<Int>
+
+    public init(expectedFailures: Set<Int> = []) {
+      self.expectedFailures = expectedFailures
+    }
+  }
+}
+
+extension WPTConstructorTest.WebURLReportHarness: WPTConstructorTest.Harness {
+
+  public func parseURL(_ input: String, base: String?) -> URLValues? {
+    return WebURL.JSModel(input, base: base)?.urlValues
+  }
+
+  public mutating func reportComment(_ comment: String) {
+    entriesSeen += 1
+    report.markSection(comment)
+  }
+
+  /// Records the given result in `report`, with one failure key for each failed test step.
+  ///
+  /// The test case and the parsed values are captured for every result. Tests listed in `expectedFailures`
+  /// are marked as expected to fail.
+  ///
+  public mutating func reportTestResult(_ result: WPTConstructorTest.Result) {
+    entriesSeen += 1
+    report.performTest { reporter in
+      reporter.capture(key: "Testcase", result.testcase)
+      reporter.capture(key: "Result", result.propertyValues?.description ?? "nil")
+
+      if expectedFailures.contains(result.testNumber) {
+        reporter.expectedResult = .fail
+      }
+      if !result.failures.isEmpty {
+        // Handled conditions are removed one at a time, so that anything left over is detected below.
+        var remainingFailures = result.failures
+        if let _ = remainingFailures.remove(.baseURLFailedToParse) {
+          reporter.fail("base URL failed to parse")
+        }
+        if let _ = remainingFailures.remove(.inputDidNotFailWhenUsedAsBaseURL) {
+          reporter.fail("Test is XFAIL, but input string parsed successfully without a base URL")
+        }
+        if let _ = remainingFailures.remove(.unexpectedFailureToParse) {
+          reporter.fail("Unexpected failure to parse")
+        }
+        if let _ = remainingFailures.remove(.unexpectedSuccessfulParse) {
+          reporter.fail("Unexpected successful parsing")
+        }
+        if let _ = remainingFailures.remove(.propertyMismatch) {
+          for mismatch in URLValues.diff(result.testcase.expectedValues, result.propertyValues) {
+            reporter.fail(mismatch.name)
+          }
+        }
+        if let _ = remainingFailures.remove(.notIdempotent) {
+          // An empty key would appear in the report as a blank bullet; name the failed check.
+          reporter.fail("not idempotent")
+        }
+        if !remainingFailures.isEmpty {
+          assertionFailure("Unhandled failure condition")
+          reporter.fail("unknown reason")
+        }
+      }
+    }
+  }
+}
diff --git a/Sources/WebURLTestSupport/WPTConstructorTest.swift b/Sources/WebURLTestSupport/WPTConstructorTest.swift
new file mode 100644
index 000000000..9f377e9fe
--- /dev/null
+++ b/Sources/WebURLTestSupport/WPTConstructorTest.swift
@@ -0,0 +1,352 @@
+// Copyright The swift-url Contributors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+public enum WPTConstructorTest {}
+
+
+// --------------------------------------------
+// MARK: - Test file data model
+// --------------------------------------------
+
+
+extension WPTConstructorTest {
+
+  /// The contents of a WPT URL constructor test file.
+  ///
+  /// The file is coded as a single value: an array of `FileEntry`s.
+  ///
+  public struct TestFile: Codable {
+    public var tests: [FileEntry]
+
+    public init(from decoder: Decoder) throws {
+      self.tests = try decoder.singleValueContainer().decode([FileEntry].self)
+    }
+
+    public func encode(to encoder: Encoder) throws {
+      var container = encoder.singleValueContainer()
+      try container.encode(tests)
+    }
+  }
+
+  /// An entry in a WPT URL constructor test file; either a comment or a constructor test case.
+  ///
+  /// A comment is coded as a single string value; a test case is coded as a `Testcase`.
+  ///
+  public enum FileEntry: Codable {
+    case comment(String)
+    case testcase(Testcase)
+
+    /// Decodes a comment if the entry is a single string value, and a `Testcase` otherwise.
+    ///
+    /// - throws: The error thrown by `Testcase.init(from:)` if the entry is not a string and is not a valid test case.
+    ///
+    public init(from decoder: Decoder) throws {
+      // Try the string form first, and let test-case decoding errors propagate.
+      // If the test case were attempted with `try?`, a malformed test case would fall through to the
+      // string decoder and surface as a misleading "expected String" error.
+      if let comment = try? decoder.singleValueContainer().decode(String.self) {
+        self = .comment(comment)
+      } else {
+        self = .testcase(try Testcase(from: decoder))
+      }
+    }
+
+    public func encode(to encoder: Encoder) throws {
+      switch self {
+      case .comment(let header):
+        var container = encoder.singleValueContainer()
+        try container.encode(header)
+      case .testcase(let testcase):
+        try testcase.encode(to: encoder)
+      }
+    }
+  }
+
+  /// A data structure containing information for a WPT URL constructor test.
+  ///
+  /// Details from https://github.com/web-platform-tests/wpt/blob/master/url/README.md as of `09d8830`:
+  ///
+  /// The keys for each test case are:
+  ///
+  /// - `base`: an absolute URL as a string whose parsing without a base of its own must succeed.
+  /// This key is always present, and may have a value like "about:blank" when `input` is an absolute URL.
+  /// - `input`: an URL as a string to be parsed with `base` as its base URL.
+ /// - Either: + /// - `failure` with the value `true`, indicating that parsing `input` should return failure, + /// - or `href`, `origin`, `protocol`, `username`, `password`, `host`, `hostname`, `port`, `pathname`, `search`, and `hash` + /// with string values; indicating that parsing `input` should return an URL record and that the getters of each corresponding attribute in that + /// URL’s API should return the corresponding value. + /// + /// The `origin` key may be missing. In that case, the API’s `origin` attribute is not tested. + /// + public struct Testcase: Equatable, Hashable, Codable { + public var input: String + public var base: String + public var expectedValues: URLValues? = nil + + public var failure: Bool { + return expectedValues == nil + } + + public init(input: String, base: String, expectedValues: URLValues?) { + self.input = input + self.base = base + self.expectedValues = expectedValues + } + + enum CodingKeys: String, CodingKey { + case input = "input" + case base = "base" + + case failure = "failure" + + case href = "href" + case origin = "origin" + case `protocol` = "protocol" + case username = "username" + case password = "password" + case host = "host" + case hostname = "hostname" + case port = "port" + case pathname = "pathname" + case search = "search" + case hash = "hash" + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + self.input = try container.decode(String.self, forKey: .input) + self.base = try container.decode(String.self, forKey: .base) + + if let xfail = try? container.decode(Bool.self, forKey: .failure) { + assert(xfail, "A failure key means the test is expected to fail; is always 'true' if present") + self.expectedValues = nil + } else { + self.expectedValues = URLValues( + href: try container.decode(String.self, forKey: .href), + origin: try? 
container.decode(String.self, forKey: .origin), + protocol: try container.decode(String.self, forKey: .protocol), + username: try container.decode(String.self, forKey: .username), + password: try container.decode(String.self, forKey: .password), + host: try container.decode(String.self, forKey: .host), + hostname: try container.decode(String.self, forKey: .hostname), + port: try container.decode(String.self, forKey: .port), + pathname: try container.decode(String.self, forKey: .pathname), + search: try container.decode(String.self, forKey: .search), + hash: try container.decode(String.self, forKey: .hash) + ) + } + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(input, forKey: .input) + try container.encode(base, forKey: .base) + if let expectedValues = expectedValues { + try container.encodeIfPresent(expectedValues[.href], forKey: .href) + try container.encodeIfPresent(expectedValues[.origin], forKey: .origin) + try container.encodeIfPresent(expectedValues[.protocol], forKey: .protocol) + try container.encodeIfPresent(expectedValues[.username], forKey: .username) + try container.encodeIfPresent(expectedValues[.password], forKey: .password) + try container.encodeIfPresent(expectedValues[.host], forKey: .host) + try container.encodeIfPresent(expectedValues[.hostname], forKey: .hostname) + try container.encodeIfPresent(expectedValues[.port], forKey: .port) + try container.encodeIfPresent(expectedValues[.pathname], forKey: .pathname) + try container.encodeIfPresent(expectedValues[.search], forKey: .search) + try container.encodeIfPresent(expectedValues[.hash], forKey: .hash) + } else { + try container.encode(true, forKey: .failure) + } + } + } +} + +extension WPTConstructorTest.Testcase: CustomStringConvertible { + + public var description: String { + guard let expectedValues = expectedValues else { + return """ + { + .input: \(input) + .base: \(base) + -- expected failure -- + 
} + """ + } + return """ + { + .input: \(input) + .base: \(base) + + .href: \(expectedValues[.href]!) + .origin: \(expectedValues.origin ?? "") + .protocol: \(expectedValues[.protocol]!) + .username: \(expectedValues[.username]!) + .password: \(expectedValues[.password]!) + .host: \(expectedValues[.host]!) + .hostname: \(expectedValues[.hostname]!) + .port: \(expectedValues[.port]!) + .pathname: \(expectedValues[.pathname]!) + .search: \(expectedValues[.search]!) + .hash: \(expectedValues[.hash]!) + } + """ + } +} + + +// -------------------------------------------- +// MARK: - Test harness +// -------------------------------------------- + + +extension WPTConstructorTest { + + /// An object which runs a set of WPT URL constructor tests and processes its results. + /// + public typealias Harness = _WPTConstructorTest_Harness +} + +public protocol _WPTConstructorTest_Harness { + + /// Parses the given input string, relative to the given base URL, in accordance with the WHATWG URL Standard, and returns + /// a `URLValues` containing values for the properties specified in the standard. If parsing fails, returns `nil`. + /// + func parseURL(_ input: String, base: String?) -> URLValues? + + /// A callback that is invoked when the harness encounters a comment in the constructor test file. + /// + mutating func reportComment(_ comment: String) + + /// A callback that is invoked after the harness executes a WPT URL constructor testcase and provides its results. + /// + /// This callback is invoked with the results of each testcase that is executed, including those whose results are not unexpected. + /// + mutating func reportTestResult(_ result: WPTConstructorTest.Result) +} + +extension WPTConstructorTest.Harness { + + public nonmutating func reportComment(_ comment: String) { + // No-op. + } +} + +extension WPTConstructorTest { + + /// The result of executing a WPT URL constructor test. 
+  ///
+  public struct Result: Equatable, Hashable {
+
+    /// The number of tests that have been run prior to this test.
+    public var testNumber: Int
+
+    /// The test that was run.
+    public var testcase: Testcase
+
+    /// The URL record properties formed by parsing the `input` and `base` values specified in `testcase`.
+    public var propertyValues: URLValues?
+
+    /// The set of test steps that failed.
+    ///
+    /// If empty, the URL parser appeared to behave in accordance with the URL standard for the `input` and `base` values in `testcase`.
+    ///
+    public var failures: Failures
+
+    public init(testNumber: Int, testcase: Testcase, propertyValues: URLValues?, failures: Failures) {
+      self.testNumber = testNumber
+      self.testcase = testcase
+      self.propertyValues = propertyValues
+      self.failures = failures
+    }
+  }
+
+  /// The set of test steps which a WPT URL constructor test failed (if any).
+  ///
+  /// Each failure condition occupies its own bit of `rawValue`, so several may be recorded for one test.
+  ///
+  public struct Failures: OptionSet, Equatable, Hashable {
+    public var rawValue: UInt8
+    public init(rawValue: UInt8) {
+      self.rawValue = rawValue
+    }
+
+    /// No failures.
+    public static var noFailures: Self { .init(rawValue: 0) }
+
+    /// Parsing the base URL must always succeed.
+    public static var baseURLFailedToParse: Self { .init(rawValue: 1 << 0) }
+
+    /// A URL which fails to parse with a valid base must also fail to parse with no base (i.e. when used as a base itself).
+    public static var inputDidNotFailWhenUsedAsBaseURL: Self { .init(rawValue: 1 << 1) }
+
+    /// URL failed to parse but wasn't an expected failure.
+    public static var unexpectedFailureToParse: Self { .init(rawValue: 1 << 2) }
+
+    /// URL was parsed successfully, but was expected to fail.
+    public static var unexpectedSuccessfulParse: Self { .init(rawValue: 1 << 3) }
+
+    /// The parsed URL's properties do not match the expected values.
+ public static var propertyMismatch: Self { .init(rawValue: 1 << 4) } + + /// The URL was parsed, serialised, and re-parsed, and produced a different result the second time around. + public static var notIdempotent: Self { .init(rawValue: 1 << 5) } + } +} + +extension WPTConstructorTest.Harness { + + /// Runs the WPT URL constructor tests in the given `TestFile`. + /// + public mutating func runTests(_ testFile: WPTConstructorTest.TestFile) { + runTests(testFile.tests) + } + + /// Runs the given collection of WPT URL constructor tests. + /// + public mutating func runTests(_ tests: [WPTConstructorTest.FileEntry]) { + + var index = 0 + for entry in tests { + switch entry { + case .comment(let comment): + reportComment(comment) + case .testcase(let testcase): + var result = WPTConstructorTest.Result( + testNumber: index, testcase: testcase, propertyValues: nil, failures: .noFailures + ) + defer { + reportTestResult(result) + index += 1 + } + // Parsing the base URL must always succeed. + if parseURL(testcase.base, base: nil) == nil { + result.failures.insert(.baseURLFailedToParse) + } + // If failure = true, parsing "about:blank" against input must fail. + if testcase.failure && parseURL("about:blank", base: testcase.input) != nil { + result.failures.insert(.inputDidNotFailWhenUsedAsBaseURL) + } + guard let parsedVals = parseURL(testcase.input, base: testcase.base) else { + if !testcase.failure { result.failures.insert(.unexpectedFailureToParse) } + continue + } + if let expectedValues = testcase.expectedValues { + if !parsedVals.allMismatchingURLProperties(comparedWith: expectedValues).isEmpty { + result.failures.insert(.propertyMismatch) + } + } else { + result.failures.insert(.unexpectedSuccessfulParse) + } + // Check idempotence: parse the href again and check all properties. + var serialized = parsedVals[.href]! 
+ serialized.makeContiguousUTF8() + guard let reparsed = parseURL(serialized, base: nil) else { + result.failures.insert(.notIdempotent) + continue + } + if !parsedVals.allMismatchingURLProperties(comparedWith: reparsed).isEmpty { + result.failures.insert(.notIdempotent) + } + } + } + } +} diff --git a/Sources/WebURLTestSupport/WPTSetterTest+WebURLReportHarness.swift b/Sources/WebURLTestSupport/WPTSetterTest+WebURLReportHarness.swift new file mode 100644 index 000000000..cb0237d72 --- /dev/null +++ b/Sources/WebURLTestSupport/WPTSetterTest+WebURLReportHarness.swift @@ -0,0 +1,103 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import WebURL + +extension WPTSetterTest { + + /// A harness for running a series of WPT URL setter tests with the `WebURL` JS model and accumulating the results in a `SimpleTestReport`. + /// + public struct WebURLReportHarness { + public private(set) var report = SimpleTestReport() + public private(set) var entriesSeen = 0 + + public init() {} + } +} + +extension WPTSetterTest.WebURLReportHarness: WPTSetterTest.Harness { + + public typealias URLType = WebURL.JSModel + + public func parseURL(_ input: String) -> URLType? 
{ + WebURL(input)?.jsModel + } + + public func setValue(_ newValue: String, forProperty property: URLModelProperty, on url: inout URLType) { + switch property { + case .href: + url.href = newValue + case .protocol: + url.scheme = newValue + case .username: + url.username = newValue + case .password: + url.password = newValue + case .hostname: + url.hostname = newValue + case .port: + url.port = newValue + case .pathname: + url.pathname = newValue + case .search: + url.search = newValue + case .hash: + url.hash = newValue + case .origin: + assertionFailure("The URL Standard does not allow setting the origin directly") + case .host: + break // 'host' setter is not implemented. + } + } + + public func urlValues(_ url: URLType) -> URLValues { + url.urlValues + } + + /// Records the outcome of a single setter test in `report`, adding one failure entry per failed step. + /// + /// Results for the `host` property are ignored (and not counted in `entriesSeen`), as that setter is not implemented. + /// + public mutating func reportTestResult(_ result: WPTSetterTest.Result) { + if case .host = result.property { + return // 'host' setter is not implemented. Pass/failure isn't meaningful. + } + entriesSeen += 1 + report.performTest { reporter in + reporter.capture(key: "Property", result.property.name) + reporter.capture(key: "Testcase", result.testcase) + if let actualValues = result.resultingValues { + reporter.capture(key: "Result", actualValues) + } + + if !result.failures.isEmpty { + var remainingFailures = result.failures + + if let _ = remainingFailures.remove(.failedToParse) { + reporter.fail("Starting URL failed to parse") + } + if let _ = remainingFailures.remove(.propertyMismatches), let actualValues = result.resultingValues { + for (property, expectedValue) in result.testcase.expected { + if actualValues[property] != expectedValue { + reporter.fail(property.name) + } + } + } + if let _ = remainingFailures.remove(.notIdempotent) { + // Use a non-empty description so that this failure can be identified in the report. + reporter.fail("not idempotent") + } + if !remainingFailures.isEmpty { + assertionFailure("Unhandled failure condition") + reporter.fail("unknown reason") + } + } + } + } +} diff --git a/Sources/WebURLTestSupport/WPTSetterTest.swift b/Sources/WebURLTestSupport/WPTSetterTest.swift new 
file mode 100644 index 000000000..44dc7e505 --- /dev/null +++ b/Sources/WebURLTestSupport/WPTSetterTest.swift @@ -0,0 +1,210 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +public enum WPTSetterTest {} + + +// -------------------------------------------- +// MARK: - Test file data model +// -------------------------------------------- + + +extension WPTSetterTest { + + /// The contents of a WPT URL setter test file. + /// + public struct TestFile: Codable { + public var tests: [URLModelProperty: [Testcase]] + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: URLModelProperty.self) + var tests = [URLModelProperty: [Testcase]]() + for key in URLModelProperty.allCases { + let testcases = try container.decodeIfPresent([Testcase].self, forKey: key) + tests[key] = testcases + } + self.tests = tests + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: URLModelProperty.self) + for (property, testcases) in tests { + try container.encode(testcases, forKey: property) + } + } + } + + /// A WPT URL setter test for a particular property, consisting of a starting URL, new value for the property, and expected property values after the set operation. + /// + public struct Testcase: Equatable, Hashable, Codable { + public var comment: String? 
+ public var href: String + public var new_value: String + public var expected: [URLModelProperty: String] + + enum CodingKeys: String, CodingKey { + case comment = "comment" + case href = "href" + case new_value = "new_value" + case expected = "expected" + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + self.comment = try container.decodeIfPresent(String.self, forKey: .comment) + self.href = try container.decode(String.self, forKey: .href) + self.new_value = try container.decode(String.self, forKey: .new_value) + let expectedValuesContainer = try container.nestedContainer(keyedBy: URLModelProperty.self, forKey: .expected) + self.expected = [:] + for prop in expectedValuesContainer.allKeys { + expected[prop] = try expectedValuesContainer.decode(String.self, forKey: prop) + } + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encodeIfPresent(comment, forKey: .comment) + try container.encode(href, forKey: .href) + try container.encode(new_value, forKey: .new_value) + var expectedValuesContainer = container.nestedContainer(keyedBy: URLModelProperty.self, forKey: .expected) + for (property, value) in expected { + try expectedValuesContainer.encode(value, forKey: property) + } + } + } +} + + +// -------------------------------------------- +// MARK: - Test harness +// -------------------------------------------- + + +extension WPTSetterTest { + + /// An object which runs a set of WPT URL setter tests and processes their results. + /// + public typealias Harness = _WPTSetterTest_Harness +} + +public protocol _WPTSetterTest_Harness { + + associatedtype URLType + + /// Parses the given input string in accordance with the WHATWG URL Standard, and returns a `URLType` with the result. If parsing fails, returns `nil`. + /// + func parseURL(_ input: String) -> URLType? 
+ + /// Performs the 'set' operation, setting the given property to the given value on the given url object. + /// + func setValue(_ newValue: String, forProperty property: URLModelProperty, on url: inout URLType) + + /// Extracts the `URLValues` from the given url object. + /// + func urlValues(_: URLType) -> URLValues + + /// A callback that is invoked after the harness executes a URL setter test, providing the results of that test. + /// This callback is invoked with the results of every test that is executed, including those whose results are not unexpected. + /// + mutating func reportTestResult(_ result: WPTSetterTest.Result) +} + +extension WPTSetterTest { + + /// The result of executing a WPT URL setter test. + /// + public struct Result: Equatable, Hashable { + + /// The property that was set by this test. + public var property: URLModelProperty + + /// The test that was run. + public var testcase: Testcase + + /// The URL's values after setting `property` to the value given by `testcase`. + public var resultingValues: URLValues? + + /// The set of test steps that failed. + /// + /// If empty, the URL object appeared to behave in accordance with the URL standard. + /// + public var failures: Failures + } + + /// The set of test steps which a WPT URL setter test failed (if any). + /// + public struct Failures: OptionSet, Equatable, Hashable { + public var rawValue: UInt8 + public init(rawValue: UInt8) { + self.rawValue = rawValue + } + + /// No failures. + public static var noFailures: Self { .init(rawValue: 0) } + + /// The starting URL failed to parse. + public static var failedToParse: Self { .init(rawValue: 1 << 0) } + + /// The URL did not contain the expected values after setting. + public static var propertyMismatches: Self { .init(rawValue: 1 << 1) } + + /// After setting, the URL was serialised and re-parsed, and produced a different result the second time around. 
+ public static var notIdempotent: Self { .init(rawValue: 1 << 2) } + } +} + +extension WPTSetterTest.Harness { + + /// Runs the WPT URL setter tests in the given `TestFile`. + /// + public mutating func runTests(_ testFile: WPTSetterTest.TestFile) { + runTests(testFile.tests) + } + + /// Runs the given collection of WPT URL setter tests. + /// + /// Every testcase of every property is executed and reported via `reportTestResult`; + /// a failing testcase does not prevent the remaining testcases from running. + /// + public mutating func runTests(_ tests: [URLModelProperty: [WPTSetterTest.Testcase]]) { + for (property, testcases) in tests { + for testcase in testcases { + var result = WPTSetterTest.Result( + property: property, testcase: testcase, resultingValues: nil, failures: .noFailures + ) + defer { reportTestResult(result) } + // 1. Parse the URL. + guard var url = parseURL(testcase.href) else { + result.failures.insert(.failedToParse) + // Move on to the next testcase; returning here would abandon all remaining tests. + continue + } + // 2. Set the value. + setValue(testcase.new_value, forProperty: property, on: &url) + let values = urlValues(url) + result.resultingValues = values + // 3. Check all given keys against their expected values. + for (expected_key, expected_value) in testcase.expected { + if values[expected_key] != expected_value { + result.failures.insert(.propertyMismatches) + } + } + // 4. (Not in standard). Check that modified URL is idempotent WRT serialization. + guard + let reparsedValues = parseURL(values[.href]!).map({ urlValues($0) }), + values.allMismatchingURLProperties(comparedWith: reparsedValues).isEmpty + else { + result.failures.insert(.notIdempotent) + // Move on to the next testcase; returning here would abandon all remaining tests. + continue + } + } + } + } +} diff --git a/Tests/LinuxMain.swift b/Tests/LinuxMain.swift new file mode 100644 index 000000000..3ef66fb52 --- /dev/null +++ b/Tests/LinuxMain.swift @@ -0,0 +1,19 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#error(""" + ----------------------------------------------------- + Please test with `swift test --enable-test-discovery` + ----------------------------------------------------- +""") diff --git a/Tests/WebURLTests/ASCIITests.swift b/Tests/WebURLTests/ASCIITests.swift new file mode 100644 index 000000000..6212ebd6f --- /dev/null +++ b/Tests/WebURLTests/ASCIITests.swift @@ -0,0 +1,225 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Checkit +import XCTest + +@testable import WebURL + +final class ASCIITests: XCTestCase { + + func testASCIIHexValue() { + // Test that hex digits have the appropriate character classes, + // that we can get the numeric value of a character and the character of a numeric value. + + // uppercase. 
+ do { + for (numericValue, character) in "0123456789ABCDEF".enumerated() { + guard let asciiChar = ASCII(flatMap: character.asciiValue) else { + XCTFail("\(character) not recognized as ASCII") + continue + } + + XCTAssertTrue(asciiChar.isHexDigit) + XCTAssertTrue(asciiChar.isAlphaNumeric) + if numericValue < 10 { + XCTAssertTrue(asciiChar.isDigit) + } else { + XCTAssertTrue(asciiChar.isAlpha) + } + + XCTAssertEqual(asciiChar.hexNumberValue.map { Int($0) }, numericValue) + XCTAssertEqual(asciiChar, ASCII.uppercaseHexDigit(of: UInt8(numericValue))) + } + for invalidChar in [ASCII.G, .O, .X] { + XCTAssertFalse(invalidChar.isHexDigit) + XCTAssertNil(invalidChar.hexNumberValue) + + XCTAssertTrue(invalidChar.isAlpha) + XCTAssertTrue(invalidChar.isAlphaNumeric) + } + for cycledNumber in 16...UInt8.max { + XCTAssertEqual(ASCII.uppercaseHexDigit(of: cycledNumber).hexNumberValue, cycledNumber % 16) + } + } + + // lowercase. + do { + for (numericValue, character) in "0123456789abcdef".enumerated() { + guard let asciiChar = ASCII(flatMap: character.asciiValue) else { + XCTFail("\(character) not recognized as ASCII") + continue + } + + XCTAssertTrue(asciiChar.isHexDigit) + XCTAssertTrue(asciiChar.isAlphaNumeric) + if numericValue < 10 { + XCTAssertTrue(asciiChar.isDigit) + } else { + XCTAssertTrue(asciiChar.isAlpha) + } + + XCTAssertEqual(asciiChar.hexNumberValue.map { Int($0) }, numericValue) + XCTAssertEqual(asciiChar, ASCII.lowercaseHexDigit(of: UInt8(numericValue))) + } + for invalidChar in [ASCII.g, .o, .x] { + XCTAssertFalse(invalidChar.isHexDigit) + XCTAssertNil(invalidChar.hexNumberValue) + + XCTAssertTrue(invalidChar.isAlpha) + XCTAssertTrue(invalidChar.isAlphaNumeric) + } + for cycledNumber in 16...UInt8.max { + XCTAssertEqual(ASCII.lowercaseHexDigit(of: cycledNumber).hexNumberValue, cycledNumber % 16) + } + } + } + + func testASCIIDecimalValue() { + // Test that decimal digits have the appropriate character classes, + // that we can get the numeric value of a 
character and the character of a numeric value. + + for (numericValue, character) in "0123456789".enumerated() { + guard let asciiChar = ASCII(flatMap: character.asciiValue) else { + XCTFail("\(character) not recognized as ASCII") + continue + } + + XCTAssertTrue(asciiChar.isDigit) + XCTAssertTrue(asciiChar.isHexDigit) + XCTAssertFalse(asciiChar.isAlpha) + XCTAssertTrue(asciiChar.isAlphaNumeric) + + XCTAssertEqual(asciiChar.decimalNumberValue.map { Int($0) }, numericValue) + XCTAssertEqual(asciiChar, ASCII.decimalDigit(of: UInt8(numericValue))) + } + for invalidChar in [ASCII.A, .B, .C, .D, .E, .F, .G, .O, .X, .a, .b, .c, .d, .e, .f, .g, .o, .x] { + XCTAssertFalse(invalidChar.isDigit) + XCTAssertTrue(invalidChar.isAlpha) + XCTAssertTrue(invalidChar.isAlphaNumeric) + XCTAssertNil(invalidChar.decimalNumberValue) + } + for invalidNumber in 10...UInt8.max { + XCTAssertNil(ASCII.decimalDigit(of: invalidNumber)) + } + } + + func testASCIIDecimalPrinting() { + + var buf: [UInt8] = [0, 0, 0, 0, 0, 0, 0, 0] + + // UInt8. + buf.withUnsafeMutableBytes { buffer in + for num in (UInt8.min)...(UInt8.max) { + let bufferContentsCount = ASCII.writeDecimalString(for: num, to: buffer.baseAddress!) + XCTAssertEqualElements(buffer[.. 10 } + XCTAssertEqual(results_noheadmatch, [1, 3, 5, 7, 9]) + // Everything matches (trim everything). + let results_allmatch = [1, 3, 5, 7, 9, 11, 13, 15].trim { _ in true } + XCTAssertEqual(results_allmatch, []) + + // Both ends match, one element does not match (trim everything except that element). + let results_onematch = [2, 10, 12, 15, 20, 100].trim { $0.isMultiple(of: 2) } + XCTAssertEqual(results_onematch, [15]) + + // Both ends match, some string of >1 elements do not match (return that string). 
+ let results_0 = [2, 10, 11, 15, 20, 21, 100].trim(where: { $0.isMultiple(of: 2) }) + XCTAssertEqual(results_0, [11, 15, 20, 21]) + } +} + +// Collection+longestRange + +extension AlgorithmsTestCase { + + func testCollectionLongestSubrange() { + + // Empty collection. + let results_empty = ([] as [Int]).longestSubrange { _ in true } + XCTAssertEqual(results_empty.subrange, 0..<0) + XCTAssertEqual(results_empty.length, 0) + + let range_basic = [1, 2, 4, 3, 2, 2, 2, 4, 5, 2, 2, 2, 2, 6, 7, 8] + + // No match (empty result). + let range_empty_result = range_basic.longestSubrange { $0 == 10 } + XCTAssertEqual(range_empty_result.subrange, 0..<0) + XCTAssertEqual(range_empty_result.length, 0) + // Single match. + let range_single_end_result = range_basic.longestSubrange { $0 == 8 } + XCTAssertEqual(range_single_end_result.subrange, 15..<16) + XCTAssertEqual(range_single_end_result.length, 1) + // Multiple matches, no length ties. + let range_basic_result = range_basic.longestSubrange { $0 == 2 } + XCTAssertEqual(range_basic_result.subrange, 9..<13) + XCTAssertEqual(range_basic_result.length, 4) + // Multiple matches, tied on length. + let range_tie_result = range_basic.longestSubrange { $0 == 4 } + XCTAssertEqual(range_tie_result.subrange, 2..<3) + XCTAssertEqual(range_tie_result.length, 1) + } +} diff --git a/Tests/WebURLTests/FormParametersTests.swift b/Tests/WebURLTests/FormParametersTests.swift new file mode 100644 index 000000000..747e16c23 --- /dev/null +++ b/Tests/WebURLTests/FormParametersTests.swift @@ -0,0 +1,555 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest + +@testable import WebURL + +final class FormEncodedQueryParametersTests: XCTestCase { + + func testDocumentationExamples() { + + // From documentation for `WebURL.formParams`: + do { + var url = WebURL("http://example.com/currency/convert?from=EUR&to=USD")! + XCTAssertEqual(url.formParams.from, "EUR") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + + url.formParams.from = "GBP" + XCTAssertEqual(url.serialized, "http://example.com/currency/convert?from=GBP&to=USD") + + url.formParams.amount = "20" + XCTAssertEqual(url.serialized, "http://example.com/currency/convert?from=GBP&to=USD&amount=20") + + url.formParams.to = "💵" + XCTAssertEqual(url.serialized, "http://example.com/currency/convert?from=GBP&to=%F0%9F%92%B5&amount=20") + + XCTAssertURLIsIdempotent(url) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + + let expected = [ + ("from", "GBP"), + ("to", "💵"), + ("amount", "20"), + ] + for (i, (key, value)) in url.formParams.allKeyValuePairs.enumerated() { + XCTAssertEqual(key, expected[i].0) + XCTAssertEqual(value, expected[i].1) + } + } + // From documentation for 'contains', 'get': + do { + let url = WebURL("http://example.com?jalape\u{006E}\u{0303}os=2")! 
+ XCTAssertEqual(url.serialized, "http://example.com/?jalapen%CC%83os=2") + + XCTAssertTrue(url.formParams.contains("jalape\u{006E}\u{0303}os")) + XCTAssertEqual(url.formParams.get("jalape\u{006E}\u{0303}os"), "2") + + XCTAssertFalse(url.formParams.contains("jalape\u{00F1}os")) + XCTAssertNil(url.formParams.get("jalape\u{00F1}os")) + + XCTAssert( + url.formParams.allKeyValuePairs.first(where: { $0.0 == "jalape\u{00F1}os" }) ?? ("", "") == ("jalapeños", "2") + ) + } + } + + func testGet_Contains() { + + let url = WebURL("http://example.com?a=b&c+is the key=d&&e=&=foo&e=g&e&h=👀&e=f")! + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + + // Check that we can look up a simple key, and the value is decoded. + XCTAssertEqual(url.formParams.a, "b") + XCTAssertEqual(url.formParams.h, "👀") + // Check we can find a key which requires encoding. + XCTAssertEqual(url.formParams.get("c is the key"), "d") + // If a key has multiple values, the first one is returned. + // Also, check we can find a key with an empty value. + XCTAssertEqual(url.formParams.e, "") + // Empty keys can also be found. + XCTAssertEqual(url.formParams.get(""), "foo") + + // Non-present keys return nil. + XCTAssertNil(url.formParams.doesNotExist) + XCTAssertNil(url.formParams.get("nope")) + + // 'contains' returns the same information. + XCTAssertTrue(url.formParams.contains("a")) + XCTAssertTrue(url.formParams.contains("c is the key")) + XCTAssertTrue(url.formParams.contains("")) + XCTAssertFalse(url.formParams.contains("doesNotExist")) + + // 'getAll' finds all values for a key, returns them in correct order. 
+ XCTAssertEqual(url.formParams.getAll("e"), ["", "g", "", "f"]) + XCTAssertEqual(url.formParams.getAll("doesNotExist"), []) + + // All of this is read-only; the URL's query string remains as it was. + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + } + + func testEmptyAndNil() { + + // Both nil and empty query strings present as empty query parameters. + do { + var url = WebURL("http://example.com")! + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertNil(url.formParams.get("")) + XCTAssertNil(url.formParams.get("?")) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + + url.query = "" + XCTAssertEqual(url.serialized, "http://example.com/?") + XCTAssertEqual(url.query, "") + XCTAssertNil(url.formParams.get("")) + XCTAssertNil(url.formParams.get("?")) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + + // When emptying the formParams, the URL's query gets set to nil rather than empty. + do { + var url = WebURL("http://example.com?a=b&c is the key=d&&e=&e&=foo&e=g&h=👀&e=f")! + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c%20is%20the%20key=d&&e=&e&=foo&e=g&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c%20is%20the%20key=d&&e=&e&=foo&e=g&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.formParams.h, "👀") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + + url.formParams.removeAll() + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertNil(url.formParams.h) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + + // KVPs without keys or values (so strings of "&" characters in the query) get removed by form-encoding + // and are the equivalent of an empty query. 
+ do { + var url = WebURL("http://example.com?&&&")! + XCTAssertEqual(url.serialized, "http://example.com/?&&&") + XCTAssertEqual(url.query, "&&&") + XCTAssertNil(url.formParams.get("")) + + url.formParams = url.formParams + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertNil(url.formParams.get("")) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + } + + func testAppend() { + + // Start with a URL without query, use 'append' to build one. + do { + var url = WebURL("http://example.com")! + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + + url.formParams.append("non_escaped", value: "true") // Neither key or value need escaping. + url.formParams.append("spa ce", value: "") // key needs escaping due to substitution only. + url.formParams.append("search query", value: "why are 🦆 so awesome?") // both need escaping. + url.formParams.append("`back`'tick'", value: "") // U+0027 is encoded by forms, and only by forms. + XCTAssertEqual( + url.serialized, + "http://example.com/?non_escaped=true&spa+ce=&search+query=why+are+%F0%9F%A6%86+so+awesome%3F&%60back%60%27tick%27=" + ) + XCTAssertEqual( + url.query, "non_escaped=true&spa+ce=&search+query=why+are+%F0%9F%A6%86+so+awesome%3F&%60back%60%27tick%27=" + ) + XCTAssertEqual(url.formParams.get("non_escaped"), "true") + XCTAssertEqual(url.formParams.get("search query"), "why are 🦆 so awesome?") + XCTAssertEqual(url.formParams.get("`back`'tick'"), "") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + + // Store the params object and reset the query. 
+ var storedParams = url.formParams + url.query = nil + url.hostname = "foobar.org" + XCTAssertEqual(url.serialized, "http://foobar.org/") + XCTAssertNil(url.query) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertFalse(url.formParams.contains("search query")) + XCTAssertTrue(storedParams.contains("search query")) + // Append to the free-standing copy. + storedParams.append("still alive?", value: "should be!") + storedParams.append("owned and mutable?", value: "sure thing!") + XCTAssertEqual(storedParams.get("still alive?"), "should be!") + XCTAssertEqual(storedParams.get("owned and mutable?"), "sure thing!") + // Assign it to the URL. + url.formParams = storedParams + XCTAssertEqual( + url.serialized, + "http://foobar.org/?non_escaped=true&spa+ce=&search+query=why+are+%F0%9F%A6%86+so+awesome%3F&%60back%60%27tick%27=&still+alive%3F=should+be%21&owned+and+mutable%3F=sure+thing%21" + ) + XCTAssertEqual(url.formParams.get("still alive?"), "should be!") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + + // Ensure that we can append to an empty (not 'nil') query. + do { + var url = WebURL("foo://bar?")! + XCTAssertEqual(url.serialized, "foo://bar?") + XCTAssertEqual(url.query, "") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + + url.formParams.append("test", value: "works!") + XCTAssertEqual(url.serialized, "foo://bar?test=works%21") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + } + + func testAppendSequence() { + + do { + // Start with a URL without query, use 'append' to build one. + var url = WebURL("http://example.com")! + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + + url.formParams += [ + ("search query", "why are 🦆 so awesome?"), + ("`back`'tick'", ""), // U+0027 is encoded by forms, and only by forms. 
+ ] + XCTAssertEqual( + url.serialized, + "http://example.com/?search+query=why+are+%F0%9F%A6%86+so+awesome%3F&%60back%60%27tick%27=" + ) + XCTAssertEqual(url.query, "search+query=why+are+%F0%9F%A6%86+so+awesome%3F&%60back%60%27tick%27=") + XCTAssertEqual(url.formParams.get("search query"), "why are 🦆 so awesome?") + XCTAssertEqual(url.formParams.get("`back`'tick'"), "") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + + // Store the params object and reset the query. + var storedParams = url.formParams + url.query = nil + url.hostname = "foobar.org" + XCTAssertEqual(url.serialized, "http://foobar.org/") + XCTAssertNil(url.query) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertFalse(url.formParams.contains("search query")) + XCTAssertTrue(storedParams.contains("search query")) + // Append to the free-standing copy. + storedParams.append(contentsOf: [ + (key: "still alive?", value: "should be!"), + (key: "owned and mutable?", value: "sure thing!"), + ]) + XCTAssertEqual(storedParams.get("still alive?"), "should be!") + XCTAssertEqual(storedParams.get("owned and mutable?"), "sure thing!") + // Assign it to the URL. + url.formParams = storedParams + XCTAssertEqual( + url.serialized, + "http://foobar.org/?search+query=why+are+%F0%9F%A6%86+so+awesome%3F&%60back%60%27tick%27=&still+alive%3F=should+be%21&owned+and+mutable%3F=sure+thing%21" + ) + XCTAssertEqual(url.formParams.get("still alive?"), "should be!") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + + // Dictionary has a concrete overload which sorts its key-value pairs, + // so appending a dictionary always gives predictable results. + do { + var url = WebURL("http://example.com")! 
+ XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + + let dictionary: [String: String] = [ + "key one": "value one", + "key 2️⃣": "value %02", + ] + url.formParams += dictionary + XCTAssertEqual(url.serialized, "http://example.com/?key+2%EF%B8%8F%E2%83%A3=value+%2502&key+one=value+one") + XCTAssertEqual(url.query, "key+2%EF%B8%8F%E2%83%A3=value+%2502&key+one=value+one") + XCTAssertEqual(url.formParams.get("key one"), "value one") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + } + + func testRemove() { + + var url = WebURL("http://example.com?a=b&c+is the key=d&&e=&=foo&e=g&e&h=👀&e=f")! + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + + // Removal from the front. + XCTAssertEqual(url.formParams.a, "b") + url.formParams.remove("a") + XCTAssertEqual(url.serialized, "http://example.com/?c+is+the+key=d&e=&=foo&e=g&e=&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "c+is+the+key=d&e=&=foo&e=g&e=&h=%F0%9F%91%80&e=f") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertNil(url.formParams.a) + XCTAssertURLIsIdempotent(url) + + // Removal of a key with multiple entries. + XCTAssertEqual(url.formParams.e, "") + XCTAssertEqual(url.formParams.getAll("e"), ["", "g", "", "f"]) + url.formParams.remove("e") + XCTAssertEqual(url.serialized, "http://example.com/?c+is+the+key=d&=foo&h=%F0%9F%91%80") + XCTAssertEqual(url.query, "c+is+the+key=d&=foo&h=%F0%9F%91%80") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertNil(url.formParams.e) + XCTAssertURLIsIdempotent(url) + + // Removal from the back. 
+ XCTAssertEqual(url.formParams.h, "👀") + url.formParams.remove("h") + XCTAssertEqual(url.serialized, "http://example.com/?c+is+the+key=d&=foo") + XCTAssertEqual(url.query, "c+is+the+key=d&=foo") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertNil(url.formParams.h) + XCTAssertURLIsIdempotent(url) + + // Removing all key-value pairs results in a 'nil' query. + XCTAssertEqual(url.formParams.get("c is the key"), "d") + XCTAssertEqual(url.formParams.get(""), "foo") + url.formParams.remove("c is the key") + url.formParams.remove("") + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertNil(url.formParams.get("c is the key")) + XCTAssertNil(url.formParams.get("")) + XCTAssertURLIsIdempotent(url) + } + + func testRemoveAll() { + + var url = WebURL("http://example.com?a=b&c+is the key=d&&e=&=foo&e=g&e&h=👀&e=f")! + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertEqual(url.formParams.e, "") + XCTAssertEqual(url.formParams.a, "b") + + url.formParams.removeAll() + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertNil(url.formParams.e) + XCTAssertNil(url.formParams.a) + XCTAssertURLIsIdempotent(url) + } + + func testSet() { + + var url = WebURL("http://example.com?a=b&c+is the key=d&&e=&=foo&e=g&e&h=👀&e=f")! + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + + // Set unique, pre-existing keys. 
Relative position of KVP within the string is maintained. + XCTAssertEqual(url.formParams.a, "b") + url.formParams.a = "THIS ONE" + XCTAssertEqual(url.serialized, "http://example.com/?a=THIS+ONE&c+is+the+key=d&e=&=foo&e=g&e=&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=THIS+ONE&c+is+the+key=d&e=&=foo&e=g&e=&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.formParams.a, "THIS ONE") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + + XCTAssertEqual(url.formParams.h, "👀") + url.formParams.set("h", to: "ALSO THIS ONE") + XCTAssertEqual(url.serialized, "http://example.com/?a=THIS+ONE&c+is+the+key=d&e=&=foo&e=g&e=&h=ALSO+THIS+ONE&e=f") + XCTAssertEqual(url.query, "a=THIS+ONE&c+is+the+key=d&e=&=foo&e=g&e=&h=ALSO+THIS+ONE&e=f") + XCTAssertEqual(url.formParams.h, "ALSO THIS ONE") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + + // Set a key with multiple entries. + XCTAssertEqual(url.formParams.e, "") + url.formParams.e = "collapsed" + XCTAssertEqual(url.serialized, "http://example.com/?a=THIS+ONE&c+is+the+key=d&e=collapsed&=foo&h=ALSO+THIS+ONE") + XCTAssertEqual(url.query, "a=THIS+ONE&c+is+the+key=d&e=collapsed&=foo&h=ALSO+THIS+ONE") + XCTAssertEqual(url.formParams.e, "collapsed") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + + // Setting to 'nil' removes the key. + XCTAssertEqual(url.formParams.a, "THIS ONE") + url.formParams.a = nil + XCTAssertEqual(url.serialized, "http://example.com/?c+is+the+key=d&e=collapsed&=foo&h=ALSO+THIS+ONE") + XCTAssertEqual(url.query, "c+is+the+key=d&e=collapsed&=foo&h=ALSO+THIS+ONE") + XCTAssertNil(url.formParams.a) + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + + // Setting a non-existent key appends it. + XCTAssertNil(url.formParams.doesNotExist) + url.formParams.doesNotExist = "Yes, it does!" 
+ XCTAssertEqual( + url.serialized, + "http://example.com/?c+is+the+key=d&e=collapsed&=foo&h=ALSO+THIS+ONE&doesNotExist=Yes%2C+it+does%21") + XCTAssertEqual(url.query, "c+is+the+key=d&e=collapsed&=foo&h=ALSO+THIS+ONE&doesNotExist=Yes%2C+it+does%21") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + + func testAssignment() { + + do { + var url0 = WebURL("http://example.com?a=b&c+is the key=d&&e=&=foo&e=g&e&h=👀&e=f")! + XCTAssertEqual(url0.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url0.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertFalse(url0.storage.structure.queryIsKnownFormEncoded) + + var url1 = WebURL("foo://bar")! + XCTAssertEqual(url1.serialized, "foo://bar") + XCTAssertNil(url1.query) + XCTAssertTrue(url1.storage.structure.queryIsKnownFormEncoded) + + // Set url1's formParams from empty to url0's non-empty formParams. + // url1's query string should be the form-encoded version version of url0's query, which itself remains unchanged. + url1.formParams = url0.formParams + XCTAssertEqual(url1.serialized, "foo://bar?a=b&c+is+the+key=d&e=&=foo&e=g&e=&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url1.query, "a=b&c+is+the+key=d&e=&=foo&e=g&e=&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url0.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url0.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertFalse(url0.storage.structure.queryIsKnownFormEncoded) + XCTAssertTrue(url1.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url1) + + // Reset url1 to a nil query. Set url0's non-empty params to url1's empty params. + // url0 should now have a nil query, and url1 remains unchanged. + url1 = WebURL("foo://bar")! 
+ XCTAssertEqual(url1.serialized, "foo://bar") + XCTAssertNil(url1.query) + XCTAssertTrue(url1.storage.structure.queryIsKnownFormEncoded) + + url0.formParams = url1.formParams + XCTAssertEqual(url0.serialized, "http://example.com/") + XCTAssertNil(url0.query) + XCTAssertTrue(url0.storage.structure.queryIsKnownFormEncoded) + XCTAssertEqual(url1.serialized, "foo://bar") + XCTAssertNil(url1.query) + XCTAssertTrue(url1.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url0) + } + + // Assigning a URL's query parameters to itself causes the string to be re-encoded. + do { + var url = WebURL("http://example.com?a=b&c+is the key=d&&e=&=foo&e=g&e&h=👀&e=f&&&")! + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f&&&") + XCTAssertEqual(url.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f&&&") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + + url.formParams = url.formParams + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is+the+key=d&e=&=foo&e=g&e=&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c+is+the+key=d&e=&=foo&e=g&e=&h=%F0%9F%91%80&e=f") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } + } + + func testKeyValuePairsSequence() { + + var url = WebURL("http://example.com?a=b&c+is the key=d&&e=&=foo&e=g&e&h=👀&e=f")! + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertFalse(url.formParams.allKeyValuePairs.isEmpty) + + // Tuples are not Equatable :( + struct KeyValuePair: Equatable { + var key: String + var value: String + } + // Check that all elements are returned (even duplicates), and in the correct order. 
+ let actualKVPs = url.formParams.allKeyValuePairs.map { KeyValuePair(key: $0.0, value: $0.1) } + let expectedKVPs = [ + ("a", "b"), ("c is the key", "d"), ("e", ""), ("", "foo"), ("e", "g"), ("e", ""), ("h", "👀"), ("e", "f"), + ].map { KeyValuePair(key: $0.0, value: $0.1) } + + XCTAssertEqualElements(actualKVPs, expectedKVPs) + + // Check that we can iterate again, with the same results. + let actualKVPs_secondIteration = url.formParams.allKeyValuePairs.map { KeyValuePair(key: $0.0, value: $0.1) } + XCTAssertEqualElements(actualKVPs, actualKVPs_secondIteration) + + // Dictionary construction. + let dictionary = Dictionary(url.formParams.allKeyValuePairs, uniquingKeysWith: { earlier, later in earlier }) + XCTAssertEqual(dictionary.count, 5) + XCTAssertEqual(dictionary["c is the key"], "d") + + // 'isEmpty' property. + url.formParams.removeAll() + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.query) + XCTAssertTrue(url.formParams.allKeyValuePairs.isEmpty) + + url.formParams.someKey = "someValue" + XCTAssertEqual(url.serialized, "http://example.com/?someKey=someValue") + XCTAssertEqual(url.query, "someKey=someValue") + XCTAssertFalse(url.formParams.allKeyValuePairs.isEmpty) + + // Empty KVPs are ignored by form encoding. + url = WebURL("http://example.com/?&&&&")! + XCTAssertEqual(url.serialized, "http://example.com/?&&&&") + XCTAssertEqual(url.query, "&&&&") + XCTAssertTrue(url.formParams.allKeyValuePairs.isEmpty) + for _ in url.formParams.allKeyValuePairs { + XCTFail("Expected formParams to be empty") + } + } + + func testKnownFormEncodedFlag() { + + // For a non-empty query, the flag should start at 'false'. + var url = WebURL("http://example.com?a=b&c+is the key=d&&e=&=foo&e=g&e&h=👀&e=f")! 
+ XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertEqual(url.query, "a=b&c+is%20the%20key=d&&e=&=foo&e=g&e&h=%F0%9F%91%80&e=f") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + + // Modifying via 'formParams' sets the flag to true, as the query is re-encoded. + url.formParams.h = nil + XCTAssertEqual(url.serialized, "http://example.com/?a=b&c+is+the+key=d&e=&=foo&e=g&e=&e=f") + XCTAssertEqual(url.query, "a=b&c+is+the+key=d&e=&=foo&e=g&e=&e=f") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + + // Copying from a base URL maintains the flag. + let joinedURL = url.resolve("#someFragment")! + XCTAssertEqual(joinedURL.serialized, "http://example.com/?a=b&c+is+the+key=d&e=&=foo&e=g&e=&e=f#someFragment") + XCTAssertEqual(joinedURL.query, "a=b&c+is+the+key=d&e=&=foo&e=g&e=&e=f") + XCTAssertTrue(joinedURL.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(joinedURL) + + // Setting via '.query' to a non-empty value sets the flag back to false. + url.query = "foobar" + XCTAssertEqual(url.serialized, "http://example.com/?foobar") + XCTAssertEqual(url.query, "foobar") + XCTAssertFalse(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + + // Setting via '.query' to an empty/nil value sets the flag to true. + url.query = "" + XCTAssertEqual(url.serialized, "http://example.com/?") + XCTAssertEqual(url.query, "") + XCTAssertTrue(url.storage.structure.queryIsKnownFormEncoded) + XCTAssertURLIsIdempotent(url) + } +} diff --git a/Tests/WebURLTests/IPv4AddressTests.swift b/Tests/WebURLTests/IPv4AddressTests.swift new file mode 100644 index 000000000..76e255726 --- /dev/null +++ b/Tests/WebURLTests/IPv4AddressTests.swift @@ -0,0 +1,284 @@ +// Copyright The swift-url Contributors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import WebURLTestSupport +import XCTest + +@testable import WebURL + +extension UInt32 { + fileprivate var octets: [UInt8] { + withUnsafeBytes(of: self) { Array($0) } + } +} + +extension Array { + fileprivate init(ipv4Octets: IPv4Address.Octets) where Element == UInt8 { + self = [ipv4Octets.0, ipv4Octets.1, ipv4Octets.2, ipv4Octets.3] + } +} + +final class IPv4AddressTests: XCTestCase { + + func testBasic() { + + let expectedNumericAddress: UInt32 = 3_237_937_669 + let strings = [ + "3237937669", // 1 component, decimal. + "0xC0.077601005", // 2 components, hex/octal. + "192.0xff.01005", // 3 components, decimal/hex/octal. + "192.255.2.5", // 4 components, decimal. + "0xc0.0xff.0x02.0x05", // 4 components, hex. + ] + for string in strings { + guard let addr = IPv4Address(string[...]) else { + XCTFail("Failed to parse valid address: \(string)") + continue + } + XCTAssertEqual(Array(ipv4Octets: addr.octets), [192, 255, 2, 5]) + XCTAssertEqual(addr.serialized, "192.255.2.5") + XCTAssertEqual(addr[value: .numeric], expectedNumericAddress) + XCTAssertEqual(addr[value: .binary], UInt32(bigEndian: expectedNumericAddress)) + + guard let reparsedAddr = IPv4Address(addr.serialized) else { + XCTFail("Failed to reparse. Original: '\(string)'. 
Parsed: '\(addr.serialized)'") + continue + } + XCTAssertEqual(Array(ipv4Octets: addr.octets), Array(ipv4Octets: reparsedAddr.octets)) + XCTAssertEqual(addr.serialized, reparsedAddr.serialized) + } + } + + func testTrailingDots() { + + let expectedNumericAddress: UInt32 = 16_909_060 + + // Zero trailing dots are allowed (obviously). + if let addr = IPv4Address("1.2.3.4") { + XCTAssertEqual(Array(ipv4Octets: addr.octets), [1, 2, 3, 4]) + XCTAssertEqual(addr.serialized, "1.2.3.4") + XCTAssertEqual(addr[value: .numeric], expectedNumericAddress) + XCTAssertEqual(addr[value: .binary], UInt32(bigEndian: expectedNumericAddress)) + } else { + XCTFail("Failed to parse valid address") + } + + // One trailing dot is allowed. + if let addr = IPv4Address("1.2.3.4.") { + XCTAssertEqual(Array(ipv4Octets: addr.octets), [1, 2, 3, 4]) + XCTAssertEqual(addr.serialized, "1.2.3.4") + XCTAssertEqual(addr[value: .numeric], expectedNumericAddress) + XCTAssertEqual(addr[value: .binary], UInt32(bigEndian: expectedNumericAddress)) + } else { + XCTFail("Failed to parse valid address") + } + + // Two or more trailing dots are not allowed. + if let _ = IPv4Address("1.2.3.4..") { XCTFail("Expected fail") } + if let _ = IPv4Address("1.2.3.4...") { XCTFail("Expected fail") } + if let _ = IPv4Address("1.2.3.4....") { XCTFail("Expected fail") } + // More than 4 components are not allowed. 
+ if let _ = IPv4Address("1.2.3.4.5") { XCTFail("Expected fail") } + if let _ = IPv4Address("1.2.3.4.5.") { XCTFail("Expected fail") } + if let _ = IPv4Address("1.2.3.4.5..") { XCTFail("Expected fail") } + } + + func testTrailingZeroes() { + + if let addr = IPv4Address("234") { + XCTAssertEqual(Array(ipv4Octets: addr.octets), [0, 0, 0, 234]) + XCTAssertEqual(addr.serialized, "0.0.0.234") + XCTAssertEqual(addr[value: .numeric], 234) + XCTAssertEqual(addr[value: .binary], UInt32(bigEndian: 234)) + } else { + XCTFail("Failed to parse valid address") + } + + if let addr = IPv4Address("234.0") { + XCTAssertEqual(Array(ipv4Octets: addr.octets), [234, 0, 0, 0]) + XCTAssertEqual(addr.serialized, "234.0.0.0") + XCTAssertEqual(addr[value: .numeric], 3_925_868_544) + XCTAssertEqual(addr[value: .binary], UInt32(bigEndian: 3_925_868_544)) + } else { + XCTFail("Failed to parse valid address") + } + + // First, test that we parse the correct value with no trailing zeroes. + // "234.011" = "234.0.0.9" (the octal 9 occupies the lowest byte) = 0xEA000009 = 3925868553. + let noTrailingZeroes = [ + "234.011", + "234.011.", + ] + for string in noTrailingZeroes { + let expectedNumericAddress: UInt32 = 3_925_868_553 + + guard let addr = IPv4Address(string) else { + XCTFail("Failed to parse valid address") + continue + } + + XCTAssertEqual(Array(ipv4Octets: addr.octets), [234, 0, 0, 9]) + XCTAssertEqual(addr.serialized, "234.0.0.9") + XCTAssertEqual(addr[value: .numeric], expectedNumericAddress) + XCTAssertEqual(addr[value: .binary], UInt32(bigEndian: expectedNumericAddress)) + + guard let reparsedAddr = IPv4Address(addr.serialized) else { + XCTFail("Failed to reparse. Original: '\(string)'. Parsed: '\(addr.serialized)'") + continue + } + XCTAssertEqual(Array(ipv4Octets: addr.octets), Array(ipv4Octets: reparsedAddr.octets)) + XCTAssertEqual(addr.serialized, reparsedAddr.serialized) + } + + // Next, test that we parse the correct value with any valid number of trailing zeroes. 
+ // "234.011.0" = "234.9.0.0" (the 9 is shifted up to the second byte and the lowest bytes are all zero) + // = 0xEA090000 = 3926458368. + let valid_trailingZeroes = [ + "234.011.0", + "234.011.0.", + "234.011.0.0", + "234.011.0.0.", + ] + for string in valid_trailingZeroes { + let expectedNumericAddress: UInt32 = 3_926_458_368 + + guard let addr = IPv4Address(string) else { + XCTFail("Failed to parse valid address") + continue + } + + XCTAssertEqual(Array(ipv4Octets: addr.octets), [234, 9, 0, 0]) + XCTAssertEqual(addr.serialized, "234.9.0.0") + XCTAssertEqual(addr[value: .numeric], expectedNumericAddress) + XCTAssertEqual(addr[value: .binary], UInt32(bigEndian: expectedNumericAddress)) + + guard let reparsedAddr = IPv4Address(addr.serialized) else { + XCTFail("Failed to reparse. Original: '\(string)'. Parsed: '\(addr.serialized)'") + continue + } + XCTAssertEqual(Array(ipv4Octets: addr.octets), Array(ipv4Octets: reparsedAddr.octets)) + XCTAssertEqual(addr.serialized, reparsedAddr.serialized) + } + + // Finally, test that we reject an invalid number of trailing zeroes. + let invalid_trailingZeroes = [ + "234.011.0.0.0", + "234.011.0.0.0.", + "234.011.0.0.0.0", + "234.011.0.0.0.0.", + ] + for string in invalid_trailingZeroes { + XCTAssertNil(IPv4Address(string), "Invalid address should have been rejected: \(string)") + } + } + + func testInvalid() { + // TODO: Check for specific validation errors. + + if let _ = IPv4Address("0.0x300") {} else { XCTFail("Failed to parse valid address") } + if let _ = IPv4Address("0..0x300") { XCTFail("Expected fail") } + + // Non-numbers. + if let _ = IPv4Address("sup?") { XCTFail("Expected fail") } + if let _ = IPv4Address("100sup?") { XCTFail("Expected fail") } + if let _ = IPv4Address("100.sup?") { XCTFail("Expected fail") } + + // Overflow. 
+ if let _ = IPv4Address("0xFFFFFFFF") {} else { XCTFail("Failed to parse valid address") } + if let _ = IPv4Address("0xFFFFFFFF1") { XCTFail("Expected to fail") } + if let _ = IPv4Address("1.0xFFFFFFF") { XCTFail("Expected to fail") } + if let _ = IPv4Address("1.1.0xFFFFF") { XCTFail("Expected to fail") } + if let _ = IPv4Address("1.1.1.0xFFF") { XCTFail("Expected to fail") } + + // Invalid base-X characters. + if let _ = IPv4Address("192.0xF") {} else { XCTFail("Failed to parse valid address") } + if let _ = IPv4Address("192.F") { XCTFail("Expected fail") } + if let _ = IPv4Address("192.0F") { XCTFail("Expected fail") } + if let _ = IPv4Address("192.0xG") { XCTFail("Expected fail") } + } +} + +// Randomized testing. + +#if canImport(Glibc) || canImport(Darwin) + + #if canImport(Glibc) + import Glibc + #else + import Darwin + #endif + + func libc_aton(_ straddr: String) -> UInt32? { + var addr = in_addr() + guard inet_aton(straddr, &addr) != 0 else { return nil } + return addr.s_addr + } + + func libc_ntoa(_ netaddr: UInt32) -> String { + var addr = in_addr() + addr.s_addr = netaddr + return String(cString: inet_ntoa(addr)) + } + + extension IPv4AddressTests { + + /// Generate 1000 random IP addresses, serialize them via IPAddress. + /// Then serialize the same addresss via `ntoa`, and ensure it returns the same string. + /// Then parse our serialized version back via `aton`, and ensure it returns the same address. + /// Then parse our serialized version again, and ensure that it returns the same address. + /// + func testRandom_Serialisation() { + for _ in 0..<1000 { + let expected = IPv4Address.Utils.randomAddress() + let address = IPv4Address(value: expected, .numeric) + + // Serialize the same address with libc (note: ntoa expects network byte order). It should return the same String. + XCTAssertEqual(libc_ntoa(expected.bigEndian), address.serialized) + + // Parse our serialized output with libc. It should return the same address. 
+ let libcAddress = libc_aton(address.serialized) + XCTAssertEqual( + libcAddress?.octets, Array(ipv4Octets: address.octets), "Mismatch detected for address \(address)") + XCTAssertEqual(libcAddress, address[value: .binary], "Mismatch detected for address \(address)") + + // Re-parse our serialized output. It should return the same address. + if let reparsed = IPv4Address(address.serialized) { + XCTAssertEqual(Array(ipv4Octets: address.octets), Array(ipv4Octets: reparsed.octets)) + } else { + XCTFail("Address is not idempotent: \(address.serialized)") + } + } + } + + /// Generate 1000 random IP Address Strings, parse them via IPAddress, + /// check that the numerical value matches the expected network address, + /// and that `aton` gets the same result when parsing the same random String. + /// + func testRandom_Parsing() { + for _ in 0..<1000 { + let randomNumericAddress = IPv4Address.Utils.randomAddress() + let randomAddressString = IPv4Address.Utils.randomString(address: randomNumericAddress) + + guard let parsed = IPv4Address(randomAddressString) else { + XCTFail("Failed to parse address: \(randomAddressString); expected address: \(randomNumericAddress)") + continue + } + XCTAssertEqual(Array(ipv4Octets: parsed.octets), libc_aton(randomAddressString)?.octets) + XCTAssertEqual(parsed[value: .numeric], randomNumericAddress) + XCTAssertEqual(parsed[value: .binary], libc_aton(randomAddressString)) + } + } + } + +#endif diff --git a/Tests/WebURLTests/IPv6AddressTests.swift b/Tests/WebURLTests/IPv6AddressTests.swift new file mode 100644 index 000000000..cf4e2e509 --- /dev/null +++ b/Tests/WebURLTests/IPv6AddressTests.swift @@ -0,0 +1,310 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import WebURLTestSupport +import XCTest + +@testable import WebURL + +extension Array { + fileprivate init(ipv6Octets addr: IPv6Address.Octets) where Element == UInt8 { + self = [ + addr.0, addr.1, addr.2, addr.3, addr.4, addr.5, addr.6, addr.7, + addr.8, addr.9, addr.10, addr.11, addr.12, addr.13, addr.14, addr.15, + ] + } + + fileprivate init(ipv6Pieces addr: IPv6Address.Pieces) where Element == UInt16 { + self = [addr.0, addr.1, addr.2, addr.3, addr.4, addr.5, addr.6, addr.7] + } +} + +final class IPv6AddressTests: XCTestCase { + + func testBasic() { + + let testData: [(String, String, [UInt8], [UInt16])] = [ + // Canonical + ( + "2001:0db8:85a3:0000:0000:8a2e:0370:7334", "2001:db8:85a3::8a2e:370:7334", + [0x20, 0x01, 0x0d, 0x0b8, 0x85, 0xa3, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x2e, 0x03, 0x70, 0x73, 0x34], + [0x2001, 0x0db8, 0x85a3, 0x0000, 0x0000, 0x8a2e, 0x0370, 0x7334] + ), + // Teredo + ( + "2001::ce49:7601:e866:efff:62c3:fffe", "2001:0:ce49:7601:e866:efff:62c3:fffe", + [0x20, 0x01, 0x00, 0x00, 0xce, 0x49, 0x76, 0x01, 0xe8, 0x66, 0xef, 0xff, 0x62, 0xc3, 0xff, 0xfe], + [0x2001, 0x0000, 0xce49, 0x7601, 0xe866, 0xefff, 0x62c3, 0xfffe] + ), + // Compact + ( + "2608::3:5", "2608::3:5", + [0x26, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x05], + [0x2608, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0003, 0x0005] + ), + // Empty + ( + "::", "::", + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + [0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000] 
+ ), + // IPv4 + ( + "::ffff:192.168.0.1", "::ffff:c0a8:1", + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xc0, 0xa8, 0x00, 0x01], + [0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xc0a8, 0x0001] + ), + ] + + for (string, expectedDescription, expectedOctets, expectedNumericPieces) in testData { + guard let addr = IPv6Address(string) else { + XCTFail("Failed to parse valid address: \(string)") + continue + } + + XCTAssertEqual(Array(ipv6Octets: addr.octets), expectedOctets) + XCTAssertEqual(addr.serialized, expectedDescription) + XCTAssertEqual(Array(ipv6Pieces: addr[pieces: .numeric]), expectedNumericPieces) + XCTAssertEqual(Array(ipv6Pieces: addr[pieces: .binary]), expectedNumericPieces.map { UInt16(bigEndian: $0) }) + + guard let reparsedAddr = IPv6Address(addr.serialized) else { + XCTFail("Failed to reparse. Original: '\(string)'. Parsed: '\(addr.serialized)'") + continue + } + XCTAssertEqual(Array(ipv6Octets: addr.octets), Array(ipv6Octets: reparsedAddr.octets)) + XCTAssertEqual(addr.serialized, reparsedAddr.serialized) + } + } + + func testCompression() { + + let testData: [(String, String, [UInt8], [UInt16])] = [ + // Leading + ( + "::1234:F088", "::1234:f088", + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x34, 0xf0, 0x88], + [0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1234, 0xf088] + ), + ( + "0:0::0:192.168.0.2", "::c0a8:2", + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xa8, 0x00, 0x02], + [0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xc0a8, 0x0002] + ), + // Middle + ( + "1212:F0F0::3434:D0D0", "1212:f0f0::3434:d0d0", + [0x12, 0x12, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x34, 0xd0, 0xd0], + [0x1212, 0xf0f0, 0x0000, 0x0000, 0x0000, 0x0000, 0x3434, 0xd0d0] + ), + // Trailing + ( + "1234:F088::", "1234:f088::", + [0x12, 0x34, 0xf0, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + [0x1234, 
0xf088, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000] + ), + ] + + for (string, expectedDescription, expectedOctets, expectedNumericPieces) in testData { + guard let addr = IPv6Address(string) else { + XCTFail("Failed to parse valid address: \(string)") + continue + } + + XCTAssertEqual(Array(ipv6Octets: addr.octets), expectedOctets) + XCTAssertEqual(addr.serialized, expectedDescription) + XCTAssertEqual(Array(ipv6Pieces: addr[pieces: .numeric]), expectedNumericPieces) + XCTAssertEqual(Array(ipv6Pieces: addr[pieces: .binary]), expectedNumericPieces.map { UInt16(bigEndian: $0) }) + + guard let reparsedAddr = IPv6Address(addr.serialized) else { + XCTFail("Failed to reparse. Original: '\(string)'. Parsed: '\(addr.serialized)'") + continue + } + XCTAssertEqual(Array(ipv6Octets: addr.octets), Array(ipv6Octets: reparsedAddr.octets)) + XCTAssertEqual(addr.serialized, reparsedAddr.serialized) + } + } + + func testInvalid() { + + let invalidAddresses: [(String, IPv6Address.ParserError)] = [ + // - Invalid piece. + ("12345::", .unexpectedCharacter), + ("FG::", .unexpectedCharacter), + + // - Invalid compression. + (":", .unexpectedLeadingColon), + (":::", .multipleCompressedPieces), + ("F:", .unexpectedTrailingColon), + ("42:", .unexpectedTrailingColon), + + // - Invalid IPv4 piece. + + ("::ffff:555.168.0.1", .invalidIPv4Address), + ("::ffff:192.168.0.1.8", .invalidIPv4Address), + ("::ffff:192.168.a.1", .invalidIPv4Address), + ("::ffff:192.168.0.01", .invalidIPv4Address), + ("::ffff:192.168.0xf.1", .invalidIPv4Address), + ("::ffff:192.168.0.1.", .invalidIPv4Address), // trailing dot + ("::ffff:.168.0.1", .unexpectedPeriod), + // TODO: Improve this. Should be: "invalidPositionForIPv4Address" + ("0001:0002:0003:0004:192.168.0.1:0006:0007:0008", .invalidIPv4Address), + + // - Invalid number of pieces. 
+      ("0001:0002:0003:0004:0005", .notEnoughPieces),
+      ("0001:0002:0003:0004:0005:0006:0007:0008:0009", .tooManyPieces),
+    ]
+
+    // Records the most recent IPv6 validation error; any IPv4 error is a test failure.
+    struct LastParserError: IPAddressParserCallback {
+      var error: IPv6Address.ParserError?
+      mutating func validationError(ipv6 error: IPv6Address.ParserError) {
+        self.error = error
+      }
+      func validationError(ipv4 error: IPv4Address.ParserError) {
+        XCTFail("Unexpected IPv4 error: \(error)")
+      }
+    }
+
+    for (string, expectedError) in invalidAddresses {
+      var callback = LastParserError()
+      if let addr = IPv6Address.parse(utf8: string.utf8, callback: &callback) {
+        XCTFail("Invalid address '\(string)' was parsed as '\(addr)'")
+      }
+      XCTAssertEqual(
+        callback.error?.errorCode, expectedError.errorCode, "Unexpected error for invalid address '\(string)'"
+      )
+    }
+  }
+}
+
+// Randomized testing.
+
+#if canImport(Glibc) || canImport(Darwin)
+
+  #if canImport(Glibc)
+    import Glibc
+    let in6_union_property = \in6_addr.__in6_u
+  #elseif canImport(Darwin)
+    import Darwin
+    let in6_union_property = \in6_addr.__u6_addr
+  #endif
+
+  /// Parses `input` using libc's `inet_pton` and returns the 16 bytes of the resulting `in6_addr`,
+  /// or `nil` if libc did not produce an address.
+  ///
+  fileprivate func pton_octets(_ input: String) -> [UInt8]? {
+    var result = in6_addr()
+    // `inet_pton` returns 1 on success, 0 for an unparseable string, and -1 for an unsupported family.
+    // Only an exact 1 means that `result` was written.
+    guard inet_pton(AF_INET6, input, &result) == 1 else { return nil }
+    return withUnsafeBytes(of: &result[keyPath: in6_union_property].__u6_addr8) { bytes in
+      Array(bytes.bindMemory(to: UInt8.self))
+    }
+  }
+
+  /// Parses `input` using libc's `inet_pton` and returns the 8 16-bit pieces of the resulting `in6_addr`,
+  /// exactly as they are stored in memory, or `nil` if libc did not produce an address.
+  ///
+  fileprivate func pton_pieces(_ input: String) -> [UInt16]? {
+    var result = in6_addr()
+    // See `pton_octets`: anything other than 1 is a failure.
+    guard inet_pton(AF_INET6, input, &result) == 1 else { return nil }
+    return withUnsafeBytes(of: &result[keyPath: in6_union_property].__u6_addr16) { bytes in
+      Array(bytes.bindMemory(to: UInt16.self))
+    }
+  }
+
+  /// Serializes `address` using libc's `inet_ntop`, or returns `nil` if libc reports a failure.
+  ///
+  fileprivate func ntop_address(_ address: in6_addr) -> String? {
+    var src = address
+    // The buffer is zero-filled and INET6_ADDRSTRLEN bytes long, so whatever `inet_ntop` writes is
+    // followed by a NUL terminator which lies inside the array's bounds.
+    var buffer = [CChar](repeating: 0, count: Int(INET6_ADDRSTRLEN))
+    guard inet_ntop(AF_INET6, &src, &buffer, socklen_t(buffer.count)) != nil else { return nil }
+    return buffer.withUnsafeBufferPointer { $0.baseAddress.map { String(cString: $0) } }
+  }
+
+  /// Serializes the first 16 bytes of `input` as an IPv6 address using libc's `inet_ntop`.
+  /// Traps if `input` contains fewer than 16 bytes.
+  ///
+  fileprivate func ntop_octets(_ input: [UInt8]) -> String? {
+    precondition(input.count >= 16, "An IPv6 address requires 16 octets")
+    var src = in6_addr()
+    withUnsafeMutableBytes(of: &src) { $0.copyBytes(from: input.prefix(16)) }
+    return ntop_address(src)
+  }
+
+  /// Serializes the first 8 pieces of `input` as an IPv6 address using libc's `inet_ntop`.
+  /// The pieces are copied to the `in6_addr` as they are stored in memory, without any byte-swapping.
+  /// Traps if `input` contains fewer than 8 pieces.
+  ///
+  fileprivate func ntop_pieces(_ input: [UInt16]) -> String? {
+    precondition(input.count >= 8, "An IPv6 address requires 8 pieces")
+    var src = in6_addr()
+    withUnsafeMutableBytes(of: &src) { destination in
+      input.prefix(8).withUnsafeBytes { destination.copyMemory(from: $0) }
+    }
+    return ntop_address(src)
+  }
+
+  extension IPv6AddressTests {
+
+    /// Generate 1000 random IP addresses, serialize them via IPAddress.
+    /// Then serialize the same address via `ntop`, and ensure it returns the same string.
+    /// Then parse our serialized version back via `pton`, and ensure it returns the same address.
+    ///
+    func testRandom_Serialization() {
+      for _ in 0..<1000 {
+        let expected = IPv6Address.Utils.randomAddress()
+        let address = IPv6Address(pieces: expected, .binary)
+        if address.serialized.contains("::") {
+          XCTAssertTrue(Array(ipv6Pieces: expected)._longestSubrange(equalTo: 0).length > 0)
+        }
+
+        // Serialize with libc. It should return the same String.
+        let libcStr = ntop_pieces(Array(ipv6Pieces: expected))
+        if libcStr?.contains(".") == true {
+          // Exception: if the address <= UInt32.max, libc may print this as an embedded IPv4 address on some platforms
+          // (e.g. it prints "::198.135.80.188", we print "::c687:50bc").
+          XCTAssertTrue(Array(ipv6Pieces: expected).dropLast(2).allSatisfy { $0 == 0 })
+        } else {
+          XCTAssertEqual(libcStr, address.serialized)
+        }
+
+        // Parse our serialized output with libc. It should return the same address.
+ XCTAssertEqual(pton_octets(address.serialized), Array(ipv6Octets: address.octets)) + XCTAssertEqual(pton_pieces(address.serialized), Array(ipv6Pieces: address[pieces: .binary])) + } + } + + /// Generate 1000 random IP Address Strings, parse them via IPAddress, + /// check that the numerical value matches the expected network address, + /// and that `pton` gets the same result when parsing the same random String. + /// + func testRandom_Parsing() { + for _ in 0..<1000 { + let (randomPieces, randomAddressString) = IPv6Address.Utils.randomString() + guard let parsedAddress = IPv6Address(randomAddressString) else { + XCTFail("Failed to parse address: \(randomAddressString); expected pieces: \(randomPieces)") + continue + } + XCTAssertEqual(Array(ipv6Octets: parsedAddress.octets), pton_octets(randomAddressString)) + XCTAssertEqual(Array(ipv6Pieces: parsedAddress[pieces: .binary]), Array(ipv6Pieces: randomPieces)) + XCTAssertEqual(Array(ipv6Pieces: parsedAddress[pieces: .binary]), pton_pieces(randomAddressString)) + } + } + } + +#endif diff --git a/Tests/WebURLTests/ManagedArrayBufferTests.swift b/Tests/WebURLTests/ManagedArrayBufferTests.swift new file mode 100644 index 000000000..4d3e71e29 --- /dev/null +++ b/Tests/WebURLTests/ManagedArrayBufferTests.swift @@ -0,0 +1,460 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import Checkit +import XCTest + +@testable import WebURL + +final class ManagedArrayBufferTests: XCTestCase {} + +extension ManagedArrayBufferTests { + + struct BasicHeader: ManagedBufferHeader { + var count: Int + let capacity: Int + func withCapacity(minimumCapacity: Int, maximumCapacity: Int) -> BasicHeader? { + return BasicHeader(count: count, capacity: minimumCapacity) + } + } + + struct DataHolderHeader: ManagedBufferHeader { + var data: T + var count: Int + let capacity: Int + + func withCapacity(minimumCapacity: Int, maximumCapacity: Int) -> DataHolderHeader? { + return DataHolderHeader(data: data, count: count, capacity: minimumCapacity) + } + } + + func testEmpty() { + let emptyBuffer = ManagedArrayBuffer( + minimumCapacity: 10, initialHeader: BasicHeader(count: -1, capacity: -1) + ) + // Collection properties confirm that the buffer is empty. + XCTAssertEqual(emptyBuffer.startIndex, 0) + XCTAssertEqual(emptyBuffer.endIndex, 0) + XCTAssertEqual(emptyBuffer.count, 0) + XCTAssertTrue(emptyBuffer.isEmpty) + // Header properties are set appropriately. + XCTAssertEqual(emptyBuffer.header.count, 0) + XCTAssertGreaterThanOrEqual(emptyBuffer.header.capacity, 10) + } + + func testHeaderCOW() { + var original = ManagedArrayBuffer, Void>( + minimumCapacity: 10, initialHeader: DataHolderHeader(data: 42, count: -1, capacity: -1) + ) + let originalAddress = original.withUnsafeBufferPointer { $0.baseAddress } + XCTAssertEqual(original.header.data, 42) + + // Mutate the header. Ensure it doesn't copy as the buffer is unique. + original.header.data = 24 + XCTAssertEqual(original.header.data, 24) + XCTAssertEqual(original.header.count, 0) + XCTAssertGreaterThanOrEqual(original.header.capacity, 10) + XCTAssertEqual(originalAddress, original.withUnsafeBufferPointer { $0.baseAddress }) + + // Copy the reference and mutate via the copy. Ensure the mutation copies to new storage. 
+ var copy = original + copy.header.data = -88 + XCTAssertEqual(original.header.data, 24) + XCTAssertEqual(original.header.count, 0) + XCTAssertGreaterThanOrEqual(original.header.capacity, 10) + XCTAssertNotEqual(originalAddress, copy.withUnsafeBufferPointer { $0.baseAddress }) + XCTAssertEqual(copy.header.data, -88) + XCTAssertEqual(copy.header.count, 0) + // We can't say what the copy's capacity is. + } + + func testAppend() { + var buffer = ManagedArrayBuffer, Int>( + minimumCapacity: 10, initialHeader: DataHolderHeader(data: 42, count: -1, capacity: -1) + ) + // Appending works to fill the buffer. + // The returned range tells you where the elements were inserted, + // and the header's extra data is unmodified by the append. + let range0 = buffer.append(contentsOf: 100..<200) + XCTAssertEqualElements(buffer, 100..<200) + XCTAssertEqualElements(range0, 0..<100) + XCTAssertEqualElements(buffer[range0], 100..<200) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 100) + XCTAssertEqual(buffer.header.count, 100) + + let range1 = buffer.append(contentsOf: 0..<10) + XCTAssertEqualElements(buffer, [100..<200, 0..<10].joined()) + XCTAssertEqualElements(range1, 100..<110) + XCTAssertEqualElements(buffer[range1], 0..<10) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 110) + XCTAssertEqual(buffer.header.count, 110) + + let copy = buffer + + // Appending works when the buffer is not uniquely referenced. + // Appending a single element returns the index of that element, + // and the header's data is unmodified by the append. + let idx2 = buffer.append(500) + XCTAssertEqualElements(buffer, [100..<200, 0..<10, 500..<501].joined()) + XCTAssertEqual(idx2, 110) + XCTAssertEqual(buffer[idx2], 500) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 111) + XCTAssertEqual(buffer.header.count, 111) + // Since the buffer was not uniquely referenced, it copied. Other references don't see the append. 
+ XCTAssertEqualElements(copy, [100..<200, 0..<10].joined()) + XCTAssertEqual(copy.header.data, 42) + XCTAssertEqual(copy.count, 110) + XCTAssertEqual(copy.header.count, 110) + } + + func testCollectionConformance() { + var buffer = ManagedArrayBuffer, Int>( + minimumCapacity: 10, initialHeader: DataHolderHeader(data: 42, count: -1, capacity: -1) + ) + // Check conformance when buffer is empty. + CollectionChecker.check(buffer) + // Check conformance when non-empty. + buffer.append(contentsOf: 100..<200) + XCTAssertEqualElements(buffer, 100..<200) + CollectionChecker.check(buffer) + } + + func testMutableCollection() { + var buffer = ManagedArrayBuffer, Int>( + minimumCapacity: 10, initialHeader: DataHolderHeader(data: 42, count: -1, capacity: -1) + ) + buffer.append(contentsOf: 100..<200) + XCTAssertEqualElements(buffer, 100..<200) + + // Mutate in-place. The test for MutableCollection is that doing so does not invalidate any indexes. + XCTAssertEqual(buffer[20], 120) + XCTAssertEqual(buffer.header.data, 42) + let beforeIndices = buffer.indices + buffer[20] = -99 + XCTAssertEqual(buffer[20], -99) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.indices, beforeIndices) + // Check COW. + let copy = buffer + buffer[95] = Int.max + XCTAssertEqual(buffer[95], Int.max) + XCTAssertEqual(copy[95], 195) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(copy.header.data, 42) + } + + func testReserveCapacity() { + var buffer = ManagedArrayBuffer, Int>( + minimumCapacity: 10, initialHeader: DataHolderHeader(data: 42, count: -1, capacity: -1) + ) + buffer.append(contentsOf: 20..<25) + let originalAddress = buffer.withUnsafeBufferPointer { $0.baseAddress } + + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.header.count, 5) + XCTAssertEqual(buffer.header.capacity, 10) + XCTAssertEqual(originalAddress, buffer.withUnsafeBufferPointer { $0.baseAddress }) + XCTAssertEqualElements(buffer, 20..<25) + + // Reserve less than count. 
Should be a no-op. + buffer.reserveCapacity(2) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.header.count, 5) + XCTAssertEqual(buffer.header.capacity, 10) + XCTAssertEqual(originalAddress, buffer.withUnsafeBufferPointer { $0.baseAddress }) + XCTAssertEqualElements(buffer, 20..<25) + + // Reserve more than count. Should reserve. + buffer.reserveCapacity(500) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.header.count, 5) + XCTAssertEqual(buffer.header.capacity, 500) + XCTAssertNotEqual(originalAddress, buffer.withUnsafeBufferPointer { $0.baseAddress }) + XCTAssertEqualElements(buffer, 20..<25) + + // Make non-unique and reserve again. Should allocate new storage with the requested capacity. + let copy = buffer + + buffer.reserveCapacity(100) + buffer.append(25) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.header.count, 6) + XCTAssertEqual(buffer.header.capacity, 100) + XCTAssertEqualElements(buffer, 20..<26) + + // The old reference has the old capacity and contents. + XCTAssertEqual(copy.header.data, 42) + XCTAssertEqual(copy.header.count, 5) + XCTAssertEqual(copy.header.capacity, 500) + XCTAssertEqualElements(copy, 20..<25) + } + + func doTestReplaceSubrange(_ didFinishTestStep: (inout ManagedArrayBuffer, Int>) -> Void) { + // Test replacement: + // - At the start + // - In the middle + // - At the end + // and with the given range: + // - Shrinking (removing some elements, inserting fewer elements) + // - Growing (removing some elements, inserting more elements) + // - Removing (removing some elements, inserting no elements) + // - Inserting (removing no elements) + var buffer = ManagedArrayBuffer, Int>( + minimumCapacity: 10, initialHeader: DataHolderHeader(data: 42, count: -1, capacity: -1) + ) + buffer.append(contentsOf: 100..<200) + XCTAssertEqualElements(buffer, 100..<200) + buffer.reserveCapacity(500) // Make sure we never reallocate due to capacity, only due to COW. 
+ didFinishTestStep(&buffer) + + // Shrink at the start. + let range0 = buffer.replaceSubrange(0..<10, with: 10..<15) + XCTAssertEqualElements(buffer, [10..<15, 110..<200].joined()) + XCTAssertEqualElements(buffer[range0], 10..<15) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 95) + XCTAssertEqual(buffer.header.count, 95) + didFinishTestStep(&buffer) + + // Shrink in the middle. + let range1 = buffer.replaceSubrange(40..<60, with: 0..<5) + XCTAssertEqualElements(buffer, [10..<15, 110..<145, 0..<5, 165..<200].joined()) + XCTAssertEqualElements(buffer[range1], 0..<5) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 80) + XCTAssertEqual(buffer.header.count, 80) + didFinishTestStep(&buffer) + + // Shrink at the end. + let range2 = buffer.replaceSubrange(74..<80, with: CollectionOfOne(99)) + XCTAssertEqualElements(buffer, [10..<15, 110..<145, 0..<5, 165..<194, 99..<100].joined()) + XCTAssertEqualElements(buffer[range2], CollectionOfOne(99)) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 75) + XCTAssertEqual(buffer.header.count, 75) + didFinishTestStep(&buffer) + + // Remove everything. + let range3 = buffer.replaceSubrange(0..<75, with: EmptyCollection()) + XCTAssertEqualElements(buffer, []) + XCTAssertTrue(buffer.isEmpty) + XCTAssertEqual(range3, 0..<0) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 0) + XCTAssertEqual(buffer.header.count, 0) + didFinishTestStep(&buffer) + // Start afresh with new contents. + buffer.append(contentsOf: 100..<200) + XCTAssertEqualElements(buffer, 100..<200) + didFinishTestStep(&buffer) + + // Grow at the start. 
+ let range4 = buffer.replaceSubrange(0..<5, with: 500..<520) + XCTAssertEqualElements(buffer, [500..<520, 105..<200].joined()) + XCTAssertEqualElements(buffer[range4], 500..<520) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 115) + XCTAssertEqual(buffer.header.count, 115) + didFinishTestStep(&buffer) + + // Grow in the middle. + let range5 = buffer.replaceSubrange(50..<55, with: 500..<520) + XCTAssertEqualElements(buffer, [500..<520, 105..<135, 500..<520, 140..<200].joined()) + XCTAssertEqualElements(buffer[range5], 500..<520) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 130) + XCTAssertEqual(buffer.header.count, 130) + didFinishTestStep(&buffer) + + // Grow at the end. + let range6 = buffer.replaceSubrange(125..<130, with: 500..<520) + XCTAssertEqualElements(buffer, [500..<520, 105..<135, 500..<520, 140..<195, 500..<520].joined()) + XCTAssertEqualElements(buffer[range6], 500..<520) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 145) + XCTAssertEqual(buffer.header.count, 145) + didFinishTestStep(&buffer) + + // Start afresh. + buffer.replaceSubrange(0..<145, with: EmptyCollection()) + XCTAssertEqualElements(buffer, []) + XCTAssertTrue(buffer.isEmpty) + buffer.append(contentsOf: 100..<200) + XCTAssertEqualElements(buffer, 100..<200) + didFinishTestStep(&buffer) + + // Remove from the start. + let range7 = buffer.replaceSubrange(0..<10, with: EmptyCollection()) + XCTAssertEqualElements(buffer, [110..<200].joined()) + XCTAssertEqual(range7, 0..<0) + XCTAssertEqualElements(buffer[range7], EmptyCollection()) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 90) + XCTAssertEqual(buffer.header.count, 90) + didFinishTestStep(&buffer) + + // Remove from the middle. 
+ let range8 = buffer.replaceSubrange(40..<50, with: EmptyCollection()) + XCTAssertEqualElements(buffer, [110..<150, 160..<200].joined()) + XCTAssertEqual(range8, 40..<40) + XCTAssertEqualElements(buffer[range8], EmptyCollection()) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 80) + XCTAssertEqual(buffer.header.count, 80) + didFinishTestStep(&buffer) + + // Remove from the end. + let range9 = buffer.replaceSubrange(70..<80, with: EmptyCollection()) + XCTAssertEqualElements(buffer, [110..<150, 160..<190].joined()) + XCTAssertEqual(range9, 70..<70) + XCTAssertEqualElements(buffer[range9], EmptyCollection()) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 70) + XCTAssertEqual(buffer.header.count, 70) + didFinishTestStep(&buffer) + + // Start afresh. + buffer.replaceSubrange(0..<70, with: EmptyCollection()) + XCTAssertEqualElements(buffer, []) + XCTAssertTrue(buffer.isEmpty) + buffer.append(contentsOf: 100..<200) + XCTAssertEqualElements(buffer, 100..<200) + didFinishTestStep(&buffer) + + // Insert elements at the start. + let range10 = buffer.replaceSubrange(0..<0, with: 5..<10) + XCTAssertEqualElements(buffer, [5..<10, 100..<200].joined()) + XCTAssertEqualElements(buffer[range10], 5..<10) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 105) + XCTAssertEqual(buffer.header.count, 105) + didFinishTestStep(&buffer) + + // Insert in the middle. + let range11 = buffer.replaceSubrange(50..<50, with: 5..<10) + XCTAssertEqualElements(buffer, [5..<10, 100..<145, 5..<10, 145..<200].joined()) + XCTAssertEqualElements(buffer[range11], 5..<10) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 110) + XCTAssertEqual(buffer.header.count, 110) + didFinishTestStep(&buffer) + + // Insert at the end. 
+ let range12 = buffer.replaceSubrange(110..<110, with: 5..<10) + XCTAssertEqualElements(buffer, [5..<10, 100..<145, 5..<10, 145..<200, 5..<10].joined()) + XCTAssertEqualElements(buffer[range12], 5..<10) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 115) + XCTAssertEqual(buffer.header.count, 115) + didFinishTestStep(&buffer) + } + + func testReplaceSubrange_inplace() { + var lastAddress: UnsafePointer? + doTestReplaceSubrange { buffer in + let thisAddress = buffer.withUnsafeBufferPointer { $0.baseAddress } + if lastAddress != nil { + XCTAssertEqual(lastAddress, thisAddress) + } + lastAddress = thisAddress + } + } + + func testReplaceSubrange_outOfPlace() { + var lastAddress: UnsafePointer? + var lastBuffer: ManagedArrayBuffer, Int>? + doTestReplaceSubrange { buffer in + let thisAddress = buffer.withUnsafeBufferPointer { $0.baseAddress } + if lastAddress != nil { + XCTAssertNotEqual(lastAddress, thisAddress) + } + lastAddress = thisAddress + lastBuffer = buffer // Escape the buffer. + } + XCTAssertNotNil(lastBuffer) // Needs to be read to silence warning. + } +} + +extension ManagedArrayBufferTests { + + func doTestRemoveSubrange(_ didFinishTestStep: (inout ManagedArrayBuffer, Int>) -> Void) { + + var buffer = ManagedArrayBuffer, Int>( + minimumCapacity: 10, initialHeader: DataHolderHeader(data: 42, count: -1, capacity: -1) + ) + buffer.reserveCapacity(500) // Make sure we never reallocate due to capacity, only due to COW. + didFinishTestStep(&buffer) + + XCTAssertEqualElements(buffer, []) + XCTAssertTrue(buffer.isEmpty) + buffer.append(contentsOf: 100..<200) + XCTAssertEqualElements(buffer, 100..<200) + didFinishTestStep(&buffer) + + // Remove from the start. 
+ let index0 = buffer.removeSubrange(0..<10) + XCTAssertEqualElements(buffer, [110..<200].joined()) + XCTAssertEqual(index0, 0) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 90) + XCTAssertEqual(buffer.header.count, 90) + didFinishTestStep(&buffer) + + // Remove from the middle. + let index1 = buffer.removeSubrange(40..<50) + XCTAssertEqualElements(buffer, [110..<150, 160..<200].joined()) + XCTAssertEqual(index1, 40) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 80) + XCTAssertEqual(buffer.header.count, 80) + didFinishTestStep(&buffer) + + // Remove from the end. + let index2 = buffer.removeSubrange(70..<80) + XCTAssertEqualElements(buffer, [110..<150, 160..<190].joined()) + XCTAssertEqual(index2, 70) + XCTAssertEqual(buffer.header.data, 42) + XCTAssertEqual(buffer.count, 70) + XCTAssertEqual(buffer.header.count, 70) + didFinishTestStep(&buffer) + } + + func testRemoveSubrange_inplace() { + var lastAddress: UnsafePointer? + doTestRemoveSubrange { buffer in + let thisAddress = buffer.withUnsafeBufferPointer { $0.baseAddress } + if lastAddress != nil { + XCTAssertEqual(lastAddress, thisAddress) + } + lastAddress = thisAddress + } + } + + func testRemoveSubrange_outOfPlace() { + var lastAddress: UnsafePointer? + var lastBuffer: ManagedArrayBuffer, Int>? + doTestRemoveSubrange { buffer in + let thisAddress = buffer.withUnsafeBufferPointer { $0.baseAddress } + if lastAddress != nil { + XCTAssertNotEqual(lastAddress, thisAddress) + } + lastAddress = thisAddress + lastBuffer = buffer // Escape the buffer. + } + XCTAssertNotNil(lastBuffer) // Needs to be read to silence warning. + } +} diff --git a/Tests/WebURLTests/OtherUtilitiesTests.swift b/Tests/WebURLTests/OtherUtilitiesTests.swift new file mode 100644 index 000000000..05cb4834e --- /dev/null +++ b/Tests/WebURLTests/OtherUtilitiesTests.swift @@ -0,0 +1,161 @@ +// Copyright The swift-url Contributors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Checkit +import XCTest + +@testable import WebURL + +final class OtherUtilitiesTests: XCTestCase {} + +extension OtherUtilitiesTests { + + func testNonURLCodePoints() { + // The URL code points are ASCII alphanumeric, U+0021 (!), U+0024 ($), U+0026 (&), U+0027 ('), + // U+0028 LEFT PARENTHESIS, U+0029 RIGHT PARENTHESIS, U+002A (*), U+002B (+), U+002C (,), U+002D (-), + // U+002E (.), U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+003F (?), U+0040 (@), U+005F (_), + // U+007E (~), and code points in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters. + + XCTAssertFalse(hasNonURLCodePoints(utf8: "alpha123".utf8)) + + // ASCII. + for asciiCharacter in stringWithEveryASCIICharacter { + let isDisallowed = hasNonURLCodePoints(utf8: "alpha\(asciiCharacter)123".utf8) + switch ASCII(asciiCharacter.utf8.first!)! { + case ASCII.ranges.uppercaseAlpha, ASCII.ranges.lowercaseAlpha, + ASCII.ranges.digits, .exclamationMark, .dollarSign, .ampersand, .apostrophe, + .leftParenthesis, .rightParenthesis, .asterisk, .plus, .comma, .minus, + .period, .forwardSlash, .colon, .semicolon, .equalSign, .questionMark, .commercialAt, .underscore, + .tilde: + XCTAssertFalse(isDisallowed) + default: + XCTAssertTrue(isDisallowed, String(asciiCharacter) + "(\(asciiCharacter.utf8.first!)") + } + } + // Disallowed range up to U+00A0. 
+ XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0080}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0097}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{009F}123".utf8)) + // A sample of allowed non-ASCII codepoints. + XCTAssertFalse(hasNonURLCodePoints(utf8: "alpha\u{00A0}123".utf8)) + XCTAssertFalse(hasNonURLCodePoints(utf8: "alpha\u{00F0}123".utf8)) + XCTAssertFalse(hasNonURLCodePoints(utf8: "alpha\u{00B0D0}123".utf8)) + XCTAssertFalse(hasNonURLCodePoints(utf8: "alpha\u{01ABC0}123".utf8)) + XCTAssertFalse(hasNonURLCodePoints(utf8: "alpha\u{06DEF0}123".utf8)) + XCTAssertFalse(hasNonURLCodePoints(utf8: "alpha\u{10FFFD}123".utf8)) + + // Disallowed non-characters. + func codeUnitsWithScalar(_ scalar: Unicode.Scalar) -> [UInt8] { + var codeUnits: [UInt8] = [0x21] // "!" + UTF8.encode(scalar) { codeunit in codeUnits.append(codeunit) } + XCTAssertGreaterThan(codeUnits.count, 1) + codeUnits.append(0x3F) // "?" + return codeUnits + } + // String doesn't like it when we write some of these. 
+ XCTAssertFalse(hasNonURLCodePoints(utf8: codeUnitsWithScalar(Unicode.Scalar(0xFDCF)!))) + XCTAssertTrue(hasNonURLCodePoints(utf8: codeUnitsWithScalar(Unicode.Scalar(0xFDD0)!))) + XCTAssertTrue(hasNonURLCodePoints(utf8: codeUnitsWithScalar(Unicode.Scalar(0xFDDF)!))) + XCTAssertTrue(hasNonURLCodePoints(utf8: codeUnitsWithScalar(Unicode.Scalar(0xFDEF)!))) + XCTAssertFalse(hasNonURLCodePoints(utf8: codeUnitsWithScalar(Unicode.Scalar(0xFDF0)!))) + + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{01FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{01FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{02FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{02FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{03FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{03FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{04FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{04FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{05FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{05FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{06FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{06FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{07FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{07FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{08FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{08FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{09FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{09FFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0AFFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0AFFFF}123".utf8)) + 
XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0BFFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0BFFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0CFFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0CFFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0DFFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0DFFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0EFFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0EFFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0FFFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{0FFFFF}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{10FFFE}123".utf8)) + XCTAssertTrue(hasNonURLCodePoints(utf8: "alpha\u{10FFFF}123".utf8)) + + // Surrogates. + XCTAssertTrue(hasNonURLCodePoints(utf8: [0xED, 0xA0, 0x80])) // D800 + XCTAssertTrue(hasNonURLCodePoints(utf8: [0xED, 0xAA, 0xBC])) // DABC + XCTAssertTrue(hasNonURLCodePoints(utf8: [0xED, 0xBF, 0xBF])) // DFFF + } + + func testForbiddenHostCodePoint() { + /// A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR, + /// U+0020 SPACE, U+0023 (#), U+0025 (%), U+002F (/), U+003A (:), U+003C (<), U+003E (>), + /// U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or U+007C (|). 
+ for char in ASCII.allCharacters { + switch char { + case .null, .horizontalTab, .lineFeed, .carriageReturn, .space, .numberSign, .percentSign, .forwardSlash, + .colon, .lessThanSign, .greaterThanSign, .questionMark, .commercialAt, .leftSquareBracket, .backslash, + .rightSquareBracket, .circumflexAccent, .verticalBar: + XCTAssertTrue(char.isForbiddenHostCodePoint) + default: + XCTAssertFalse(char.isForbiddenHostCodePoint) + } + } + } +} + +extension OtherUtilitiesTests { + + func testTempStorageIsValidURL() { + let url = WebURL(storage: _tempStorage) + XCTAssertEqual(url.serialized, "a:") + XCTAssertURLIsIdempotent(url) + } +} + +extension OtherUtilitiesTests { + + func testFastInitialize() { + let buffer = UnsafeMutableBufferPointer.allocate(capacity: 256) + XCTAssertEqual(buffer.endIndex, 256) + + // Initialize with empty contiguous source. Should return 0. + XCTAssertEqual(buffer.fastInitialize(from: []), 0) + + // Partially initialize from contiguous source. Should return number of elements written. + XCTAssertEqual(buffer.fastInitialize(from: Array(0..<100)), 100) + XCTAssertEqualElements(buffer[0..<100], 0..<100) + + // Fully initialize from contiguous source. Should return number of elements written. + XCTAssertEqual(buffer.fastInitialize(from: Array(256..<512)), 256) + XCTAssertEqualElements(buffer, 256..<512) + + // Initialize with a too-large contiguous source. Should return the buffer's endIndex. + XCTAssertEqual(buffer.fastInitialize(from: Array(512..<4096)), buffer.endIndex) + XCTAssertEqualElements(buffer, 512..<768) + } +} diff --git a/Tests/WebURLTests/PathComponentsTests.swift b/Tests/WebURLTests/PathComponentsTests.swift new file mode 100644 index 000000000..5739fc42d --- /dev/null +++ b/Tests/WebURLTests/PathComponentsTests.swift @@ -0,0 +1,2148 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Checkit +import XCTest + +@testable import WebURL + +final class PathComponentsTests: XCTestCase {} + + +// -------------------------------------------- +// MARK: - Reading components +// -------------------------------------------- + + +extension PathComponentsTests { + + func testPathComponents_documentationExamples() { + + // WebURL.PathComponents type. + do { + var url = WebURL("http://example.com/swift/packages/%F0%9F%A6%86%20tracker")! + XCTAssertEqual(url.pathComponents.first, "swift") + XCTAssertEqual(url.pathComponents.last, "🦆 tracker") + + url.pathComponents.removeLast() + url.pathComponents.append("swift-url") + XCTAssertEqual(url.serialized, "http://example.com/swift/packages/swift-url") + } + do { + var url = WebURL("file:///")! + XCTAssertEqual(url.pathComponents.last, "") + XCTAssertEqual(url.pathComponents.count, 1) + + url.pathComponents.append("usr") + XCTAssertEqual(url.serialized, "file:///usr") + XCTAssertEqual(url.pathComponents.count, 1) + + url.pathComponents += ["bin", "swift"] + XCTAssertEqual(url.serialized, "file:///usr/bin/swift") + XCTAssertEqual(url.pathComponents.last, "swift") + XCTAssertEqual(url.pathComponents.count, 3) + + url.pathComponents.ensureDirectoryPath() + XCTAssertEqual(url.serialized, "file:///usr/bin/swift/") + XCTAssertEqual(url.pathComponents.last, "") + XCTAssertEqual(url.pathComponents.count, 4) + } + // WebURL.PathComponents.replaceSubrange + do { + var url = WebURL("file:///usr/bin/swift")! + let lastTwo = url.pathComponents.index(url.pathComponents.endIndex, offsetBy: -2).. 
append "foo" -> "/foo", not "//foo" + + // Append a non-empty component to a root path. + do { + var url = WebURL("foo://example.com/?aQuery#someFragment")! + XCTAssertEqualElements(url.pathComponents, [""]) + + let range = url.pathComponents.replaceSubrange( + url.pathComponents.endIndex..(encodingWith encoder: (String) -> T) where T: Collection, T.Element == UInt8 { + let testData: [String] = [ + stringWithEveryASCIICharacter, + stringWithEveryASCIICharacter + "😎✈️🏝🍹" + stringWithEveryASCIICharacter.shuffled(), + "%00this is not percent-encoded: %20", + "nochange0123456789", + "", + ] + for input in testData { + let encodedUTF8 = encoder(input) + for codeUnit in encodedUTF8 { + guard let ascii = ASCII(codeUnit) else { + XCTFail("Found non-ASCII byte in percent-encoded string") + continue + } + // The 'component' encode set is a superset of the following sets. + XCTAssertFalse(PercentEncodeSet.C0Control.shouldPercentEncode(ascii: codeUnit)) + XCTAssertFalse(PercentEncodeSet.Path.shouldPercentEncode(ascii: codeUnit)) + XCTAssertFalse(PercentEncodeSet.Query_NotSpecial.shouldPercentEncode(ascii: codeUnit)) + XCTAssertFalse(PercentEncodeSet.Fragment.shouldPercentEncode(ascii: codeUnit)) + XCTAssertFalse(PercentEncodeSet.UserInfo.shouldPercentEncode(ascii: codeUnit)) + // The only set it does not contain is the special-query set, which includes the extra U+0027. + if PercentEncodeSet.Query_Special.shouldPercentEncode(ascii: codeUnit) { + XCTAssertEqual(ascii, .apostrophe) + } + // Strings encoded by the 'component' set should not contain forbidden host code-points (other than '%'). + XCTAssertFalse( + ascii.isForbiddenHostCodePoint && ascii != .percentSign, + "Forbidden host code point: \(Character(UnicodeScalar(ascii.codePoint)))" + ) + } + // The 'component' encode set should always preserve its contents, even if it contains + // things that look like percent-encode sequences (maybe someone really meant to write "%20"). 
+ XCTAssertEqualElements(encodedUTF8.lazy.percentDecodedUTF8, input.utf8) + } + + // An important feature of the component encode-set is that it includes the % sign itself (U+0025). + XCTAssertTrue(PercentEncodeSet.Component.shouldPercentEncode(ascii: ASCII.percentSign.codePoint)) + } + + func testEncodeSet_Component() { + do_testEncodeSet_Component(encodingWith: { + $0.utf8.lazy.percentEncoded(as: \.component) + }) + } + + func testTable() { + XCTAssert(percent_encoding_table.count == 128) + } + + func testDualImplementationEquivalence() { + func testEncodeSet(_: EncodeSet.Type) { + for char in ASCII.allCharacters { + XCTAssertEqual( + EncodeSet.shouldEscape_binary(ascii: char.codePoint), + EncodeSet.shouldEscape_table(ascii: char.codePoint), + "Mismatch for character \"\(char)\" (#\(char.codePoint)) in encode set #\(EncodeSet.self)" + ) + } + } + testEncodeSet(PercentEncodeSet.C0Control.self) + testEncodeSet(PercentEncodeSet.Fragment.self) + testEncodeSet(PercentEncodeSet.Query_NotSpecial.self) + testEncodeSet(PercentEncodeSet.Query_Special.self) + testEncodeSet(PercentEncodeSet.Path.self) + testEncodeSet(PercentEncodeSet.UserInfo.self) + testEncodeSet(PercentEncodeSet.Component.self) + testEncodeSet(PercentEncodeSet.FormEncoded.self) + } +} + +extension PercentEncodingTests { + + func testPercentEncoded() { + XCTAssertEqualElements("hello, world!".percentEncoded(as: \.userInfo), "hello,%20world!") + XCTAssertEqualElements("/usr/bin/swift".percentEncoded(as: \.component), "%2Fusr%2Fbin%2Fswift") + XCTAssertEqualElements("got en%63oders?".percentEncoded(as: \.userInfo), "got%20en%63oders%3F") + XCTAssertEqualElements("king of the 🦆s".percentEncoded(as: \.form), "king+of+the+%F0%9F%A6%86s") + } + + func testURLComponentEncoded() { + XCTAssertEqual("hello, world!".urlComponentEncoded, "hello%2C%20world!") + XCTAssertEqual("/usr/bin/swift".urlComponentEncoded, "%2Fusr%2Fbin%2Fswift") + XCTAssertEqual("😎".urlComponentEncoded, "%F0%9F%98%8E") + // The 
.urlComponentEncoded property should use the component encode set. + do_testEncodeSet_Component(encodingWith: { $0.urlComponentEncoded.utf8 }) + } + + func testURLFormEncoded() { + let myKVPs: KeyValuePairs = ["favourite pet": "🦆, of course", "favourite foods": "🍎 & 🍦"] + let form = myKVPs.map { key, value in "\(key.urlFormEncoded)=\(value.urlFormEncoded)" } + .joined(separator: "&") + XCTAssertEqual(form, "favourite+pet=%F0%9F%A6%86%2C+of+course&favourite+foods=%F0%9F%8D%8E+%26+%F0%9F%8D%A6") + } + + func testPercentDecodedWithEncodeSet() { + XCTAssertEqual("hello,%20world!".percentDecoded(from: \.percentEncodedOnly), "hello, world!") + XCTAssertEqual("%2Fusr%2Fbin%2Fswift".percentDecoded(from: \.percentEncodedOnly), "/usr/bin/swift") + XCTAssertEqual("king+of+the+%F0%9F%A6%86s".percentDecoded(from: \.form), "king of the 🦆s") + } + + func testPercentDecoded() { + XCTAssertEqual("hello%2C%20world!".percentDecoded, "hello, world!") + XCTAssertEqual("%2Fusr%2Fbin%2Fswift".percentDecoded, "/usr/bin/swift") + XCTAssertEqual("%F0%9F%98%8E".percentDecoded, "😎") + + // Check that we only do percent-decoding, not form-decoding. 
+ XCTAssertEqual("king+of+the+%F0%9F%A6%86s".percentDecoded, "king+of+the+🦆s") + } + + func testURLFormDecoded() { + let form = "favourite+pet=%F0%9F%A6%86%2C+of+course&favourite+foods=%F0%9F%8D%8E+%26+%F0%9F%8D%A6" + let decoded = form.split(separator: "&").map { joined_kvp in joined_kvp.split(separator: "=") } + .map { kvp in (kvp[0].urlFormDecoded, kvp[1].urlFormDecoded) } + XCTAssertEqual(decoded.count, 2) + XCTAssertTrue(decoded[0] == ("favourite pet", "🦆, of course")) + XCTAssertTrue(decoded[1] == ("favourite foods", "🍎 & 🍦")) + } +} diff --git a/Tests/WebURLTests/Resources/additional_constructor_tests.json b/Tests/WebURLTests/Resources/additional_constructor_tests.json new file mode 100644 index 000000000..fdd4a34e5 --- /dev/null +++ b/Tests/WebURLTests/Resources/additional_constructor_tests.json @@ -0,0 +1,876 @@ +[ + "[Path] Single-dot components are skipped and do not get popped", + { + "input": "file:/a/./..", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:/a/./././..", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "[Path] Various oddball paths with base URLs.", + { + "input": ".", + "base": "file:///a/b/", + "href": "file:///a/b/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a/b/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///a/b/c", + "href": "file:///a/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a/", + "search": "", + "hash": "" + }, + { + "input": "...", + "base": "file:///a/b/...", + "href": "file:///a/b/...", + "protocol": "file:", 
+ "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a/b/...", + "search": "", + "hash": "" + }, + { + "input": "./.", + "base": "file:///a/b/", + "href": "file:///a/b/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a/b/", + "search": "", + "hash": "" + }, + { + "input": "../", + "base": "http://example.com", + "href": "http://example.com/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "..///", + "base": "http://example.com", + "href": "http://example.com///", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "///", + "search": "", + "hash": "" + }, + { + "input": "./.", + "base": "non-special:///a/b/", + "href": "non-special:///a/b/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a/b/", + "search": "", + "hash": "" + }, + { + "input": "./../1/2/../", + "base": "non-special:///a/b/c/d", + "href": "non-special:///a/b/1/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a/b/1/", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "non-special://somehost", + "href": "non-special://somehost/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "somehost", + "hostname": "somehost", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "a/../../../../", + "base": "http://example.com/1/2/3/4/5/6", + "href": "http://example.com/1/2/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/1/2/", + "search": 
"", + "hash": "" + }, + "[Path][Win] 0 slashes - Relative paths. The parser in the standard first checks to see if the drive is literally at the start of the string, and if not, copies in the base path components so the effective first component (e.g. after popping, etc) becomes the drive. \n\n What this means is that Windows drive components are resolved just like regular relative path components, unless they are at the very start of the string (where they force the whole thing to be treated like an absolute path).", + { + "input": "file:C|", + "base": "about:blank", + "href": "file:///C:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "file:./D|/../foo", + "base": "about:blank", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file:./D|/../foo", + "base": "file:///bar", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file:./D|/../foo", + "base": "file:///bar/", + "href": "file:///bar/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/bar/foo", + "search": "", + "hash": "" + }, + { + "input": "file:D|/../foo", + "base": "file:///bar/", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file:./D:/../foo", + "base": "file:///C:/base1/base2/", + "href": "file:///C:/base1/base2/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": 
"/C:/base1/base2/foo", + "search": "", + "hash": "" + }, + { + "input": "file:D|/../foo", + "base": "file:///C:/base1/base2/", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file:./D|/../foo", + "base": "file:///bar/baz/qux/", + "href": "file:///bar/baz/qux/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/bar/baz/qux/foo", + "search": "", + "hash": "" + }, + { + "input": "file:../../../D|/../foo", + "base": "file:///bar/baz/qux/", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "foo", + "base": "file:///C:/base1/base2/base3", + "href": "file:///C:/base1/base2/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/base1/base2/foo", + "search": "", + "hash": "" + }, + { + "input": "a", + "base": "file:///C:", + "href": "file:///C:/a", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/a", + "search": "", + "hash": "" + }, + "[Path][Win] 1 slash - Absolute paths. If the parser in the standard goes down the 'file slash' state, absolute paths will be checked to see if they literally begin with a Windows drive letter as their first component, and otherwise (despite being absolute), will be relative to the base URL's drive letter (if it has one). They only copy the drive from the base URL, not any other parts of the path. \n\n Note that this only applies to the 'file slash' state - i.e. 
'/C:/Windows' or 'file:C:/Windows', and means that in a path like '/./D|/../foo' with a base URL of 'file:///C:/bar/', the 'D|' drive in the input will not be recognised because of 'C:' in the base URL (however, without that 'C:' in the base, we would recognise 'D|' as a drive).", + { + "input": "file:/D|/../foo", + "base": "file:///bar/baz/qux/", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file:/.././D|/../foo", + "base": "file:///bar/baz/qux/", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file:/abc/def/../.././D|/../foo", + "base": "file:///bar/baz/qux/", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file:/abc/def/../../ghi/./D|/../foo", + "base": "file:///bar/baz/qux/", + "href": "file:///ghi/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/ghi/foo", + "search": "", + "hash": "" + }, + { + "input": "file:/D|/../foo", + "base": "file:///C:/base1/base2/", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file:/./D|/../foo", + "base": "file:///C:/base1/base2/", + "href": "file:///C:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo", + "search": "", + "hash": "" + }, + { + "input": "not-file:/abc/def/../.././D|/../foo", + "base": "not-file:///bar/baz/qux/", + "href": 
"not-file:/foo", + "protocol": "not-file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + "Absolute paths which don't have their own drive letter are still relative to the base URL drive. (because they go down the 'file slash' path).", + { + "input": "/hello", + "base": "file:///C:/bar/", + "href": "file:///C:/hello", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/hello", + "search": "", + "hash": "" + }, + { + "input": "file:/hello", + "base": "file:///C:/bar/", + "href": "file:///C:/hello", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/hello", + "search": "", + "hash": "" + }, + "But absolute paths from URLs with authorities are never relative to the base URL drive. (because they go down the 'path' path).", + { + "input": "file:///hello", + "base": "file:///C:/bar/", + "href": "file:///hello", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/hello", + "search": "", + "hash": "" + }, + "[Path][Win] 2+ slashes - Misplaced authorities. The parser in the standard has a special 'file host' state, which checks to see if the component in the hostname position is a Windows drive and, if it is, forwards that to the regular 'path' state as a first component to the otherwise empty path. \n\n The 'file host' state never copies any path components from the base URL (if present), so it is kind of \"more absolute\" than even a regular '/usr/bin/'-style absolute path. 
In the sense that '/usr/bin/' with a base URL of 'file:///C:/' is 'file:///C:/usr/bin/', but '///usr/bin/' always results in 'file:///usr/bin/', regardless of whether the base URL contains a Windows drive letter.", + { + "input": "\\\\D|\\..\\foo", + "base": "file:///C:/bar/baz/qux", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file://D|/../foo", + "base": "file:///C:/bar/baz/qux", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "file://./D|/../foo", + "base": "file:///C:/bar/baz/qux", + "href": "file://./D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": ".", + "hostname": ".", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + "[Path] 3+ leading slashes.", + { + "input": "file:///././D|/../foo", + "base": "file:///C:/bar/baz/qux", + "href": "file:///D:/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/foo", + "search": "", + "hash": "" + }, + { + "input": "///usr/bin/x", + "base": "file:///C:/foo/bar/baz/", + "href": "file:///usr/bin/x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/usr/bin/x", + "search": "", + "hash": "" + }, + { + "input": "file:///usr/bin/x", + "base": "file:///C:/foo/bar/baz/", + "href": "file:///usr/bin/x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/usr/bin/x", + "search": "", + "hash": "" + }, + "[Path] Leading empty components disqualify a potential Windows drive.", + { + "input": "file://///////////C|/../D|/../foo", + "base": "about:blank", + 
"href": "file://///////////foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///////////foo", + "search": "", + "hash": "" + }, + "[Path][WEBURL-SPECIFIC]: Check that components which are deferred are not popped when flushing due to a '..' component.", + { + "input": "file:../C|/foo", + "base": "file:///D:/base1/base2/base3", + "href": "file:///D:/base1/C|/foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/D:/base1/C|/foo", + "search": "", + "hash": "" + }, + { + "input": "file:////C:/../..", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "[OTHER] Check that query component is correctly not-copied for (file/special/non-special) schemes.", + { + "input": "pop", + "base": "file://hostname/o1/o2?someQuery", + "href": "file://hostname/o1/pop", + "protocol": "file:", + "username": "", + "password": "", + "host": "hostname", + "hostname": "hostname", + "port": "", + "pathname": "/o1/pop", + "search": "", + "hash": "" + }, + { + "input": "/pop", + "base": "file://hostname/o1/o2?someQuery", + "href": "file://hostname/pop", + "protocol": "file:", + "username": "", + "password": "", + "host": "hostname", + "hostname": "hostname", + "port": "", + "pathname": "/pop", + "search": "", + "hash": "" + }, + { + "input": "pop", + "base": "http://hostname/o1/o2?someQuery", + "href": "http://hostname/o1/pop", + "protocol": "http:", + "username": "", + "password": "", + "host": "hostname", + "hostname": "hostname", + "port": "", + "pathname": "/o1/pop", + "search": "", + "hash": "" + }, + { + "input": "/pop", + "base": "http://hostname/o1/o2?someQuery", + "href": "http://hostname/pop", + "protocol": "http:", + "username": "", + "password": "", + "host": "hostname", + "hostname": 
"hostname", + "port": "", + "pathname": "/pop", + "search": "", + "hash": "" + }, + { + "input": "pop", + "base": "non-special://hostname/o1/o2?someQuery", + "href": "non-special://hostname/o1/pop", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "hostname", + "hostname": "hostname", + "port": "", + "pathname": "/o1/pop", + "search": "", + "hash": "" + }, + { + "input": "/pop", + "base": "non-special://hostname/o1/o2?someQuery", + "href": "non-special://hostname/pop", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "hostname", + "hostname": "hostname", + "port": "", + "pathname": "/pop", + "search": "", + "hash": "" + }, + "[OTHER] File URLs with invalid hostnames should fail to parse, even if the path begins with a Windows drive letter.", + { + "input": "file://^/C:/hello", + "base": "about:blank", + "failure": true + }, + "[OTHER] Check we do not fail to yield a path when the input contributes nothing and the base URL has a 'nil' path.", + { + "input": "..", + "base": "sc://a", + "href": "sc://a/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "../..///.", + "base": "sc://a", + "href": "sc://a///", + "protocol": "sc:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "///", + "search": "", + "hash": "" + }, + "[OTHER] Ensure that we always flush trailing empties if the first component doesn't get yielded.", + { + "input": "././././////b", + "base": "sc://a", + "href": "sc://a/////b", + "protocol": "sc:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/////b", + "search": "", + "hash": "" + }, + "[OTHER] Code coverage for relative file paths which sum to nothing.", + { + "input": ".", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", 
+ "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "../b/..", + "base": "file:///a", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "[OTHER] Percent-encoded/mixed-case localhost.", + { + "input": "file://loc%61lhost/some/path", + "base": "about:blank", + "href": "file:///some/path", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/some/path", + "search": "", + "hash": "" + }, + { + "input": "file://locAlhost/some/path", + "base": "about:blank", + "href": "file:///some/path", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/some/path", + "search": "", + "hash": "" + }, + "[OTHER] Double slash at start of base path (with host - not related to idempotence fix from Aug 2020).", + { + "input": "path", + "base": "non-spec://host/..//p", + "href": "non-spec://host//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + "[OTHER] Path idempotence fixes from Aug 2020.", + { + "input": "hello", + "base": "web+demo:/.//not-a-host/test", + "href": "web+demo:/.//not-a-host/hello", + "protocol": "web+demo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//not-a-host/hello", + "search": "", + "hash": "" + }, + { + "input": "hello/..", + "base": "web+demo:/.//not-a-host/test", + "href": "web+demo:/.//not-a-host/", + "protocol": "web+demo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//not-a-host/", + "search": "", + "hash": "" + }, + { + "input": "hello/../..", + "base": "web+demo:/.//not-a-host/test", + "href": 
"web+demo:/.//", + "protocol": "web+demo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "hello/../../..", + "base": "web+demo:/.//not-a-host/test", + "href": "web+demo:/", + "protocol": "web+demo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "[HOST]: Domain is ASCII, but a component contains invalid IDNA", + { + "input": "http://a.b.c.xn--pokxncvks", + "base": "about:blank", + "failure": true + }, + { + "input": "http://10.0.0.xn--pokxncvks", + "base": "about:blank", + "failure": true + } +] diff --git a/Tests/WebURLTests/Resources/additional_setters_tests.json b/Tests/WebURLTests/Resources/additional_setters_tests.json new file mode 100644 index 000000000..7cae9a224 --- /dev/null +++ b/Tests/WebURLTests/Resources/additional_setters_tests.json @@ -0,0 +1,83 @@ +{ + "comment": [ + "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members", + "", + "This file contains a JSON object.", + "Other than 'comment', each key is an attribute of the `URL` interface", + "defined in WHATWG’s URL Standard.", + "The values are arrays of test case objects for that attribute.", + "", + "To run a test case for the attribute `attr`:", + "", + "* Create a new `URL` object with the value for the 'href' key", + " the constructor single parameter. (Without a base URL.)", + " This must not throw.", + "* Set the attribute `attr` to (invoke its setter with)", + " with the value of for 'new_value' key.", + "* The value for the 'expected' key is another object.", + " For each `key` / `value` pair of that object,", + " get the attribute `key` (invoke its getter).", + " The returned string must be equal to `value`.", + "", + "Note: the 'href' setter is already covered by urltestdata.json." 
+ ], + "protocol": [ + { + "comment": "Tabs and newlines are ignored", + "href": "a://example.net", + "new_value": "\u0009\u0009fo\u000Ao", + "expected": { + "href": "foo://example.net", + "protocol": "foo:" + } + } + ], + "username": [ + { + "comment": "Tabs and newlines are not ignored", + "href": "http://example.com/", + "new_value": "\u0009us\u000Aer", + "expected": { + "href": "http://%09us%0Aer@example.com/", + "username": "%09us%0Aer" + } + } + ], + "password": [ + { + "comment": "Tabs and newlines are not ignored", + "href": "http://example.com/", + "new_value": "pa\u0009\u000Ass\u0009", + "expected": { + "href": "http://:pa%09%0Ass%09@example.com/", + "password": "pa%09%0Ass%09" + } + } + ], + "port": [ + { + "comment": "Tabs and newlines are ignored", + "href": "http://example.com/", + "new_value": "\u00099\u000A\u00090", + "expected": { + "href": "http://example.com:90/", + "port": "90" + } + } + ], + "pathname": [ + { + "comment": "Tabs and newlines are ignored", + "href": "http://example.com/", + "new_value": "\u0009/\u000Ahel\u0009lo\u0009/\u0009\u0009world/\u0009.\u0009.", + "expected": { + "href": "http://example.com/hello/", + "pathname": "/hello/" + } + } + ], + "host": [], + "hostname": [], + "search": [], + "hash": [] +} diff --git a/Tests/WebURLTests/Resources/setters_tests.json b/Tests/WebURLTests/Resources/setters_tests.json new file mode 100644 index 000000000..8aa74d6b8 --- /dev/null +++ b/Tests/WebURLTests/Resources/setters_tests.json @@ -0,0 +1,1950 @@ +{ + "comment": [ + "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members", + "", + "This file contains a JSON object.", + "Other than 'comment', each key is an attribute of the `URL` interface", + "defined in WHATWG’s URL Standard.", + "The values are arrays of test case objects for that attribute.", + "", + "To run a test case for the attribute `attr`:", + "", + "* Create a new `URL` object with the value for the 'href' key", + " the constructor single parameter. 
(Without a base URL.)", + " This must not throw.", + "* Set the attribute `attr` to (invoke its setter with)", + " with the value of for 'new_value' key.", + "* The value for the 'expected' key is another object.", + " For each `key` / `value` pair of that object,", + " get the attribute `key` (invoke its getter).", + " The returned string must be equal to `value`.", + "", + "Note: the 'href' setter is already covered by urltestdata.json." + ], + "protocol": [ + { + "comment": "The empty string is not a valid scheme. Setter leaves the URL unchanged.", + "href": "a://example.net", + "new_value": "", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "href": "a://example.net", + "new_value": "b", + "expected": { + "href": "b://example.net", + "protocol": "b:" + } + }, + { + "href": "javascript:alert(1)", + "new_value": "defuse", + "expected": { + "href": "defuse:alert(1)", + "protocol": "defuse:" + } + }, + { + "comment": "Upper-case ASCII is lower-cased", + "href": "a://example.net", + "new_value": "B", + "expected": { + "href": "b://example.net", + "protocol": "b:" + } + }, + { + "comment": "Non-ASCII is rejected", + "href": "a://example.net", + "new_value": "é", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "comment": "No leading digit", + "href": "a://example.net", + "new_value": "0b", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "comment": "No leading punctuation", + "href": "a://example.net", + "new_value": "+b", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "href": "a://example.net", + "new_value": "bC0+-.", + "expected": { + "href": "bc0+-.://example.net", + "protocol": "bc0+-.:" + } + }, + { + "comment": "Only some punctuation is acceptable", + "href": "a://example.net", + "new_value": "b,c", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "comment": "Non-ASCII is rejected", + "href": 
"a://example.net", + "new_value": "bé", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "comment": "Can’t switch from URL containing username/password/port to file", + "href": "http://test@example.net", + "new_value": "file", + "expected": { + "href": "http://test@example.net/", + "protocol": "http:" + } + }, + { + "href": "https://example.net:1234", + "new_value": "file", + "expected": { + "href": "https://example.net:1234/", + "protocol": "https:" + } + }, + { + "href": "wss://x:x@example.net:1234", + "new_value": "file", + "expected": { + "href": "wss://x:x@example.net:1234/", + "protocol": "wss:" + } + }, + { + "comment": "Can’t switch from file URL with no host", + "href": "file://localhost/", + "new_value": "http", + "expected": { + "href": "file:///", + "protocol": "file:" + } + }, + { + "href": "file:///test", + "new_value": "https", + "expected": { + "href": "file:///test", + "protocol": "file:" + } + }, + { + "href": "file:", + "new_value": "wss", + "expected": { + "href": "file:///", + "protocol": "file:" + } + }, + { + "comment": "Can’t switch from special scheme to non-special", + "href": "http://example.net", + "new_value": "b", + "expected": { + "href": "http://example.net/", + "protocol": "http:" + } + }, + { + "href": "file://hi/path", + "new_value": "s", + "expected": { + "href": "file://hi/path", + "protocol": "file:" + } + }, + { + "href": "https://example.net", + "new_value": "s", + "expected": { + "href": "https://example.net/", + "protocol": "https:" + } + }, + { + "href": "ftp://example.net", + "new_value": "test", + "expected": { + "href": "ftp://example.net/", + "protocol": "ftp:" + } + }, + { + "comment": "Cannot-be-a-base URL doesn’t have a host, but URL in a special scheme must.", + "href": "mailto:me@example.net", + "new_value": "http", + "expected": { + "href": "mailto:me@example.net", + "protocol": "mailto:" + } + }, + { + "comment": "Can’t switch from non-special scheme to special", + "href": 
"ssh://me@example.net", + "new_value": "http", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://me@example.net", + "new_value": "https", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://me@example.net", + "new_value": "file", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://example.net", + "new_value": "file", + "expected": { + "href": "ssh://example.net", + "protocol": "ssh:" + } + }, + { + "href": "nonsense:///test", + "new_value": "https", + "expected": { + "href": "nonsense:///test", + "protocol": "nonsense:" + } + }, + { + "comment": "Stuff after the first ':' is ignored", + "href": "http://example.net", + "new_value": "https:foo : bar", + "expected": { + "href": "https://example.net/", + "protocol": "https:" + } + }, + { + "comment": "Stuff after the first ':' is ignored", + "href": "data:text/html,<p>Test", + "new_value": "view-source+data:foo : bar", + "expected": { + "href": "view-source+data:text/html,<p>Test", + "protocol": "view-source+data:" + } + }, + { + "comment": "Port is set to null if it is the default for new scheme.", + "href": "http://foo.com:443/", + "new_value": "https", + "expected": { + "href": "https://foo.com/", + "protocol": "https:", + "port": "" + } + } + ], + "username": [ + { + "comment": "No host means no username", + "href": "file:///home/you/index.html", + "new_value": "me", + "expected": { + "href": "file:///home/you/index.html", + "username": "" + } + }, + { + "comment": "No host means no username", + "href": "unix:/run/foo.socket", + "new_value": "me", + "expected": { + "href": "unix:/run/foo.socket", + "username": "" + } + }, + { + "comment": "Cannot-be-a-base means no username", + "href": "mailto:you@example.net", + "new_value": "me", + "expected": { + "href": "mailto:you@example.net", + "username": "" + } + }, + { + "href": "javascript:alert(1)", + "new_value": "wario", + "expected": { + "href": "javascript:alert(1)", + "username": "" + } + }, + { + "href": "http://example.net", + "new_value": "me", + "expected": { + "href": "http://me@example.net/", + "username": "me" + } + }, + { + "href": "http://:secret@example.net", + "new_value": "me", + "expected": { + "href": "http://me:secret@example.net/", + "username": "me" + } + }, + { + "href": "http://me@example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "username": "" + } + }, + { + "href": "http://me:secret@example.net", + "new_value": "", + "expected": { + "href": "http://:secret@example.net/", + "username": "" + } + }, + { + "comment": "UTF-8 percent encoding with the userinfo encode set.", + "href": "http://example.net", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "http://%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", + "username": 
"%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is.", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://%c3%89t%C3%A9@example.net/", + "username": "%c3%89t%C3%A9" + } + }, + { + "href": "sc:///", + "new_value": "x", + "expected": { + "href": "sc:///", + "username": "" + } + }, + { + "href": "javascript://x/", + "new_value": "wario", + "expected": { + "href": "javascript://wario@x/", + "username": "wario" + } + }, + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "username": "" + } + } + ], + "password": [ + { + "comment": "No host means no password", + "href": "file:///home/me/index.html", + "new_value": "secret", + "expected": { + "href": "file:///home/me/index.html", + "password": "" + } + }, + { + "comment": "No host means no password", + "href": "unix:/run/foo.socket", + "new_value": "secret", + "expected": { + "href": "unix:/run/foo.socket", + "password": "" + } + }, + { + "comment": "Cannot-be-a-base means no password", + "href": "mailto:me@example.net", + "new_value": "secret", + "expected": { + "href": "mailto:me@example.net", + "password": "" + } + }, + { + "href": "http://example.net", + "new_value": "secret", + "expected": { + "href": "http://:secret@example.net/", + "password": "secret" + } + }, + { + "href": "http://me@example.net", + "new_value": "secret", + "expected": { + "href": "http://me:secret@example.net/", + "password": "secret" + } + }, + { + "href": "http://:secret@example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "password": "" + } + }, + { + "href": "http://me:secret@example.net", + "new_value": "", + "expected": { + "href": "http://me@example.net/", + "password": "" + } + }, + { + "comment": "UTF-8 percent encoding with the userinfo encode set.", + "href": 
"http://example.net", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "http://:%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", + "password": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is.", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://:%c3%89t%C3%A9@example.net/", + "password": "%c3%89t%C3%A9" + } + }, + { + "href": "sc:///", + "new_value": "x", + "expected": { + "href": "sc:///", + "password": "" + } + }, + { + "href": "javascript://x/", + "new_value": "bowser", + "expected": { + "href": "javascript://:bowser@x/", + "password": "bowser" + } + }, + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "password": "" + } + } + ], + "host": [ + { + "comment": "Non-special scheme", + "href": "sc://x/", + "new_value": "\u0000", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "\u0009", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000A", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000D", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": " ", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "#", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "/", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + 
"new_value": "?", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "@", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "ß", + "expected": { + "href": "sc://%C3%9F/", + "host": "%C3%9F", + "hostname": "%C3%9F" + } + }, + { + "comment": "IDNA Nontransitional_Processing", + "href": "https://x/", + "new_value": "ß", + "expected": { + "href": "https://xn--zca/", + "host": "xn--zca", + "hostname": "xn--zca" + } + }, + { + "comment": "Cannot-be-a-base means no host", + "href": "mailto:me@example.net", + "new_value": "example.com", + "expected": { + "href": "mailto:me@example.net", + "host": "" + } + }, + { + "comment": "Cannot-be-a-base means no host", + "href": "data:text/plain,Stuff", + "new_value": "example.net", + "expected": { + "href": "data:text/plain,Stuff", + "host": "" + } + }, + { + "href": "http://example.net", + "new_value": "example.com:8080", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port number is unchanged if not specified in the new value", + "href": "http://example.net:8080", + "new_value": "example.com", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port number is unchanged if not specified", + "href": "http://example.net:8080", + "new_value": "example.com:", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "The empty host is not valid for special schemes", + "href": "http://example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "host": "example.net" + } + }, + { + "comment": "The empty host is OK for non-special schemes", + "href": 
"view-source+http://example.net/foo", + "new_value": "", + "expected": { + "href": "view-source+http:///foo", + "host": "" + } + }, + { + "comment": "Path-only URLs can gain a host", + "href": "a:/foo", + "new_value": "example.net", + "expected": { + "href": "a://example.net/foo", + "host": "example.net" + } + }, + { + "comment": "IPv4 address syntax is normalized", + "href": "http://example.net", + "new_value": "0x7F000001:8080", + "expected": { + "href": "http://127.0.0.1:8080/", + "host": "127.0.0.1:8080", + "hostname": "127.0.0.1", + "port": "8080" + } + }, + { + "comment": "IPv6 address syntax is normalized", + "href": "http://example.net", + "new_value": "[::0:01]:2", + "expected": { + "href": "http://[::1]:2/", + "host": "[::1]:2", + "hostname": "[::1]", + "port": "2" + } + }, + { + "comment": "IPv6 literal address with port, crbug.com/1012416", + "href": "http://example.net", + "new_value": "[2001:db8::2]:4002", + "expected": { + "href": "http://[2001:db8::2]:4002/", + "host": "[2001:db8::2]:4002", + "hostname": "[2001:db8::2]", + "port": "4002" + } + }, + { + "comment": "Default port number is removed", + "href": "http://example.net", + "new_value": "example.com:80", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Default port number is removed", + "href": "https://example.net", + "new_value": "example.com:443", + "expected": { + "href": "https://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Default port number is only removed for the relevant scheme", + "href": "https://example.net", + "new_value": "example.com:80", + "expected": { + "href": "https://example.com:80/", + "host": "example.com:80", + "hostname": "example.com", + "port": "80" + } + }, + { + "comment": "Port number is removed if new port is scheme default and existing URL has a non-default port", + "href": "http://example.net:8080", + 
"new_value": "example.com:80", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com/stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080/stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com?stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080?stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com#stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080#stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + 
"hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com:8080\\stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "\\ is not a delimiter for non-special schemes, but still forbidden in hosts", + "href": "view-source+http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "view-source+http://example.net/path", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "view-source+http://example.net/path", + "new_value": "example.com:8080stuff2", + "expected": { + "href": "view-source+http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "example.com:8080stuff2", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "example.com:8080+2", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port numbers are 16 bit integers", + "href": "http://example.net/path", + "new_value": "example.com:65535", + "expected": { + "href": "http://example.com:65535/path", + "host": "example.com:65535", + "hostname": "example.com", + "port": "65535" + } + }, + { + "comment": "Port numbers are 16 bit integers, 
overflowing is an error. Hostname is still set, though.", + "href": "http://example.net/path", + "new_value": "example.com:65536", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Broken IPv6", + "href": "http://example.net/", + "new_value": "[google.com]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.4x]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } + } + ], + "hostname": [ + { + "comment": "Non-special scheme", + 
"href": "sc://x/", + "new_value": "\u0000", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "\u0009", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000A", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000D", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": " ", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "#", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "/", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "?", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "@", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "comment": "Cannot-be-a-base means no host", + "href": "mailto:me@example.net", + "new_value": "example.com", + "expected": { + "href": "mailto:me@example.net", + "host": "" + } + }, + { + "comment": "Cannot-be-a-base means no host", + "href": "data:text/plain,Stuff", + "new_value": "example.net", + "expected": { + "href": "data:text/plain,Stuff", + "host": "" + } + }, + { + "href": "http://example.net:8080", + "new_value": "example.com", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "The empty host is not valid for special schemes", + "href": "http://example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "host": "example.net" + } + }, + { + "comment": "The empty host is OK for non-special schemes", + "href": 
"view-source+http://example.net/foo", + "new_value": "", + "expected": { + "href": "view-source+http:///foo", + "host": "" + } + }, + { + "comment": "Path-only URLs can gain a host", + "href": "a:/foo", + "new_value": "example.net", + "expected": { + "href": "a://example.net/foo", + "host": "example.net" + } + }, + { + "comment": "IPv4 address syntax is normalized", + "href": "http://example.net:8080", + "new_value": "0x7F000001", + "expected": { + "href": "http://127.0.0.1:8080/", + "host": "127.0.0.1:8080", + "hostname": "127.0.0.1", + "port": "8080" + } + }, + { + "comment": "IPv6 address syntax is normalized", + "href": "http://example.net", + "new_value": "[::0:01]", + "expected": { + "href": "http://[::1]/", + "host": "[::1]", + "hostname": "[::1]", + "port": "" + } + }, + { + "comment": "Stuff after a : delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a : delimiter is ignored", + "href": "http://example.net:8080/path", + "new_value": "example.com:", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com/stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a ? 
delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com?stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com#stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "\\ is not a delimiter for non-special schemes, but still forbidden in hosts", + "href": "view-source+http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "view-source+http://example.net/path", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Broken IPv6", + "href": "http://example.net/", + "new_value": "[google.com]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.4x]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, 
+ { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } + }, + { + "comment": "Drop /. from path", + "href": "non-spec:/.//p", + "new_value": "h", + "expected": { + "href": "non-spec://h//p", + "host": "h", + "hostname": "h", + "pathname": "//p" + } + }, + { + "href": "non-spec:/.//p", + "new_value": "", + "expected": { + "href": "non-spec:////p", + "host": "", + "hostname": "", + "pathname": "//p" + } + } + ], + "port": [ + { + "href": "http://example.net", + "new_value": "8080", + "expected": { + "href": "http://example.net:8080/", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Port number is removed if empty is the new value", + "href": "http://example.net:8080", + "new_value": "", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Default port number is removed", + "href": "http://example.net:8080", + "new_value": "80", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Default port number is removed", + "href": "https://example.net:4433", + "new_value": "443", + "expected": { + "href": "https://example.net/", + "host": "example.net", + 
"hostname": "example.net", + "port": "" + } + }, + { + "comment": "Default port number is only removed for the relevant scheme", + "href": "https://example.net", + "new_value": "80", + "expected": { + "href": "https://example.net:80/", + "host": "example.net:80", + "hostname": "example.net", + "port": "80" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080/stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080?stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080#stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "8080\\stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "view-source+http://example.net/path", + "new_value": "8080stuff2", + "expected": { + "href": "view-source+http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "8080stuff2", + "expected": { + "href": "http://example.net:8080/path", + "host": 
"example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "8080+2", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Port numbers are 16 bit integers", + "href": "http://example.net/path", + "new_value": "65535", + "expected": { + "href": "http://example.net:65535/path", + "host": "example.net:65535", + "hostname": "example.net", + "port": "65535" + } + }, + { + "comment": "Port numbers are 16 bit integers, overflowing is an error", + "href": "http://example.net:8080/path", + "new_value": "65536", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Port numbers are 16 bit integers, overflowing is an error", + "href": "non-special://example.net:8080/path", + "new_value": "65536", + "expected": { + "href": "non-special://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "href": "file://test/", + "new_value": "12", + "expected": { + "href": "file://test/", + "port": "" + } + }, + { + "href": "file://localhost/", + "new_value": "12", + "expected": { + "href": "file:///", + "port": "" + } + }, + { + "href": "non-base:value", + "new_value": "12", + "expected": { + "href": "non-base:value", + "port": "" + } + }, + { + "href": "sc:///", + "new_value": "12", + "expected": { + "href": "sc:///", + "port": "" + } + }, + { + "href": "sc://x/", + "new_value": "12", + "expected": { + "href": "sc://x:12/", + "port": "12" + } + }, + { + "href": "javascript://x/", + "new_value": "12", + "expected": { + "href": "javascript://x:12/", + "port": "12" + } + } + ], + "pathname": [ + { + "comment": "Cannot-be-a-base don’t have a path", + 
"href": "mailto:me@example.net", + "new_value": "/foo", + "expected": { + "href": "mailto:me@example.net", + "pathname": "me@example.net" + } + }, + { + "href": "unix:/run/foo.socket?timeout=10", + "new_value": "/var/log/../run/bar.socket", + "expected": { + "href": "unix:/var/run/bar.socket?timeout=10", + "pathname": "/var/run/bar.socket" + } + }, + { + "href": "https://example.net#nav", + "new_value": "home", + "expected": { + "href": "https://example.net/home#nav", + "pathname": "/home" + } + }, + { + "href": "https://example.net#nav", + "new_value": "../home", + "expected": { + "href": "https://example.net/home#nav", + "pathname": "/home" + } + }, + { + "comment": "\\ is a segment delimiter for 'special' URLs", + "href": "http://example.net/home?lang=fr#nav", + "new_value": "\\a\\%2E\\b\\%2e.\\c", + "expected": { + "href": "http://example.net/a/c?lang=fr#nav", + "pathname": "/a/c" + } + }, + { + "comment": "\\ is *not* a segment delimiter for non-'special' URLs", + "href": "view-source+http://example.net/home?lang=fr#nav", + "new_value": "\\a\\%2E\\b\\%2e.\\c", + "expected": { + "href": "view-source+http://example.net/\\a\\%2E\\b\\%2e.\\c?lang=fr#nav", + "pathname": "/\\a\\%2E\\b\\%2e.\\c" + } + }, + { + "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed.", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", + "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is, including %2E outside dotted segments.", + "href": "http://example.net", + "new_value": "%2e%2E%c3%89té", + "expected": { + "href": "http://example.net/%2e%2E%c3%89t%C3%A9", + "pathname": "/%2e%2E%c3%89t%C3%A9" + } + }, + { + "comment": "? 
needs to be encoded", + "href": "http://example.net", + "new_value": "?", + "expected": { + "href": "http://example.net/%3F", + "pathname": "/%3F" + } + }, + { + "comment": "# needs to be encoded", + "href": "http://example.net", + "new_value": "#", + "expected": { + "href": "http://example.net/%23", + "pathname": "/%23" + } + }, + { + "comment": "? needs to be encoded, non-special scheme", + "href": "sc://example.net", + "new_value": "?", + "expected": { + "href": "sc://example.net/%3F", + "pathname": "/%3F" + } + }, + { + "comment": "# needs to be encoded, non-special scheme", + "href": "sc://example.net", + "new_value": "#", + "expected": { + "href": "sc://example.net/%23", + "pathname": "/%23" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file://monkey/", + "new_value": "\\\\", + "expected": { + "href": "file://monkey//", + "pathname": "//" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//\\/", + "expected": { + "href": "file://////", + "pathname": "////" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//monkey/..//", + "expected": { + "href": "file://///", + "pathname": "///" + } + }, + { + "comment": "Serialize /. in path", + "href": "non-spec:/", + "new_value": "/.//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "href": "non-spec:/", + "new_value": "/..//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "href": "non-spec:/", + "new_value": "//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "comment": "Drop /. 
from path", + "href": "non-spec:/.//", + "new_value": "p", + "expected": { + "href": "non-spec:/p", + "pathname": "/p" + } + } + ], + "search": [ + { + "href": "https://example.net#nav", + "new_value": "lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "?lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "??lang=fr", + "expected": { + "href": "https://example.net/??lang=fr#nav", + "search": "??lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "?", + "expected": { + "href": "https://example.net/?#nav", + "search": "" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "", + "expected": { + "href": "https://example.net/#nav", + "search": "" + } + }, + { + "href": "https://example.net?lang=en-US", + "new_value": "", + "expected": { + "href": "https://example.net/", + "search": "" + } + }, + { + "href": "https://example.net", + "new_value": "", + "expected": { + "href": "https://example.net/", + "search": "" + } + }, + { + "comment": "UTF-8 percent encoding with the query encode set. 
Tabs and newlines are removed.", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://example.net/?%c3%89t%C3%A9", + "search": "?%c3%89t%C3%A9" + } + } + ], + "hash": [ + { + "href": "https://example.net", + "new_value": "main", + "expected": { + "href": "https://example.net/#main", + "hash": "#main" + } + }, + { + "href": "https://example.net#nav", + "new_value": "main", + "expected": { + "href": "https://example.net/#main", + "hash": "#main" + } + }, + { + "href": "https://example.net?lang=en-US", + "new_value": "##nav", + "expected": { + "href": "https://example.net/?lang=en-US##nav", + "hash": "##nav" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "#main", + "expected": { + "href": "https://example.net/?lang=en-US#main", + "hash": "#main" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "#", + "expected": { + "href": "https://example.net/?lang=en-US#", + "hash": "" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "", + "expected": { + "href": "https://example.net/?lang=en-US", + "hash": "" + } + }, + { + "href": "http://example.net", + "new_value": "#foo bar", + "expected": { + "href": "http://example.net/#foo%20bar", + "hash": "#foo%20bar" + } + }, + { + "href": "http://example.net", + "new_value": "#foo\"bar", + "expected": { + "href": "http://example.net/#foo%22bar", + "hash": "#foo%22bar" + } + }, + { + "href": "http://example.net", + "new_value": "#foobar", + "expected": { + "href": "http://example.net/#foo%3Ebar", + "hash": 
"#foo%3Ebar" + } + }, + { + "href": "http://example.net", + "new_value": "#foo`bar", + "expected": { + "href": "http://example.net/#foo%60bar", + "hash": "#foo%60bar" + } + }, + { + "comment": "Simple percent-encoding; tabs and newlines are removed", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/#%00%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "hash": "#%00%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Percent-encode NULLs in fragment", + "href": "http://example.net", + "new_value": "a\u0000b", + "expected": { + "href": "http://example.net/#a%00b", + "hash": "#a%00b" + } + }, + { + "comment": "Percent-encode NULLs in fragment", + "href": "non-spec:/", + "new_value": "a\u0000b", + "expected": { + "href": "non-spec:/#a%00b", + "hash": "#a%00b" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://example.net/#%c3%89t%C3%A9", + "hash": "#%c3%89t%C3%A9" + } + }, + { + "href": "javascript:alert(1)", + "new_value": "castle", + "expected": { + "href": "javascript:alert(1)#castle", + "hash": "#castle" + } + } + ] +} diff --git a/Tests/WebURLTests/Resources/urltestdata.json b/Tests/WebURLTests/Resources/urltestdata.json new file mode 100644 index 000000000..1b7367afe --- /dev/null +++ b/Tests/WebURLTests/Resources/urltestdata.json @@ -0,0 +1,7777 @@ +[ + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/segments.js", + { + "input": "http://example\t.\norg", + "base": "http://example.org/foo/bar", + "href": "http://example.org/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", 
+ "hash": "" + }, + { + "input": "http://user:pass@foo:21/bar;par?b#c", + "base": "http://example.org/foo/bar", + "href": "http://user:pass@foo:21/bar;par?b#c", + "origin": "http://foo:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "foo:21", + "hostname": "foo", + "port": "21", + "pathname": "/bar;par", + "search": "?b", + "hash": "#c" + }, + { + "input": "https://test:@test", + "base": "about:blank", + "href": "https://test@test/", + "origin": "https://test", + "protocol": "https:", + "username": "test", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://:@test", + "base": "about:blank", + "href": "https://test/", + "origin": "https://test", + "protocol": "https:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://test:@test/x", + "base": "about:blank", + "href": "non-special://test@test/x", + "origin": "null", + "protocol": "non-special:", + "username": "test", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "non-special://:@test/x", + "base": "about:blank", + "href": "non-special://test/x", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "http:foo.com", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/foo.com", + "search": "", + "hash": "" + }, + { + "input": "\t :foo.com \n", + "base": "http://example.org/foo/bar", + "href": 
"http://example.org/foo/:foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com", + "search": "", + "hash": "" + }, + { + "input": " foo.com ", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/foo.com", + "search": "", + "hash": "" + }, + { + "input": "a:\t foo.com", + "base": "http://example.org/foo/bar", + "href": "a: foo.com", + "origin": "null", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": " foo.com", + "search": "", + "hash": "" + }, + { + "input": "http://f:21/ b ? d # e ", + "base": "http://example.org/foo/bar", + "href": "http://f:21/%20b%20?%20d%20#%20e", + "origin": "http://f:21", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:21", + "hostname": "f", + "port": "21", + "pathname": "/%20b%20", + "search": "?%20d%20", + "hash": "#%20e" + }, + { + "input": "lolscheme:x x#x x", + "base": "about:blank", + "href": "lolscheme:x x#x%20x", + "protocol": "lolscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "x x", + "search": "", + "hash": "#x%20x" + }, + { + "input": "http://f:/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": "http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:0/c", + "base": "http://example.org/foo/bar", + "href": "http://f:0/c", + "origin": "http://f:0", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:0", + "hostname": "f", + "port": "0", + 
"pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:00000000000000/c", + "base": "http://example.org/foo/bar", + "href": "http://f:0/c", + "origin": "http://f:0", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:0", + "hostname": "f", + "port": "0", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:00000000000000000000080/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": "http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:b/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f: /c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f:\n/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": "http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:fifty-two/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f:999999/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "non-special://f:999999/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f: 21 / b ? 
d # e ", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": " \t", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": ":foo.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:foo.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com/", + "search": "", + "hash": "" + }, + { + "input": ":foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:foo.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com/", + "search": "", + "hash": "" + }, + { + "input": ":", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:", + "search": "", + "hash": "" + }, + { + "input": ":a", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:a", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": 
"/foo/:a", + "search": "", + "hash": "" + }, + { + "input": ":/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:/", + "search": "", + "hash": "" + }, + { + "input": ":\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:/", + "search": "", + "hash": "" + }, + { + "input": ":#", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:#", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:", + "search": "", + "hash": "" + }, + { + "input": "#", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "#/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#/" + }, + { + "input": "#\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#\\", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#\\" + }, + { + "input": 
"#;?", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#;?", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#;?" + }, + { + "input": "?", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar?", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": ":23", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:23", + "search": "", + "hash": "" + }, + { + "input": "/:23", + "base": "http://example.org/foo/bar", + "href": "http://example.org/:23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/:23", + "search": "", + "hash": "" + }, + { + "input": "::", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/::", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/::", + "search": "", + "hash": "" + }, + { + "input": "::23", + "base": "http://example.org/foo/bar", + "href": 
"http://example.org/foo/::23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/::23", + "search": "", + "hash": "" + }, + { + "input": "foo://", + "base": "http://example.org/foo/bar", + "href": "foo://", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "http://a:b@c:29/d", + "base": "http://example.org/foo/bar", + "href": "http://a:b@c:29/d", + "origin": "http://c:29", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "c:29", + "hostname": "c", + "port": "29", + "pathname": "/d", + "search": "", + "hash": "" + }, + { + "input": "http::@c:29", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:@c:29", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:@c:29", + "search": "", + "hash": "" + }, + { + "input": "http://&a:foo(b]c@d:2/", + "base": "http://example.org/foo/bar", + "href": "http://&a:foo(b%5Dc@d:2/", + "origin": "http://d:2", + "protocol": "http:", + "username": "&a", + "password": "foo(b%5Dc", + "host": "d:2", + "hostname": "d", + "port": "2", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://::@c@d:2", + "base": "http://example.org/foo/bar", + "href": "http://:%3A%40c@d:2/", + "origin": "http://d:2", + "protocol": "http:", + "username": "", + "password": "%3A%40c", + "host": "d:2", + "hostname": "d", + "port": "2", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo.com:b@d/", + "base": "http://example.org/foo/bar", + "href": "http://foo.com:b@d/", + "origin": "http://d", + "protocol": "http:", + "username": "foo.com", + "password": "b", + "host": "d", + 
"hostname": "d", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo.com/\\@", + "base": "http://example.org/foo/bar", + "href": "http://foo.com//@", + "origin": "http://foo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.com", + "hostname": "foo.com", + "port": "", + "pathname": "//@", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://foo.com/", + "origin": "http://foo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.com", + "hostname": "foo.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\a\\b:c\\d@foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://a/b:c/d@foo.com/", + "origin": "http://a", + "protocol": "http:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/b:c/d@foo.com/", + "search": "", + "hash": "" + }, + { + "input": "foo:/", + "base": "http://example.org/foo/bar", + "href": "foo:/", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "foo:/bar.com/", + "base": "http://example.org/foo/bar", + "href": "foo:/bar.com/", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/bar.com/", + "search": "", + "hash": "" + }, + { + "input": "foo://///////", + "base": "http://example.org/foo/bar", + "href": "foo://///////", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///////", + "search": "", + "hash": "" + }, + { + "input": "foo://///////bar.com/", + "base": "http://example.org/foo/bar", + "href": "foo://///////bar.com/", + "origin": "null", + 
"protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///////bar.com/", + "search": "", + "hash": "" + }, + { + "input": "foo:////://///", + "base": "http://example.org/foo/bar", + "href": "foo:////://///", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//://///", + "search": "", + "hash": "" + }, + { + "input": "c:/foo", + "base": "http://example.org/foo/bar", + "href": "c:/foo", + "origin": "null", + "protocol": "c:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "//foo/bar", + "base": "http://example.org/foo/bar", + "href": "http://foo/bar", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/bar", + "search": "", + "hash": "" + }, + { + "input": "http://foo/path;a??e#f#g", + "base": "http://example.org/foo/bar", + "href": "http://foo/path;a??e#f#g", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/path;a", + "search": "??e", + "hash": "#f#g" + }, + { + "input": "http://foo/abcd?efgh?ijkl", + "base": "http://example.org/foo/bar", + "href": "http://foo/abcd?efgh?ijkl", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/abcd", + "search": "?efgh?ijkl", + "hash": "" + }, + { + "input": "http://foo/abcd#foo?bar", + "base": "http://example.org/foo/bar", + "href": "http://foo/abcd#foo?bar", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/abcd", + "search": "", + "hash": "#foo?bar" + }, + { + "input": 
"[61:24:74]:98", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/[61:24:74]:98", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/[61:24:74]:98", + "search": "", + "hash": "" + }, + { + "input": "http:[61:27]/:foo", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/[61:27]/:foo", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/[61:27]/:foo", + "search": "", + "hash": "" + }, + { + "input": "http://[1::2]:3:4", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1]", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1]:80", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://[2001::1]", + "base": "http://example.org/foo/bar", + "href": "http://[2001::1]/", + "origin": "http://[2001::1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[2001::1]", + "hostname": "[2001::1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[::127.0.0.1]", + "base": "http://example.org/foo/bar", + "href": "http://[::7f00:1]/", + "origin": "http://[::7f00:1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[::7f00:1]", + "hostname": "[::7f00:1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[0:0:0:0:0:0:13.1.68.3]", + "base": "http://example.org/foo/bar", + "href": "http://[::d01:4403]/", + "origin": "http://[::d01:4403]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[::d01:4403]", + "hostname": "[::d01:4403]", + 
"port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[2001::1]:80", + "base": "http://example.org/foo/bar", + "href": "http://[2001::1]/", + "origin": "http://[2001::1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[2001::1]", + "hostname": "[2001::1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/example.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/example.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftp:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:/example.com/", + "base": "http://example.org/foo/bar", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:/example.com/", + "base": "http://example.org/foo/bar", + "href": "madeupscheme:/example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "file:/example.com/", + "base": "http://example.org/foo/bar", + "href": "file:///example.com/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": 
"file://example:1/", + "base": "about:blank", + "failure": true + }, + { + "input": "file://example:test/", + "base": "about:blank", + "failure": true + }, + { + "input": "file://example%/", + "base": "about:blank", + "failure": true + }, + { + "input": "file://[example]/", + "base": "about:blank", + "failure": true + }, + { + "input": "ftps:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ftps:/example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:/example.com/", + "base": "http://example.org/foo/bar", + "href": "gopher:/example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:/example.com/", + "base": "http://example.org/foo/bar", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/example.com/", + "base": "http://example.org/foo/bar", + "href": "data:/example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/example.com/", + "base": "http://example.org/foo/bar", + "href": "javascript:/example.com/", + "origin": 
"null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/example.com/", + "base": "http://example.org/foo/bar", + "href": "mailto:/example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "http:example.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/example.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftp:example.com/", + "base": "http://example.org/foo/bar", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:example.com/", + "base": "http://example.org/foo/bar", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:example.com/", + "base": "http://example.org/foo/bar", + "href": "madeupscheme:example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:example.com/", + "base": "http://example.org/foo/bar", + "href": "ftps:example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + 
"host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:example.com/", + "base": "http://example.org/foo/bar", + "href": "gopher:example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:example.com/", + "base": "http://example.org/foo/bar", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:example.com/", + "base": "http://example.org/foo/bar", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:example.com/", + "base": "http://example.org/foo/bar", + "href": "data:example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:example.com/", + "base": "http://example.org/foo/bar", + "href": "javascript:example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:example.com/", + "base": "http://example.org/foo/bar", + "href": "mailto:example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "/a/b/c", + "base": 
"http://example.org/foo/bar", + "href": "http://example.org/a/b/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/b/c", + "search": "", + "hash": "" + }, + { + "input": "/a/ /c", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a/%20/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/%20/c", + "search": "", + "hash": "" + }, + { + "input": "/a%2fc", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a%2fc", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a%2fc", + "search": "", + "hash": "" + }, + { + "input": "/a/%2f/c", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a/%2f/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/%2f/c", + "search": "", + "hash": "" + }, + { + "input": "#β", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#%CE%B2", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#%CE%B2" + }, + { + "input": "data:text/html,test#test", + "base": "http://example.org/foo/bar", + "href": "data:text/html,test#test", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "text/html,test", + "search": "", + "hash": "#test" + }, + { + "input": "tel:1234567890", + "base": "http://example.org/foo/bar", + "href": 
"tel:1234567890", + "origin": "null", + "protocol": "tel:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "1234567890", + "search": "", + "hash": "" + }, + "# Based on https://felixfbecker.github.io/whatwg-url-custom-host-repro/", + { + "input": "ssh://example.com/foo/bar.git", + "base": "http://example.org/", + "href": "ssh://example.com/foo/bar.git", + "origin": "null", + "protocol": "ssh:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/bar.git", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html", + { + "input": "file:c:\\foo\\bar.html", + "base": "file:///tmp/mock/path", + "href": "file:///c:/foo/bar.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar.html", + "search": "", + "hash": "" + }, + { + "input": " File:c|////foo\\bar.html", + "base": "file:///tmp/mock/path", + "href": "file:///c:////foo/bar.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:////foo/bar.html", + "search": "", + "hash": "" + }, + { + "input": "C|/foo/bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/C|\\foo\\bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "//C|/foo/bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", 
+ "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "//server/file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "\\\\server\\file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "/\\server/file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "file:///foo/bar.txt", + "base": "file:///tmp/mock/path", + "href": "file:///foo/bar.txt", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/foo/bar.txt", + "search": "", + "hash": "" + }, + { + "input": "file:///home/me", + "base": "file:///tmp/mock/path", + "href": "file:///home/me", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/home/me", + "search": "", + "hash": "" + }, + { + "input": "//", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///test", + "base": "file:///tmp/mock/path", + "href": "file:///test", + 
"protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "file://test", + "base": "file:///tmp/mock/path", + "href": "file://test/", + "protocol": "file:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost/", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost/test", + "base": "file:///tmp/mock/path", + "href": "file:///test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "test", + "base": "file:///tmp/mock/path", + "href": "file:///tmp/mock/test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/tmp/mock/test", + "search": "", + "hash": "" + }, + { + "input": "file:test", + "base": "file:///tmp/mock/path", + "href": "file:///tmp/mock/test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/tmp/mock/test", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/path.js", + { + "input": "http://example.com/././foo", + "base": "about:blank", + "href": "http://example.com/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + 
"password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/./.foo", + "base": "about:blank", + "href": "http://example.com/.foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/.foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/.", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/./", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/..", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/..bar", + "base": "about:blank", + "href": "http://example.com/foo/..bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", 
+ "hostname": "example.com", + "port": "", + "pathname": "/foo/..bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../ton", + "base": "about:blank", + "href": "http://example.com/foo/ton", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/ton", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../ton/../../a", + "base": "about:blank", + "href": "http://example.com/a", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/a", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/../../..", + "base": "about:blank", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/../../../ton", + "base": "about:blank", + "href": "http://example.com/ton", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/ton", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e%2", + "base": "about:blank", + "href": "http://example.com/foo/%2e%2", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + 
"hostname": "example.com", + "port": "", + "pathname": "/foo/%2e%2", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e./%2e%2e/.%2e/%2e.bar", + "base": "about:blank", + "href": "http://example.com/%2e.bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%2e.bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com////../..", + "base": "about:blank", + "href": "http://example.com//", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar//../..", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar//..", + "base": "about:blank", + "href": "http://example.com/foo/bar/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/bar/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo", + "base": "about:blank", + "href": "http://example.com/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%20foo", + "base": "about:blank", + "href": "http://example.com/%20foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + 
"hostname": "example.com", + "port": "", + "pathname": "/%20foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%", + "base": "about:blank", + "href": "http://example.com/foo%", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2", + "base": "about:blank", + "href": "http://example.com/foo%2", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2zbar", + "base": "about:blank", + "href": "http://example.com/foo%2zbar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2zbar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2©zbar", + "base": "about:blank", + "href": "http://example.com/foo%2%C3%82%C2%A9zbar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2%C3%82%C2%A9zbar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%41%7a", + "base": "about:blank", + "href": "http://example.com/foo%41%7a", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%41%7a", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo\t\u0091%91", + "base": "about:blank", + "href": "http://example.com/foo%C2%91%91", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + 
"host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%C2%91%91", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%00%51", + "base": "about:blank", + "href": "http://example.com/foo%00%51", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%00%51", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/(%28:%3A%29)", + "base": "about:blank", + "href": "http://example.com/(%28:%3A%29)", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/(%28:%3A%29)", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%3A%3a%3C%3c", + "base": "about:blank", + "href": "http://example.com/%3A%3a%3C%3c", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%3A%3a%3C%3c", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo\tbar", + "base": "about:blank", + "href": "http://example.com/foobar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foobar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com\\\\foo\\\\bar", + "base": "about:blank", + "href": "http://example.com//foo//bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "//foo//bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd", + "base": "about:blank", + "href": "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd", + "origin": 
"http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%7Ffp3%3Eju%3Dduvgw%3Dd", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/@asdf%40", + "base": "about:blank", + "href": "http://example.com/@asdf%40", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/@asdf%40", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/你好你好", + "base": "about:blank", + "href": "http://example.com/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/‥/foo", + "base": "about:blank", + "href": "http://example.com/%E2%80%A5/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E2%80%A5/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com//foo", + "base": "about:blank", + "href": "http://example.com/%EF%BB%BF/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%EF%BB%BF/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/‮/foo/‭/bar", + "base": "about:blank", + "href": "http://example.com/%E2%80%AE/foo/%E2%80%AD/bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E2%80%AE/foo/%E2%80%AD/bar", + "search": "", + "hash": "" + 
}, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/relative.js", + { + "input": "http://www.google.com/foo?bar=baz#", + "base": "about:blank", + "href": "http://www.google.com/foo?bar=baz#", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "?bar=baz", + "hash": "" + }, + { + "input": "http://www.google.com/foo?bar=baz# »", + "base": "about:blank", + "href": "http://www.google.com/foo?bar=baz#%20%C2%BB", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "?bar=baz", + "hash": "#%20%C2%BB" + }, + { + "input": "data:test# »", + "base": "about:blank", + "href": "data:test#%20%C2%BB", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "test", + "search": "", + "hash": "#%20%C2%BB" + }, + { + "input": "http://www.google.com", + "base": "about:blank", + "href": "http://www.google.com/", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.0x00A80001", + "base": "about:blank", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://www/foo%2Ehtml", + "base": "about:blank", + "href": "http://www/foo%2Ehtml", + "origin": "http://www", + "protocol": "http:", + "username": "", + "password": "", + "host": "www", + "hostname": "www", + "port": "", + 
"pathname": "/foo%2Ehtml", + "search": "", + "hash": "" + }, + { + "input": "http://www/foo/%2E/html", + "base": "about:blank", + "href": "http://www/foo/html", + "origin": "http://www", + "protocol": "http:", + "username": "", + "password": "", + "host": "www", + "hostname": "www", + "port": "", + "pathname": "/foo/html", + "search": "", + "hash": "" + }, + { + "input": "http://user:pass@/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://%25DOMAIN:foobar@foodomain.com/", + "base": "about:blank", + "href": "http://%25DOMAIN:foobar@foodomain.com/", + "origin": "http://foodomain.com", + "protocol": "http:", + "username": "%25DOMAIN", + "password": "foobar", + "host": "foodomain.com", + "hostname": "foodomain.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\www.google.com\\foo", + "base": "about:blank", + "href": "http://www.google.com/foo", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://foo:80/", + "base": "about:blank", + "href": "http://foo/", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo:81/", + "base": "about:blank", + "href": "http://foo:81/", + "origin": "http://foo:81", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "httpa://foo:80/", + "base": "about:blank", + "href": "httpa://foo:80/", + "origin": "null", + "protocol": "httpa:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": 
"http://foo:-80/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://foo:443/", + "base": "about:blank", + "href": "https://foo/", + "origin": "https://foo", + "protocol": "https:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://foo:80/", + "base": "about:blank", + "href": "https://foo:80/", + "origin": "https://foo:80", + "protocol": "https:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp://foo:21/", + "base": "about:blank", + "href": "ftp://foo/", + "origin": "ftp://foo", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp://foo:80/", + "base": "about:blank", + "href": "ftp://foo:80/", + "origin": "ftp://foo:80", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "gopher://foo:70/", + "base": "about:blank", + "href": "gopher://foo:70/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "foo:70", + "hostname": "foo", + "port": "70", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "gopher://foo:443/", + "base": "about:blank", + "href": "gopher://foo:443/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "foo:443", + "hostname": "foo", + "port": "443", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:80/", + "base": "about:blank", + "href": "ws://foo/", + "origin": "ws://foo", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + 
"hash": "" + }, + { + "input": "ws://foo:81/", + "base": "about:blank", + "href": "ws://foo:81/", + "origin": "ws://foo:81", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:443/", + "base": "about:blank", + "href": "ws://foo:443/", + "origin": "ws://foo:443", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo:443", + "hostname": "foo", + "port": "443", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:815/", + "base": "about:blank", + "href": "ws://foo:815/", + "origin": "ws://foo:815", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo:815", + "hostname": "foo", + "port": "815", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:80/", + "base": "about:blank", + "href": "wss://foo:80/", + "origin": "wss://foo:80", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:81/", + "base": "about:blank", + "href": "wss://foo:81/", + "origin": "wss://foo:81", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:443/", + "base": "about:blank", + "href": "wss://foo/", + "origin": "wss://foo", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:815/", + "base": "about:blank", + "href": "wss://foo:815/", + "origin": "wss://foo:815", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:815", + "hostname": "foo", + "port": "815", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": 
"http:/example.com/", + "base": "about:blank", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp:/example.com/", + "base": "about:blank", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:/example.com/", + "base": "about:blank", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:/example.com/", + "base": "about:blank", + "href": "madeupscheme:/example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "file:/example.com/", + "base": "about:blank", + "href": "file:///example.com/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:/example.com/", + "base": "about:blank", + "href": "ftps:/example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:/example.com/", + "base": "about:blank", + "href": "gopher:/example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + 
"pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:/example.com/", + "base": "about:blank", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:/example.com/", + "base": "about:blank", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/example.com/", + "base": "about:blank", + "href": "data:/example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/example.com/", + "base": "about:blank", + "href": "javascript:/example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/example.com/", + "base": "about:blank", + "href": "mailto:/example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "http:example.com/", + "base": "about:blank", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp:example.com/", + "base": "about:blank", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", 
+ "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:example.com/", + "base": "about:blank", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:example.com/", + "base": "about:blank", + "href": "madeupscheme:example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:example.com/", + "base": "about:blank", + "href": "ftps:example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:example.com/", + "base": "about:blank", + "href": "gopher:example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:example.com/", + "base": "about:blank", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:example.com/", + "base": "about:blank", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:example.com/", + "base": 
"about:blank", + "href": "data:example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:example.com/", + "base": "about:blank", + "href": "javascript:example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:example.com/", + "base": "about:blank", + "href": "mailto:example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/segments-userinfo-vs-host.html", + { + "input": "http:@www.example.com", + "base": "about:blank", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/@www.example.com", + "base": "about:blank", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://@www.example.com", + "base": "about:blank", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:a:b@www.example.com", + "base": "about:blank", + "href": 
"http://a:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/a:b@www.example.com", + "base": "about:blank", + "href": "http://a:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://a:b@www.example.com", + "base": "about:blank", + "href": "http://a:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://@pple.com", + "base": "about:blank", + "href": "http://pple.com/", + "origin": "http://pple.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "pple.com", + "hostname": "pple.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http::b@www.example.com", + "base": "about:blank", + "href": "http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/:b@www.example.com", + "base": "about:blank", + "href": "http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://:b@www.example.com", + "base": "about:blank", + "href": 
"http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/:@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http://user@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http:@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http:/@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http://@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "https:@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http:a:b@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http:/a:b@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a:b@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http::@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http:a:@www.example.com", + "base": "about:blank", + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/a:@www.example.com", + "base": "about:blank", + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://a:@www.example.com", + "base": "about:blank", + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": 
"http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://www.@pple.com", + "base": "about:blank", + "href": "http://www.@pple.com/", + "origin": "http://pple.com", + "protocol": "http:", + "username": "www.", + "password": "", + "host": "pple.com", + "hostname": "pple.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:@:www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http:/@:www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http://@:www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http://:@www.example.com", + "base": "about:blank", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# Others", + { + "input": "/", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": ".", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + 
"hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "./test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../aaa/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/aaa/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/aaa/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../../test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + 
"protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "中/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/%E4%B8%AD/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/%E4%B8%AD/test.txt", + "search": "", + "hash": "" + }, + { + "input": "http://www.example2.com", + "base": "http://www.example.com/test", + "href": "http://www.example2.com/", + "origin": "http://www.example2.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example2.com", + "hostname": "www.example2.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "//www.example2.com", + "base": "http://www.example.com/test", + "href": "http://www.example2.com/", + "origin": "http://www.example2.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example2.com", + "hostname": "www.example2.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:...", + "base": "http://www.example.com/test", + "href": "file:///...", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/...", + "search": "", + "hash": "" + }, + { + "input": "file:..", + "base": "http://www.example.com/test", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:a", + "base": "http://www.example.com/test", + "href": "file:///a", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a", + "search": "", + "hash": 
"" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/host.html", + "Basic canonicalization, uppercase should be converted to lowercase", + { + "input": "http://ExAmPlE.CoM", + "base": "http://other.com/", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://example example.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://Goo%20 goo%7C|.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[:]", + "base": "http://other.com/", + "failure": true + }, + "U+3000 is mapped to U+0020 (space) which is disallowed", + { + "input": "http://GOO\u00a0\u3000goo.com", + "base": "http://other.com/", + "failure": true + }, + "Other types of space (no-break, zero-width, zero-width-no-break) are name-prepped away to nothing. U+200B, U+2060, and U+FEFF, are ignored", + { + "input": "http://GOO\u200b\u2060\ufeffgoo.com", + "base": "http://other.com/", + "href": "http://googoo.com/", + "origin": "http://googoo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "googoo.com", + "hostname": "googoo.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Leading and trailing C0 control or space", + { + "input": "\u0000\u001b\u0004\u0012 http://example.com/\u001f \u000d ", + "base": "about:blank", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. 
U+3002 is mapped to U+002E (dot)", + { + "input": "http://www.foo。bar.com", + "base": "http://other.com/", + "href": "http://www.foo.bar.com/", + "origin": "http://www.foo.bar.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.foo.bar.com", + "hostname": "www.foo.bar.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid unicode characters should fail... U+FDD0 is disallowed; %ef%b7%90 is U+FDD0", + { + "input": "http://\ufdd0zyx.com", + "base": "http://other.com/", + "failure": true + }, + "This is the same as previous but escaped", + { + "input": "http://%ef%b7%90zyx.com", + "base": "http://other.com/", + "failure": true + }, + "U+FFFD", + { + "input": "https://\ufffd", + "base": "about:blank", + "failure": true + }, + { + "input": "https://%EF%BF%BD", + "base": "about:blank", + "failure": true + }, + { + "input": "https://x/\ufffd?\ufffd#\ufffd", + "base": "about:blank", + "href": "https://x/%EF%BF%BD?%EF%BF%BD#%EF%BF%BD", + "origin": "https://x", + "protocol": "https:", + "username": "", + "password": "", + "host": "x", + "hostname": "x", + "port": "", + "pathname": "/%EF%BF%BD", + "search": "?%EF%BF%BD", + "hash": "#%EF%BF%BD" + }, + "Test name prepping, fullwidth input should be converted to ASCII and NOT IDN-ized. This is 'Go' in fullwidth UTF-8/UTF-16.", + { + "input": "http://Go.com", + "base": "http://other.com/", + "href": "http://go.com/", + "origin": "http://go.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "go.com", + "hostname": "go.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "URL spec forbids the following. 
https://www.w3.org/Bugs/Public/show_bug.cgi?id=24257", + { + "input": "http://%41.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://%ef%bc%85%ef%bc%94%ef%bc%91.com", + "base": "http://other.com/", + "failure": true + }, + "...%00 in fullwidth should fail (also as escaped UTF-8 input)", + { + "input": "http://%00.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://%ef%bc%85%ef%bc%90%ef%bc%90.com", + "base": "http://other.com/", + "failure": true + }, + "Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN", + { + "input": "http://你好你好", + "base": "http://other.com/", + "href": "http://xn--6qqa088eba/", + "origin": "http://xn--6qqa088eba", + "protocol": "http:", + "username": "", + "password": "", + "host": "xn--6qqa088eba", + "hostname": "xn--6qqa088eba", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://faß.ExAmPlE/", + "base": "about:blank", + "href": "https://xn--fa-hia.example/", + "origin": "https://xn--fa-hia.example", + "protocol": "https:", + "username": "", + "password": "", + "host": "xn--fa-hia.example", + "hostname": "xn--fa-hia.example", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://faß.ExAmPlE/", + "base": "about:blank", + "href": "sc://fa%C3%9F.ExAmPlE/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "fa%C3%9F.ExAmPlE", + "hostname": "fa%C3%9F.ExAmPlE", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid escaped characters should fail and the percents should be escaped. 
https://www.w3.org/Bugs/Public/show_bug.cgi?id=24191", + { + "input": "http://%zz%66%a.com", + "base": "http://other.com/", + "failure": true + }, + "If we get an invalid character that has been escaped.", + { + "input": "http://%25", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://hello%00", + "base": "http://other.com/", + "failure": true + }, + "Escaped numbers should be treated like IP addresses if they are.", + { + "input": "http://%30%78%63%30%2e%30%32%35%30.01", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://%30%78%63%30%2e%30%32%35%30.01%2e", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.0.257", + "base": "http://other.com/", + "failure": true + }, + "Invalid escaping in hosts causes failure", + { + "input": "http://%3g%78%63%30%2e%30%32%35%30%2E.01", + "base": "http://other.com/", + "failure": true + }, + "A space in a host causes failure", + { + "input": "http://192.168.0.1 hello", + "base": "http://other.com/", + "failure": true + }, + { + "input": "https://x x:12", + "base": "about:blank", + "failure": true + }, + "Fullwidth and escaped UTF-8 fullwidth should still be treated as IP", + { + "input": "http://0Xc0.0250.01", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Domains with empty 
labels", + { + "input": "http://./", + "base": "about:blank", + "href": "http://./", + "origin": "http://.", + "protocol": "http:", + "username": "", + "password": "", + "host": ".", + "hostname": ".", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://../", + "base": "about:blank", + "href": "http://../", + "origin": "http://..", + "protocol": "http:", + "username": "", + "password": "", + "host": "..", + "hostname": "..", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0..0x300/", + "base": "about:blank", + "href": "http://0..0x300/", + "origin": "http://0..0x300", + "protocol": "http:", + "username": "", + "password": "", + "host": "0..0x300", + "hostname": "0..0x300", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Broken IPv6", + { + "input": "http://[www.google.com]/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://[google.com]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.3.4x]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.3.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.]", + "base": "http://other.com/", + "failure": true + }, + "Misc Unicode", + { + "input": "http://foo:💩@example.com/bar", + "base": "http://other.com/", + "href": "http://foo:%F0%9F%92%A9@example.com/bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "foo", + "password": "%F0%9F%92%A9", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/bar", + "search": "", + "hash": "" + }, + "# resolving a fragment against any scheme succeeds", + { + "input": "#", + "base": "test:test", + "href": "test:test#", + "origin": "null", + "protocol": "test:", + "username": "", + "password": "", + "host": "", + "hostname": "", 
+ "port": "", + "pathname": "test", + "search": "", + "hash": "" + }, + { + "input": "#x", + "base": "mailto:x@x.com", + "href": "mailto:x@x.com#x", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "x@x.com", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "data:,", + "href": "data:,#x", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": ",", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "about:blank", + "href": "about:blank#x", + "origin": "null", + "protocol": "about:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "blank", + "search": "", + "hash": "#x" + }, + { + "input": "#", + "base": "test:test?test", + "href": "test:test?test#", + "origin": "null", + "protocol": "test:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "test", + "search": "?test", + "hash": "" + }, + "# multiple @ in authority state", + { + "input": "https://@test@test@example:800/", + "base": "http://doesnotmatter/", + "href": "https://%40test%40test@example:800/", + "origin": "https://example:800", + "protocol": "https:", + "username": "%40test%40test", + "password": "", + "host": "example:800", + "hostname": "example", + "port": "800", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://@@@example", + "base": "http://doesnotmatter/", + "href": "https://%40%40@example/", + "origin": "https://example", + "protocol": "https:", + "username": "%40%40", + "password": "", + "host": "example", + "hostname": "example", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "non-az-09 characters", + { + "input": "http://`{}:`{}@h/`{}?`{}", + "base": "http://doesnotmatter/", + "href": "http://%60%7B%7D:%60%7B%7D@h/%60%7B%7D?`{}", + "origin": 
"http://h", + "protocol": "http:", + "username": "%60%7B%7D", + "password": "%60%7B%7D", + "host": "h", + "hostname": "h", + "port": "", + "pathname": "/%60%7B%7D", + "search": "?`{}", + "hash": "" + }, + "byte is ' and url is special", + { + "input": "http://host/?'", + "base": "about:blank", + "href": "http://host/?%27", + "origin": "http://host", + "protocol": "http:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?%27", + "hash": "" + }, + { + "input": "notspecial://host/?'", + "base": "about:blank", + "href": "notspecial://host/?'", + "origin": "null", + "protocol": "notspecial:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?'", + "hash": "" + }, + "# Credentials in base", + { + "input": "/some/path", + "base": "http://user@example.org/smth", + "href": "http://user@example.org/some/path", + "origin": "http://example.org", + "protocol": "http:", + "username": "user", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/some/path", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "http://user:pass@example.org:21/smth", + "href": "http://user:pass@example.org:21/smth", + "origin": "http://example.org:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "example.org:21", + "hostname": "example.org", + "port": "21", + "pathname": "/smth", + "search": "", + "hash": "" + }, + { + "input": "/some/path", + "base": "http://user:pass@example.org:21/smth", + "href": "http://user:pass@example.org:21/some/path", + "origin": "http://example.org:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "example.org:21", + "hostname": "example.org", + "port": "21", + "pathname": "/some/path", + "search": "", + "hash": "" + }, + "# a set of tests designed by zcorpan for relative URLs with unknown schemes", + { + 
"input": "i", + "base": "sc:sd", + "failure": true + }, + { + "input": "i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "i", + "base": "sc:/pa/pa", + "href": "sc:/pa/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/i", + "search": "", + "hash": "" + }, + { + "input": "i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "i", + "base": "sc:///pa/pa", + "href": "sc:///pa/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/i", + "search": "", + "hash": "" + }, + { + "input": "../i", + "base": "sc:sd", + "failure": true + }, + { + "input": "../i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "../i", + "base": "sc:/pa/pa", + "href": "sc:/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "../i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "../i", + "base": "sc:///pa/pa", + "href": "sc:///i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc:sd", + "failure": true + }, + { + "input": "/i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "/i", + "base": "sc:/pa/pa", + "href": "sc:/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": 
"", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc:///pa/pa", + "href": "sc:///i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "?i", + "base": "sc:sd", + "failure": true + }, + { + "input": "?i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "?i", + "base": "sc:/pa/pa", + "href": "sc:/pa/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "?i", + "hash": "" + }, + { + "input": "?i", + "base": "sc://ho/pa", + "href": "sc://ho/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/pa", + "search": "?i", + "hash": "" + }, + { + "input": "?i", + "base": "sc:///pa/pa", + "href": "sc:///pa/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "?i", + "hash": "" + }, + { + "input": "#i", + "base": "sc:sd", + "href": "sc:sd#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "sd", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc:sd/sd", + "href": "sc:sd/sd#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "sd/sd", + "search": "", + "hash": "#i" + }, + { + "input": "#i", 
+ "base": "sc:/pa/pa", + "href": "sc:/pa/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc://ho/pa", + "href": "sc://ho/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/pa", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc:///pa/pa", + "href": "sc:///pa/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "", + "hash": "#i" + }, + "# make sure that relative URL logic works on known typically non-relative schemes too", + { + "input": "about:/../", + "base": "about:blank", + "href": "about:/", + "origin": "null", + "protocol": "about:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/../", + "base": "about:blank", + "href": "data:/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/../", + "base": "about:blank", + "href": "javascript:/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/../", + "base": "about:blank", + "href": "mailto:/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# unknown schemes and their hosts", + { + "input": "sc://ñ.test/", + "base": "about:blank", + "href": "sc://%C3%B1.test/", + 
"origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1.test", + "hostname": "%C3%B1.test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://\u0000/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc:// /", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://%/", + "base": "about:blank", + "href": "sc://%/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%", + "hostname": "%", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://te@s:t@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:12/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://[/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://\\/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://]/", + "base": "about:blank", + "failure": true + }, + { + "input": "x", + "base": "sc://ñ", + "href": "sc://%C3%B1/x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + "# unknown schemes and backslashes", + { + "input": "sc:\\../", + "base": "about:blank", + "href": "sc:\\../", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "\\../", + "search": "", + "hash": "" + }, + "# unknown scheme with path looking like a password", + { + "input": "sc::a@example.net", + "base": "about:blank", + "href": "sc::a@example.net", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": ":a@example.net", + "search": "", + "hash": "" + }, + "# 
unknown scheme with bogus percent-encoding", + { + "input": "wow:%NBD", + "base": "about:blank", + "href": "wow:%NBD", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%NBD", + "search": "", + "hash": "" + }, + { + "input": "wow:%1G", + "base": "about:blank", + "href": "wow:%1G", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%1G", + "search": "", + "hash": "" + }, + "# unknown scheme with non-URL characters", + { + "input": "wow:\uFFFF", + "base": "about:blank", + "href": "wow:%EF%BF%BF", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%EF%BF%BF", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/\uFDD0\uFDCF\uFDEF\uFDF0\uFFFE\uFFFF?\uFDD0\uFDCF\uFDEF\uFDF0\uFFFE\uFFFF", + "base": "about:blank", + "href": "http://example.com/%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF?%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF", + "search": "?%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF", + "hash": "" + }, + "Forbidden host code points", + { + "input": "http://ab", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a^b", + "base": "about:blank", + "failure": true + }, + { + "input": "non-special://ab", + "base": "about:blank", + "failure": true + }, + { + "input": "non-special://a^b", + "base": "about:blank", + "failure": true + }, + "Allowed host code points", + { + "input": "http://\u001F!\"$&'()*+,-.;=_`{}~/", + "base": "about:blank", + "href": "http://\u001F!\"$&'()*+,-.;=_`{}~/", + "origin": 
"http://\u001F!\"$&'()*+,-.;=_`{}~", + "protocol": "http:", + "username": "", + "password": "", + "host": "\u001F!\"$&'()*+,-.;=_`{}~", + "hostname": "\u001F!\"$&'()*+,-.;=_`{}~", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://\u001F!\"$&'()*+,-.;=_`{}~/", + "base": "about:blank", + "href": "sc://%1F!\"$&'()*+,-.;=_`{}~/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%1F!\"$&'()*+,-.;=_`{}~", + "hostname": "%1F!\"$&'()*+,-.;=_`{}~", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# Hosts and percent-encoding", + { + "input": "ftp://example.com%80/", + "base": "about:blank", + "failure": true + }, + { + "input": "ftp://example.com%A0/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://example.com%80/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://example.com%A0/", + "base": "about:blank", + "failure": true + }, + { + "input": "ftp://%e2%98%83", + "base": "about:blank", + "href": "ftp://xn--n3h/", + "origin": "ftp://xn--n3h", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "xn--n3h", + "hostname": "xn--n3h", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://%e2%98%83", + "base": "about:blank", + "href": "https://xn--n3h/", + "origin": "https://xn--n3h", + "protocol": "https:", + "username": "", + "password": "", + "host": "xn--n3h", + "hostname": "xn--n3h", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# tests from jsdom/whatwg-url designed for code coverage", + { + "input": "http://127.0.0.1:10100/relative_import.html", + "base": "about:blank", + "href": "http://127.0.0.1:10100/relative_import.html", + "origin": "http://127.0.0.1:10100", + "protocol": "http:", + "username": "", + "password": "", + "host": "127.0.0.1:10100", + "hostname": "127.0.0.1", + "port": "10100", + "pathname": "/relative_import.html", + "search": "", + 
"hash": "" + }, + { + "input": "http://facebook.com/?foo=%7B%22abc%22", + "base": "about:blank", + "href": "http://facebook.com/?foo=%7B%22abc%22", + "origin": "http://facebook.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "facebook.com", + "hostname": "facebook.com", + "port": "", + "pathname": "/", + "search": "?foo=%7B%22abc%22", + "hash": "" + }, + { + "input": "https://localhost:3000/jqueryui@1.2.3", + "base": "about:blank", + "href": "https://localhost:3000/jqueryui@1.2.3", + "origin": "https://localhost:3000", + "protocol": "https:", + "username": "", + "password": "", + "host": "localhost:3000", + "hostname": "localhost", + "port": "3000", + "pathname": "/jqueryui@1.2.3", + "search": "", + "hash": "" + }, + "# tab/LF/CR", + { + "input": "h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg", + "base": "about:blank", + "href": "http://host:9000/path?query#frag", + "origin": "http://host:9000", + "protocol": "http:", + "username": "", + "password": "", + "host": "host:9000", + "hostname": "host", + "port": "9000", + "pathname": "/path", + "search": "?query", + "hash": "#frag" + }, + "# Stringification of URL.searchParams", + { + "input": "?a=b&c=d", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar?a=b&c=d", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "?a=b&c=d", + "searchParams": "a=b&c=d", + "hash": "" + }, + { + "input": "??a=b&c=d", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar??a=b&c=d", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "??a=b&c=d", + "searchParams": "%3Fa=b&c=d", + "hash": "" + }, + "# Scheme only", + { + "input": "http:", + "base": 
"http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "searchParams": "", + "hash": "" + }, + { + "input": "http:", + "base": "https://example.org/foo/bar", + "failure": true + }, + { + "input": "sc:", + "base": "https://example.org/foo/bar", + "href": "sc:", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "searchParams": "", + "hash": "" + }, + "# Percent encoding of fragments", + { + "input": "http://foo.bar/baz?qux#foo\bbar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%08bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%08bar" + }, + { + "input": "http://foo.bar/baz?qux#foo\"bar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%22bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%22bar" + }, + { + "input": "http://foo.bar/baz?qux#foobar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%3Ebar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%3Ebar" + }, + { + "input": "http://foo.bar/baz?qux#foo`bar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%60bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": 
"", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%60bar" + }, + "# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)", + { + "input": "http://192.168.257", + "base": "http://other.com/", + "href": "http://192.168.1.1/", + "origin": "http://192.168.1.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.1.1", + "hostname": "192.168.1.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.257.com", + "base": "http://other.com/", + "href": "http://192.168.257.com/", + "origin": "http://192.168.257.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.257.com", + "hostname": "192.168.257.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://256", + "base": "http://other.com/", + "href": "http://0.0.1.0/", + "origin": "http://0.0.1.0", + "protocol": "http:", + "username": "", + "password": "", + "host": "0.0.1.0", + "hostname": "0.0.1.0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://256.com", + "base": "http://other.com/", + "href": "http://256.com/", + "origin": "http://256.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "256.com", + "hostname": "256.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999", + "base": "http://other.com/", + "href": "http://59.154.201.255/", + "origin": "http://59.154.201.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "59.154.201.255", + "hostname": "59.154.201.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999.com", + "base": "http://other.com/", + "href": "http://999999999.com/", + "origin": "http://999999999.com", + "protocol": "http:", + "username": "", + 
"password": "", + "host": "999999999.com", + "hostname": "999999999.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://10000000000", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://10000000000.com", + "base": "http://other.com/", + "href": "http://10000000000.com/", + "origin": "http://10000000000.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "10000000000.com", + "hostname": "10000000000.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://4294967295", + "base": "http://other.com/", + "href": "http://255.255.255.255/", + "origin": "http://255.255.255.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "255.255.255.255", + "hostname": "255.255.255.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://4294967296", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://0xffffffff", + "base": "http://other.com/", + "href": "http://255.255.255.255/", + "origin": "http://255.255.255.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "255.255.255.255", + "hostname": "255.255.255.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0xffffffff1", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256.256", + "base": "http://other.com/", + "href": "http://256.256.256.256.256/", + "origin": "http://256.256.256.256.256", + "protocol": "http:", + "username": "", + "password": "", + "host": "256.256.256.256.256", + "hostname": "256.256.256.256.256", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://0x.0x.0", + "base": "about:blank", + "href": "https://0.0.0.0/", + "origin": "https://0.0.0.0", + "protocol": 
"https:", + "username": "", + "password": "", + "host": "0.0.0.0", + "hostname": "0.0.0.0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "More IPv4 parsing (via https://github.com/jsdom/whatwg-url/issues/92)", + { + "input": "https://0x100000000/test", + "base": "about:blank", + "failure": true + }, + { + "input": "https://256.0.0.1/test", + "base": "about:blank", + "failure": true + }, + "# file URLs containing percent-encoded Windows drive letters (shouldn't work)", + { + "input": "file:///C%3A/", + "base": "about:blank", + "href": "file:///C%3A/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C%3A/", + "search": "", + "hash": "" + }, + { + "input": "file:///C%7C/", + "base": "about:blank", + "href": "file:///C%7C/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C%7C/", + "search": "", + "hash": "" + }, + { + "input": "file://%43%3A", + "base": "about:blank", + "failure": true + }, + { + "input": "file://%43%7C", + "base": "about:blank", + "failure": true + }, + { + "input": "file://%43|", + "base": "about:blank", + "failure": true + }, + { + "input": "file://C%7C", + "base": "about:blank", + "failure": true + }, + { + "input": "file://%43%7C/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://%43%7C/", + "base": "about:blank", + "failure": true + }, + { + "input": "asdf://%43|/", + "base": "about:blank", + "failure": true + }, + { + "input": "asdf://%43%7C/", + "base": "about:blank", + "href": "asdf://%43%7C/", + "origin": "null", + "protocol": "asdf:", + "username": "", + "password": "", + "host": "%43%7C", + "hostname": "%43%7C", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# file URLs relative to other file URLs (via https://github.com/jsdom/whatwg-url/pull/60)", + { + "input": "pix/submit.gif", + "base": 
"file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html", + "href": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///C:/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# More file URL tests by zcorpan and annevk", + { + "input": "/", + "base": "file:///C:/a/b", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "file://h/C:/a/b", + "href": "file://h/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "h", + "hostname": "h", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "file://h/a/b", + "href": "file://h/", + "protocol": "file:", + "username": "", + "password": "", + "host": "h", + "hostname": "h", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "//d:", + "base": "file:///C:/a/b", + "href": "file:///d:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/d:", + "search": "", + "hash": "" + }, + { + "input": "//d:/..", + "base": "file:///C:/a/b", + "href": "file:///d:/", + "protocol": "file:", + "username": "", + 
"password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/d:/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///ab:/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///1:/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "file:///test?test#test", + "href": "file:///test?test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "" + }, + { + "input": "file:", + "base": "file:///test?test#test", + "href": "file:///test?test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "" + }, + { + "input": "?x", + "base": "file:///test?test#test", + "href": "file:///test?x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?x", + "hash": "" + }, + { + "input": "file:?x", + "base": "file:///test?test#test", + "href": "file:///test?x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?x", + "hash": "" + }, + { + "input": "#x", + "base": "file:///test?test#test", + "href": "file:///test?test#x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "#x" + }, + { + "input": "file:#x", + "base": "file:///test?test#test", + "href": "file:///test?test#x", + "protocol": "file:", + "username": "", + "password": "", + 
"host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "#x" + }, + "# File URLs and many (back)slashes", + { + "input": "file:\\\\//", + "base": "about:blank", + "href": "file:////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\", + "base": "about:blank", + "href": "file:////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\?fox", + "base": "about:blank", + "href": "file:////?fox", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "?fox", + "hash": "" + }, + { + "input": "file:\\\\\\\\#guppy", + "base": "about:blank", + "href": "file:////#guppy", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "#guppy" + }, + { + "input": "file://spider///", + "base": "about:blank", + "href": "file://spider///", + "protocol": "file:", + "username": "", + "password": "", + "host": "spider", + "hostname": "spider", + "port": "", + "pathname": "///", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\localhost//", + "base": "about:blank", + "href": "file:////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "file:///localhost//cat", + "base": "about:blank", + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://\\/localhost//cat", + "base": "about:blank", + "href": 
"file:////localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://localhost//a//../..//", + "base": "about:blank", + "href": "file://///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///", + "search": "", + "hash": "" + }, + { + "input": "/////mouse", + "base": "file:///elephant", + "href": "file://///mouse", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///mouse", + "search": "", + "hash": "" + }, + { + "input": "\\//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "\\/localhost//pig", + "base": "file://lion/", + "href": "file:////pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//pig", + "search": "", + "hash": "" + }, + { + "input": "//localhost//pig", + "base": "file://lion/", + "href": "file:////pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//pig", + "search": "", + "hash": "" + }, + { + "input": "/..//localhost//pig", + "base": "file://lion/", + "href": "file://lion//localhost//pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "lion", + "hostname": "lion", + "port": "", + "pathname": "//localhost//pig", + "search": "", + "hash": "" + }, + { + "input": "file://", + "base": "file://ape/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# File URLs with non-empty 
hosts", + { + "input": "/rooibos", + "base": "file://tea/", + "href": "file://tea/rooibos", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/rooibos", + "search": "", + "hash": "" + }, + { + "input": "/?chai", + "base": "file://tea/", + "href": "file://tea/?chai", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/", + "search": "?chai", + "hash": "" + }, + "# Windows drive letter handling with the 'file:' base URL", + { + "input": "C|", + "base": "file://host/dir/file", + "href": "file://host/C:", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "C|", + "base": "file://host/D:/dir1/dir2/file", + "href": "file://host/C:", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "C|#", + "base": "file://host/dir/file", + "href": "file://host/C:#", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "C|?", + "base": "file://host/dir/file", + "href": "file://host/C:?", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "C|/", + "base": "file://host/dir/file", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "C|\n/", + "base": "file://host/dir/file", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + 
"host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "C|\\", + "base": "file://host/dir/file", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "C", + "base": "file://host/dir/file", + "href": "file://host/dir/C", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/dir/C", + "search": "", + "hash": "" + }, + { + "input": "C|a", + "base": "file://host/dir/file", + "href": "file://host/dir/C|a", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/dir/C|a", + "search": "", + "hash": "" + }, + "# Windows drive letter quirk in the file slash state", + { + "input": "/c:/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/c|/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "file:\\c:\\foo\\bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/c:/foo/bar", + "base": "file://host/path", + "href": "file://host/c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" 
+ }, + "# Do not drop the host in the presence of a drive letter", + { + "input": "file://example.net/C:/", + "base": "about:blank", + "href": "file://example.net/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "example.net", + "hostname": "example.net", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://1.2.3.4/C:/", + "base": "about:blank", + "href": "file://1.2.3.4/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "1.2.3.4", + "hostname": "1.2.3.4", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://[1::8]/C:/", + "base": "about:blank", + "href": "file://[1::8]/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "[1::8]", + "hostname": "[1::8]", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Copy the host from the base URL in the following cases", + { + "input": "C|/", + "base": "file://host/", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "/C:/", + "base": "file://host/", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file:C:/", + "base": "file://host/", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file:/C:/", + "base": "file://host/", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Copy the empty host from the input in the 
following cases", + { + "input": "//C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "///C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file:///C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Windows drive letter quirk (no host)", + { + "input": "file:/C|/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://C|/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# file URLs without base URL by Rimas Misevičius", + { + "input": "file:", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:?q=v", + "base": "about:blank", + "href": "file:///?q=v", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": 
"?q=v", + "hash": "" + }, + { + "input": "file:#frag", + "base": "about:blank", + "href": "file:///#frag", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "#frag" + }, + "# file: drive letter cases from https://crbug.com/1078698", + { + "input": "file:///Y:", + "base": "about:blank", + "href": "file:///Y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:", + "search": "", + "hash": "" + }, + { + "input": "file:///Y:/", + "base": "about:blank", + "href": "file:///Y:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:/", + "search": "", + "hash": "" + }, + { + "input": "file:///./Y", + "base": "about:blank", + "href": "file:///Y", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y", + "search": "", + "hash": "" + }, + { + "input": "file:///./Y:", + "base": "about:blank", + "href": "file:///Y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:", + "search": "", + "hash": "" + }, + { + "input": "\\\\\\.\\Y:", + "base": "about:blank", + "failure": true + }, + "# file: drive letter cases from https://crbug.com/1078698 but lowercased", + { + "input": "file:///y:", + "base": "about:blank", + "href": "file:///y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:", + "search": "", + "hash": "" + }, + { + "input": "file:///y:/", + "base": "about:blank", + "href": "file:///y:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:/", + "search": "", + "hash": "" + }, + { + "input": "file:///./y", + "base": "about:blank", + "href": 
"file:///y", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y", + "search": "", + "hash": "" + }, + { + "input": "file:///./y:", + "base": "about:blank", + "href": "file:///y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:", + "search": "", + "hash": "" + }, + { + "input": "\\\\\\.\\y:", + "base": "about:blank", + "failure": true + }, + "# Additional file URL tests for (https://github.com/whatwg/url/issues/405)", + { + "input": "file://localhost//a//../..//foo", + "base": "about:blank", + "href": "file://///foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///foo", + "search": "", + "hash": "" + }, + { + "input": "file://localhost////foo", + "base": "about:blank", + "href": "file://////foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "////foo", + "search": "", + "hash": "" + }, + { + "input": "file:////foo", + "base": "about:blank", + "href": "file:////foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//foo", + "search": "", + "hash": "" + }, + { + "input": "file:///one/two", + "base": "file:///", + "href": "file:///one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/one/two", + "search": "", + "hash": "" + }, + { + "input": "file:////one/two", + "base": "file:///", + "href": "file:////one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//one/two", + "search": "", + "hash": "" + }, + { + "input": "//one/two", + "base": "file:///", + "href": "file://one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "one", + 
"hostname": "one", + "port": "", + "pathname": "/two", + "search": "", + "hash": "" + }, + { + "input": "///one/two", + "base": "file:///", + "href": "file:///one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/one/two", + "search": "", + "hash": "" + }, + { + "input": "////one/two", + "base": "file:///", + "href": "file:////one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//one/two", + "search": "", + "hash": "" + }, + { + "input": "file:///.//", + "base": "file:////", + "href": "file:////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + "File URL tests for https://github.com/whatwg/url/issues/549", + { + "input": "file:.//p", + "base": "about:blank", + "href": "file:////p", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//p", + "search": "", + "hash": "" + }, + { + "input": "file:/.//p", + "base": "about:blank", + "href": "file:////p", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//p", + "search": "", + "hash": "" + }, + "# IPv6 tests", + { + "input": "http://[1:0::]", + "base": "http://example.net/", + "href": "http://[1::]/", + "origin": "http://[1::]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[1::]", + "hostname": "[1::]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[0:1:2:3:4:5:6:7:8]", + "base": "http://example.net/", + "failure": true + }, + { + "input": "https://[0::0::0]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:.0]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:0:]", + "base": "about:blank", + 
"failure": true + }, + { + "input": "https://[0:1:2:3:4:5:6:7.0.0.0.1]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:1.00.0.0.0]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:1.290.0.0.0]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:1.23.23]", + "base": "about:blank", + "failure": true + }, + "# Empty host", + { + "input": "http://?", + "base": "about:blank", + "failure": true + }, + { + "input": "http://#", + "base": "about:blank", + "failure": true + }, + "Port overflow (2^32 + 81)", + { + "input": "http://f:4294967377/c", + "base": "http://example.org/", + "failure": true + }, + "Port overflow (2^64 + 81)", + { + "input": "http://f:18446744073709551697/c", + "base": "http://example.org/", + "failure": true + }, + "Port overflow (2^128 + 81)", + { + "input": "http://f:340282366920938463463374607431768211537/c", + "base": "http://example.org/", + "failure": true + }, + "# Non-special-URL path tests", + { + "input": "sc://ñ", + "base": "about:blank", + "href": "sc://%C3%B1", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://ñ?x", + "base": "about:blank", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://ñ#x", + "base": "about:blank", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "sc://ñ", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + 
"port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "?x", + "base": "sc://ñ", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://?", + "base": "about:blank", + "href": "sc://?", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://#", + "base": "about:blank", + "href": "sc://#", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "///", + "base": "sc://x/", + "href": "sc:///", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "////", + "base": "sc://x/", + "href": "sc:////", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "////x/", + "base": "sc://x/", + "href": "sc:////x/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//x/", + "search": "", + "hash": "" + }, + { + "input": "tftp://foobar.com/someconfig;mode=netascii", + "base": "about:blank", + "href": "tftp://foobar.com/someconfig;mode=netascii", + "origin": "null", + "protocol": "tftp:", + "username": "", + "password": "", + "host": "foobar.com", + "hostname": "foobar.com", + "port": "", + "pathname": "/someconfig;mode=netascii", + "search": "", + "hash": "" + }, + { + "input": "telnet://user:pass@foobar.com:23/", + "base": "about:blank", + "href": "telnet://user:pass@foobar.com:23/", + "origin": "null", + "protocol": "telnet:", + "username": "user", + 
"password": "pass", + "host": "foobar.com:23", + "hostname": "foobar.com", + "port": "23", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ut2004://10.10.10.10:7777/Index.ut2", + "base": "about:blank", + "href": "ut2004://10.10.10.10:7777/Index.ut2", + "origin": "null", + "protocol": "ut2004:", + "username": "", + "password": "", + "host": "10.10.10.10:7777", + "hostname": "10.10.10.10", + "port": "7777", + "pathname": "/Index.ut2", + "search": "", + "hash": "" + }, + { + "input": "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz", + "base": "about:blank", + "href": "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz", + "origin": "null", + "protocol": "redis:", + "username": "foo", + "password": "bar", + "host": "somehost:6379", + "hostname": "somehost", + "port": "6379", + "pathname": "/0", + "search": "?baz=bam&qux=baz", + "hash": "" + }, + { + "input": "rsync://foo@host:911/sup", + "base": "about:blank", + "href": "rsync://foo@host:911/sup", + "origin": "null", + "protocol": "rsync:", + "username": "foo", + "password": "", + "host": "host:911", + "hostname": "host", + "port": "911", + "pathname": "/sup", + "search": "", + "hash": "" + }, + { + "input": "git://github.com/foo/bar.git", + "base": "about:blank", + "href": "git://github.com/foo/bar.git", + "origin": "null", + "protocol": "git:", + "username": "", + "password": "", + "host": "github.com", + "hostname": "github.com", + "port": "", + "pathname": "/foo/bar.git", + "search": "", + "hash": "" + }, + { + "input": "irc://myserver.com:6999/channel?passwd", + "base": "about:blank", + "href": "irc://myserver.com:6999/channel?passwd", + "origin": "null", + "protocol": "irc:", + "username": "", + "password": "", + "host": "myserver.com:6999", + "hostname": "myserver.com", + "port": "6999", + "pathname": "/channel", + "search": "?passwd", + "hash": "" + }, + { + "input": "dns://fw.example.org:9999/foo.bar.org?type=TXT", + "base": "about:blank", + "href": 
"dns://fw.example.org:9999/foo.bar.org?type=TXT", + "origin": "null", + "protocol": "dns:", + "username": "", + "password": "", + "host": "fw.example.org:9999", + "hostname": "fw.example.org", + "port": "9999", + "pathname": "/foo.bar.org", + "search": "?type=TXT", + "hash": "" + }, + { + "input": "ldap://localhost:389/ou=People,o=JNDITutorial", + "base": "about:blank", + "href": "ldap://localhost:389/ou=People,o=JNDITutorial", + "origin": "null", + "protocol": "ldap:", + "username": "", + "password": "", + "host": "localhost:389", + "hostname": "localhost", + "port": "389", + "pathname": "/ou=People,o=JNDITutorial", + "search": "", + "hash": "" + }, + { + "input": "git+https://github.com/foo/bar", + "base": "about:blank", + "href": "git+https://github.com/foo/bar", + "origin": "null", + "protocol": "git+https:", + "username": "", + "password": "", + "host": "github.com", + "hostname": "github.com", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "urn:ietf:rfc:2648", + "base": "about:blank", + "href": "urn:ietf:rfc:2648", + "origin": "null", + "protocol": "urn:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "ietf:rfc:2648", + "search": "", + "hash": "" + }, + { + "input": "tag:joe@example.org,2001:foo/bar", + "base": "about:blank", + "href": "tag:joe@example.org,2001:foo/bar", + "origin": "null", + "protocol": "tag:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "joe@example.org,2001:foo/bar", + "search": "", + "hash": "" + }, + "Serialize /. 
in path", + { + "input": "non-spec:/.//", + "base": "about:blank", + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/..//", + "base": "about:blank", + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/a/..//", + "base": "about:blank", + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/.//path", + "base": "about:blank", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/..//path", + "base": "about:blank", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/a/..//path", + "base": "about:blank", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "/.//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "/..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": 
"//path", + "search": "", + "hash": "" + }, + { + "input": "..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "a/..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "non-spec:/..//p", + "href": "non-spec:/.//p", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//p", + "search": "", + "hash": "" + }, + { + "input": "path", + "base": "non-spec:/..//p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + "Do not serialize /. 
in path", + { + "input": "../path", + "base": "non-spec:/.//p", + "href": "non-spec:/path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + "# percent encoded hosts in non-special-URLs", + { + "input": "non-special://%E2%80%A0/", + "base": "about:blank", + "href": "non-special://%E2%80%A0/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "%E2%80%A0", + "hostname": "%E2%80%A0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://H%4fSt/path", + "base": "about:blank", + "href": "non-special://H%4fSt/path", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "H%4fSt", + "hostname": "H%4fSt", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + "# IPv6 in non-special-URLs", + { + "input": "non-special://[1:2:0:0:5:0:0:0]/", + "base": "about:blank", + "href": "non-special://[1:2:0:0:5::]/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2:0:0:5::]", + "hostname": "[1:2:0:0:5::]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[1:2:0:0:0:0:0:3]/", + "base": "about:blank", + "href": "non-special://[1:2::3]/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2::3]", + "hostname": "[1:2::3]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[1:2::3]:80/", + "base": "about:blank", + "href": "non-special://[1:2::3]:80/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2::3]:80", + "hostname": "[1:2::3]", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[:80/", + "base": "about:blank", + "failure": true + }, + { + "input": "blob:https://example.com:443/", + "base": "about:blank", + "href": 
"blob:https://example.com:443/", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "https://example.com:443/", + "search": "", + "hash": "" + }, + { + "input": "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf", + "base": "about:blank", + "href": "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "d3958f5c-0777-0845-9dcf-2cb28783acaf", + "search": "", + "hash": "" + }, + "Invalid IPv4 radix digits", + { + "input": "http://0177.0.0.0189", + "base": "about:blank", + "href": "http://0177.0.0.0189/", + "protocol": "http:", + "username": "", + "password": "", + "host": "0177.0.0.0189", + "hostname": "0177.0.0.0189", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0x7f.0.0.0x7g", + "base": "about:blank", + "href": "http://0x7f.0.0.0x7g/", + "protocol": "http:", + "username": "", + "password": "", + "host": "0x7f.0.0.0x7g", + "hostname": "0x7f.0.0.0x7g", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0X7F.0.0.0X7G", + "base": "about:blank", + "href": "http://0x7f.0.0.0x7g/", + "protocol": "http:", + "username": "", + "password": "", + "host": "0x7f.0.0.0x7g", + "hostname": "0x7f.0.0.0x7g", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid IPv4 portion of IPv6 address", + { + "input": "http://[::127.0.0.0.1]", + "base": "about:blank", + "failure": true + }, + "Uncompressed IPv6 addresses with 0", + { + "input": "http://[0:1:0:1:0:1:0:1]", + "base": "about:blank", + "href": "http://[0:1:0:1:0:1:0:1]/", + "protocol": "http:", + "username": "", + "password": "", + "host": "[0:1:0:1:0:1:0:1]", + "hostname": "[0:1:0:1:0:1:0:1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[1:0:1:0:1:0:1:0]", + "base": "about:blank", + "href": 
"http://[1:0:1:0:1:0:1:0]/", + "protocol": "http:", + "username": "", + "password": "", + "host": "[1:0:1:0:1:0:1:0]", + "hostname": "[1:0:1:0:1:0:1:0]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Percent-encoded query and fragment", + { + "input": "http://example.org/test?\u0022", + "base": "about:blank", + "href": "http://example.org/test?%22", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%22", + "hash": "" + }, + { + "input": "http://example.org/test?\u0023", + "base": "about:blank", + "href": "http://example.org/test?#", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "http://example.org/test?\u003C", + "base": "about:blank", + "href": "http://example.org/test?%3C", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%3C", + "hash": "" + }, + { + "input": "http://example.org/test?\u003E", + "base": "about:blank", + "href": "http://example.org/test?%3E", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%3E", + "hash": "" + }, + { + "input": "http://example.org/test?\u2323", + "base": "about:blank", + "href": "http://example.org/test?%E2%8C%A3", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%E2%8C%A3", + "hash": "" + }, + { + "input": "http://example.org/test?%23%23", + "base": "about:blank", + "href": "http://example.org/test?%23%23", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": 
"example.org", + "port": "", + "pathname": "/test", + "search": "?%23%23", + "hash": "" + }, + { + "input": "http://example.org/test?%GH", + "base": "about:blank", + "href": "http://example.org/test?%GH", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%GH", + "hash": "" + }, + { + "input": "http://example.org/test?a#%EF", + "base": "about:blank", + "href": "http://example.org/test?a#%EF", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#%EF" + }, + { + "input": "http://example.org/test?a#%GH", + "base": "about:blank", + "href": "http://example.org/test?a#%GH", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#%GH" + }, + "URLs that require a non-about:blank base. 
(Also serve as invalid base tests.)", + { + "input": "a", + "base": "about:blank", + "failure": true + }, + { + "input": "a/", + "base": "about:blank", + "failure": true + }, + { + "input": "a//", + "base": "about:blank", + "failure": true + }, + "Bases that don't fail to parse but fail to be bases", + { + "input": "test-a-colon.html", + "base": "a:", + "failure": true + }, + { + "input": "test-a-colon-b.html", + "base": "a:b", + "failure": true + }, + "Other base URL tests, that must succeed", + { + "input": "test-a-colon-slash.html", + "base": "a:/", + "href": "a:/test-a-colon-slash.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-slash.html", + "base": "a://", + "href": "a:///test-a-colon-slash-slash.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash-slash.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-b.html", + "base": "a:/b", + "href": "a:/test-a-colon-slash-b.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash-b.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-slash-b.html", + "base": "a://b", + "href": "a://b/test-a-colon-slash-slash-b.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "b", + "hostname": "b", + "port": "", + "pathname": "/test-a-colon-slash-slash-b.html", + "search": "", + "hash": "" + }, + "Null code point in fragment", + { + "input": "http://example.org/test?a#b\u0000c", + "base": "about:blank", + "href": "http://example.org/test?a#b%00c", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + 
"hash": "#b%00c" + }, + { + "input": "non-spec://example.org/test?a#b\u0000c", + "base": "about:blank", + "href": "non-spec://example.org/test?a#b%00c", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#b%00c" + }, + { + "input": "non-spec:/test?a#b\u0000c", + "base": "about:blank", + "href": "non-spec:/test?a#b%00c", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#b%00c" + }, + "First scheme char - not allowed: https://github.com/whatwg/url/issues/464", + { + "input": "10.0.0.7:8080/foo.html", + "base": "file:///some/dir/bar.html", + "href": "file:///some/dir/10.0.0.7:8080/foo.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/some/dir/10.0.0.7:8080/foo.html", + "search": "", + "hash": "" + }, + "Subsequent scheme chars - not allowed", + { + "input": "a!@$*=/foo.html", + "base": "file:///some/dir/bar.html", + "href": "file:///some/dir/a!@$*=/foo.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/some/dir/a!@$*=/foo.html", + "search": "", + "hash": "" + }, + "First and subsequent scheme chars - allowed", + { + "input": "a1234567890-+.:foo/bar", + "base": "http://example.com/dir/file", + "href": "a1234567890-+.:foo/bar", + "protocol": "a1234567890-+.:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "foo/bar", + "search": "", + "hash": "" + }, + "IDNA ignored code points in file URLs hosts", + { + "input": "file://a\u00ADb/p", + "base": "about:blank", + "href": "file://ab/p", + "protocol": "file:", + "username": "", + "password": "", + "host": "ab", + "hostname": "ab", + "port": "", + "pathname": "/p", + "search": "", + 
"hash": "" + }, + { + "input": "file://a%C2%ADb/p", + "base": "about:blank", + "href": "file://ab/p", + "protocol": "file:", + "username": "", + "password": "", + "host": "ab", + "hostname": "ab", + "port": "", + "pathname": "/p", + "search": "", + "hash": "" + }, + "Empty host after the domain to ASCII", + { + "input": "file://\u00ad/p", + "base": "about:blank", + "failure": true + }, + { + "input": "file://%C2%AD/p", + "base": "about:blank", + "failure": true + }, + { + "input": "file://xn--/p", + "base": "about:blank", + "failure": true + }, + "https://bugzilla.mozilla.org/show_bug.cgi?id=1647058", + { + "input": "#link", + "base": "https://example.org/##link", + "href": "https://example.org/#link", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "#link" + }, + "UTF-8 percent-encode of C0 control percent-encode set and supersets", + { + "input": "non-special:cannot-be-a-base-url-\u0000\u0001\u001F\u001E\u007E\u007F\u0080", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", + "origin": "null", + "password": "", + "pathname": "cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "https://www.example.com/path{\u007Fpath.html?query'\u007F=query#fragment<\u007Ffragment", + "base": "about:blank", + "hash": "#fragment%3C%7Ffragment", + "host": "www.example.com", + "hostname": "www.example.com", + "href": "https://www.example.com/path%7B%7Fpath.html?query%27%7F=query#fragment%3C%7Ffragment", + "origin": "https://www.example.com", + "password": "", + "pathname": "/path%7B%7Fpath.html", + "port": "", + "protocol": "https:", + "search": "?query%27%7F=query", + "username": "" + }, + { + "input": "https://user:pass[\u007F@foo/bar", + "base": "http://example.org", + "hash": "", 
+ "host": "foo", + "hostname": "foo", + "href": "https://user:pass%5B%7F@foo/bar", + "origin": "https://foo", + "password": "pass%5B%7F", + "pathname": "/bar", + "port": "", + "protocol": "https:", + "search": "", + "username": "user" + }, + "Tests for the distinct percent-encode sets", + { + "input": "foo:// !\"$%&'()*+,-.;<=>@[\\]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", + "origin": "null", + "password": "", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~" + }, + { + "input": "wss:// !\"$%&'()*+,-.;<=>@[]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", + "origin": "wss://host", + "password": "", + "pathname": "/", + "port":"", + "protocol": "wss:", + "search": "", + "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~" + }, + { + "input": "foo://joe: !\"$%&'()*+,-.:;<=>@[\\]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", + "origin": "null", + "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "joe" + }, + { + "input": "wss://joe: !\"$%&'()*+,-.:;<=>@[]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", + "origin": "wss://host", + "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~", + "pathname": "/", + "port":"", + "protocol": "wss:", + "search": "", + "username": "joe" + }, + { "input": 
"foo://!\"$%&'()*+,-.;=_`{}~/", + "base": "about:blank", + "hash": "", + "host": "!\"$%&'()*+,-.;=_`{}~", + "hostname": "!\"$%&'()*+,-.;=_`{}~", + "href":"foo://!\"$%&'()*+,-.;=_`{}~/", + "origin": "null", + "password": "", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://!\"$&'()*+,-.;=_`{}~/", + "base": "about:blank", + "hash": "", + "host": "!\"$&'()*+,-.;=_`{}~", + "hostname": "!\"$&'()*+,-.;=_`{}~", + "href":"wss://!\"$&'()*+,-.;=_`{}~/", + "origin": "wss://!\"$&'()*+,-.;=_`{}~", + "password": "", + "pathname": "/", + "port":"", + "protocol": "wss:", + "search": "", + "username": "" + }, + { + "input": "foo://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", + "origin": "null", + "password": "", + "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://host/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", + "origin": "wss://host", + "password": "", + "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", + "port":"", + "protocol": "wss:", + "search": "", + "username": "" + }, + { + "input": "foo://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/dir/?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "origin": "null", + "password": "", + "pathname": "/dir/", + "port":"", + "protocol": "foo:", + "search": "?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "username": "" + }, + { + "input": "wss://host/dir/? 
!\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://host/dir/?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "origin": "wss://host", + "password": "", + "pathname": "/dir/", + "port":"", + "protocol": "wss:", + "search": "?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "username": "" + }, + { + "input": "foo://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "host": "host", + "hostname": "host", + "href": "foo://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "origin": "null", + "password": "", + "pathname": "/dir/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "host": "host", + "hostname": "host", + "href": "wss://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "origin": "wss://host", + "password": "", + "pathname": "/dir/", + "port":"", + "protocol": "wss:", + "search": "", + "username": "" + } +] diff --git a/Tests/WebURLTests/SchemeKindTests.swift b/Tests/WebURLTests/SchemeKindTests.swift new file mode 100644 index 000000000..bd5136318 --- /dev/null +++ b/Tests/WebURLTests/SchemeKindTests.swift @@ -0,0 +1,84 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest + +@testable import WebURL + +/// Tests for `WebURL.SchemeKind`. +/// +final class SchemeKindTests: XCTestCase { + + func testParser() { + + let testData: [(String, WebURL.SchemeKind, Bool)] = [ + ("ftp", .ftp, true), + ("file", .file, true), + ("http", .http, true), + ("https", .https, true), + ("ws", .ws, true), + ("wss", .wss, true), + ("foo", .other, false), + ("✌️", .other, false), + ] + for (name, expectedSchemeKind, expectedIsSpecial) in testData { + XCTAssertEqual(WebURL.SchemeKind(parsing: name.utf8), expectedSchemeKind) + XCTAssertEqual(expectedSchemeKind.isSpecial, expectedIsSpecial) + // The parser should not allow any trailing content. + let nameWithSchemeTerminator = name + ":" + XCTAssertEqual(WebURL.SchemeKind(parsing: nameWithSchemeTerminator.utf8), .other) + let nameWithASCII = name + "x" + XCTAssertEqual(WebURL.SchemeKind(parsing: nameWithASCII.utf8), .other) + let nameWithNonASCII = name + "✌️" + XCTAssertEqual(WebURL.SchemeKind(parsing: nameWithNonASCII.utf8), .other) + } + XCTAssertEqual(WebURL.SchemeKind(parsing: "".utf8), .other) + XCTAssertEqual(WebURL.SchemeKind(parsing: "\n".utf8), .other) + } + + func testDefaultPorts() { + + let testData: [(WebURL.SchemeKind, UInt16?)] = [ + (.ftp, 21), + (.http, 80), + (.https, 443), + (.ws, 80), + (.wss, 443), + (.file, nil), + (.other, nil), + ] + for (schemeKind, expectedDefaultPort) in testData { + XCTAssertEqual(schemeKind.defaultPort, expectedDefaultPort) + if let defaultPort = expectedDefaultPort { + // `isDefaultPort(utf8:)` should only return 'true' for the literal port number as an ASCII string.
+ let serialized = String(defaultPort) + XCTAssertTrue(schemeKind.isDefaultPort(utf8: serialized.utf8)) + XCTAssertFalse(schemeKind.isDefaultPort(utf8: (":" + serialized).utf8)) + XCTAssertFalse(schemeKind.isDefaultPort(utf8: (serialized + "0").utf8)) + XCTAssertFalse(schemeKind.isDefaultPort(utf8: (serialized + "\n").utf8)) + XCTAssertFalse(schemeKind.isDefaultPort(utf8: (serialized + "🦩").utf8)) + // Schemes with default ports are special. + XCTAssertTrue(schemeKind.isSpecial) + } else { + // If there is no default port, everything should return 'false'. + XCTAssertFalse(schemeKind.isDefaultPort(utf8: "80".utf8)) + XCTAssertFalse(schemeKind.isDefaultPort(utf8: ":80".utf8)) + XCTAssertFalse(schemeKind.isDefaultPort(utf8: ":80\n".utf8)) + XCTAssertFalse(schemeKind.isDefaultPort(utf8: "🦩".utf8)) + } + XCTAssertFalse(schemeKind.isDefaultPort(utf8: "".utf8)) + XCTAssertFalse(schemeKind.isDefaultPort(utf8: "\n".utf8)) + } + } +} diff --git a/Tests/WebURLTests/Utils.swift b/Tests/WebURLTests/Utils.swift new file mode 100644 index 000000000..3e5270371 --- /dev/null +++ b/Tests/WebURLTests/Utils.swift @@ -0,0 +1,79 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest + +@testable import WebURL + +/// Asserts that two sequences contain the same elements in the same order. 
+/// +func XCTAssertEqualElements<Left: Sequence, Right: Sequence>( + _ left: Left, _ right: Right, file: StaticString = #file, line: UInt = #line +) where Left.Element == Right.Element, Left.Element: Equatable { + XCTAssertTrue(left.elementsEqual(right), file: file, line: line) +} + +/// Asserts that `body` throws an error equal to `expectedError`; failures are reported at the caller's location. +/// +func XCTAssertThrowsSpecific<E>( + _ expectedError: E, file: StaticString = #file, line: UInt = #line, _ body: () throws -> Void +) where E: Error, E: Equatable { + do { + try body() + XCTFail("Expected an error to be thrown", file: file, line: line) + } catch let error as E { + XCTAssertEqual(error, expectedError, file: file, line: line) + } catch { + XCTFail("Unexpected error: \(error)", file: file, line: line) + } +} + +/// A String containing all 128 ASCII characters (`0..<128`), in order. +/// +let stringWithEveryASCIICharacter: String = { + let asciiChars: Range<UInt8> = 0..<128 + let str = String(asciiChars.lazy.map { Character(UnicodeScalar($0)) }) + precondition(str.utf8.elementsEqual(asciiChars)) + return str +}() + + +// -------------------------------------------- +// MARK: - WebURL test utilities +// -------------------------------------------- + +/// Asserts that `body` leaves the base address of the URL's UTF-8 storage unchanged; failures point at the caller. +@inline(__always) +func checkDoesNotCopy(_ url: inout WebURL, file: StaticString = #file, line: UInt = #line, _ body: (inout WebURL) -> Void) { + let addressBefore = url.utf8.withUnsafeBufferPointer { $0.baseAddress } + body(&url) + XCTAssertEqual(addressBefore, url.utf8.withUnsafeBufferPointer { $0.baseAddress }, file: file, line: line) +} + +/// Checks that the given URL returns precisely the same value when its serialized representation is re-parsed. 
+/// +func XCTAssertURLIsIdempotent(_ url: WebURL, file: StaticString = #file, line: UInt = #line) { + var serialized = url.serialized + serialized.makeContiguousUTF8() + guard let reparsed = WebURL(serialized) else { + XCTFail("Failed to reparse URL string: \(serialized)", file: file, line: line) + return + } + // Check that the URLStructure (i.e. code-unit offsets, flags, etc) is the same. + XCTAssertTrue(url.storage.structure.describesSameStructure(as: reparsed.storage.structure), file: file, line: line) + // Check that the code-units are the same. 
+ XCTAssertEqualElements(url.utf8, reparsed.utf8, file: file, line: line) + // Triple check: check that the serialized representations are the same. + XCTAssertEqual(serialized, reparsed.serialized, file: file, line: line) +} diff --git a/Tests/WebURLTests/WebPlatformTests.swift b/Tests/WebURLTests/WebPlatformTests.swift new file mode 100644 index 000000000..1771de037 --- /dev/null +++ b/Tests/WebURLTests/WebPlatformTests.swift @@ -0,0 +1,135 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import WebURLTestSupport +import XCTest + +@testable import WebURL + +final class WebPlatformTests: ReportGeneratingTestCase {} + +func loadTestResource(name: String) -> Data? { + // On macOS the JSON fixture is looked up in the package's resource bundle (`Bundle.module`). + // Elsewhere it is read from the "Resources" directory next to this source file, located via `#filePath`. + #if os(macOS) + guard let url = Bundle.module.url(forResource: "Resources/\(name)", withExtension: "json") else { return nil } + return try? Data(contentsOf: url) + #else + var path = #filePath + path.removeLast(22) // "WebPlatformTests.swift" + path += "Resources/\(name).json" + return FileManager.default.contents(atPath: path) + #endif +} + + +// -------------------------------------------- +// MARK: - URL Constructor +// -------------------------------------------- +// https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json +// at version 52e358a1209a23c42e9443641c7ed0ba23600c93 +// Adjusted to remove an invalid surrogate pair which Foundation's JSON parser refuses to parse. 
+ + +extension WebPlatformTests { + /// Runs the WPT constructor tests from `urltestdata.json`, allowing the listed expected failures, and writes a report. + func testURLConstructor() throws { + let data = try XCTUnwrap(loadTestResource(name: "urltestdata")) + let testFile = try JSONDecoder().decode(WPTConstructorTest.TestFile.self, from: data) + XCTAssertEqual( + testFile.tests.count, 696, + "Incorrect number of test cases. If you updated the test list, be sure to update the expected failure indexes" + ) + + var harness = WPTConstructorTest.WebURLReportHarness(expectedFailures: [ + // These test failures are due to us not having implemented the `domain2ascii` transform, + // often in combination with other features (e.g. with percent encoding). + // + 261, // domain2ascii: (no-break, zero-width, zero-width-no-break) are name-prepped away to nothing. + 263, // domain2ascii: U+3002 is mapped to U+002E (dot). + 269, // domain2ascii: fullwidth input should be converted to ASCII and NOT IDN-ized. + 274, // domain2ascii: Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN. + 275, // domain2ascii: Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN. + 286, // domain2ascii: Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. + 369, // domain2ascii: Hosts and percent-encoding. + 370, // domain2ascii: Hosts and percent-encoding. + 582, // domain2ascii: IDNA ignored code points in file URLs hosts. + 583, // domain2ascii: IDNA ignored code points in file URLs hosts. + ]) + harness.runTests(testFile) + XCTAssertEqual(harness.entriesSeen, 696, "Unexpected number of tests executed.") + XCTAssertFalse(harness.report.hasUnexpectedResults, "Test failed") + + let reportURL = fileURLForReport(named: "weburl_constructor_wpt.txt") + try harness.report.generateReport().write(to: reportURL, atomically: false, encoding: .utf8) + print("ℹ️ Report written to \(reportURL)") + } + /// Runs the additional constructor tests (no expected failures) and writes a report. + func testURLConstructor_additional() throws { + let data = try XCTUnwrap(loadTestResource(name: "additional_constructor_tests")) 
+ let testFile = try JSONDecoder().decode(WPTConstructorTest.TestFile.self, from: data) + XCTAssertEqual( + testFile.tests.count, 82, + "Incorrect number of test cases. If you updated the test list, be sure to update the expected failure indexes" + ) + + var harness = WPTConstructorTest.WebURLReportHarness() + harness.runTests(testFile) + XCTAssertEqual(harness.entriesSeen, 82, "Unexpected number of tests executed.") + XCTAssertFalse(harness.report.hasUnexpectedResults, "Test failed") + + let reportURL = fileURLForReport(named: "weburl_constructor_more.txt") + try harness.report.generateReport().write(to: reportURL, atomically: false, encoding: .utf8) + print("ℹ️ Report written to \(reportURL)") + } +} + + +// -------------------------------------------- +// MARK: - Setters +// -------------------------------------------- +// https://github.com/web-platform-tests/wpt/blob/master/url/resources/setters_tests.json +// at version 050308a616a8388f1ad5d6e87eac0270fd35023f + + +extension WebPlatformTests { + /// Runs the WPT setter tests from `setters_tests.json` and writes a report. + func testURLSetters() throws { + let data = try XCTUnwrap(loadTestResource(name: "setters_tests")) + let testFile = try JSONDecoder().decode(WPTSetterTest.TestFile.self, from: data) + + var harness = WPTSetterTest.WebURLReportHarness() + harness.runTests(testFile) + XCTAssertEqual(harness.entriesSeen, 156, "Unexpected number of tests executed.") + XCTAssertFalse(harness.report.hasUnexpectedResults, "Test failed") + + let reportURL = fileURLForReport(named: "weburl_setters_wpt.txt") + try harness.report.generateReport().write(to: reportURL, atomically: false, encoding: .utf8) + print("ℹ️ Report written to \(reportURL)") + } + /// Runs the additional setter tests and writes a report. + func testURLSetters_additional() throws { + let data = try XCTUnwrap(loadTestResource(name: "additional_setters_tests")) 
+ let testFile = try JSONDecoder().decode(WPTSetterTest.TestFile.self, from: data) + + var harness = WPTSetterTest.WebURLReportHarness() + harness.runTests(testFile) + XCTAssertEqual(harness.entriesSeen, 5, "Unexpected number of tests executed.") + XCTAssertFalse(harness.report.hasUnexpectedResults, "Test failed") + + let reportURL = fileURLForReport(named: "weburl_setters_more.txt") + try harness.report.generateReport().write(to: reportURL, atomically: false, encoding: .utf8) + print("ℹ️ Report written to \(reportURL)") + } +} diff --git a/Tests/WebURLTests/WebURLTests.swift b/Tests/WebURLTests/WebURLTests.swift new file mode 100644 index 000000000..6ac81c094 --- /dev/null +++ b/Tests/WebURLTests/WebURLTests.swift @@ -0,0 +1,1016 @@ +// Copyright The swift-url Contributors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import WebURLTestSupport +import XCTest + +@testable import WebURL + +class WebURLTests: XCTestCase {} + +extension WebURLTests { + + /// Tests that setters copy to new storage when the mutated URL is not a unique reference. + /// + func testCopyOnWrite_nonUnique() { + + // TODO: Can we rule out copying due to needing more capacity or changing header type? + // - Maybe add an internal 'reserveCapacity' function? + // TODO: These are by no means all paths that each setter can take. + var url = WebURL("http://example.com/a/b?c=d&e=f#gh")! 
+ let original = url + // `original` must be left untouched by every mutation of `url` below; this is re-checked after each setter. + func checkOriginalHasNotChanged() { + XCTAssertEqual(original.serialized, "http://example.com/a/b?c=d&e=f#gh") + XCTAssertEqual(original.scheme, "http") + } + // Scheme. + url.scheme = "https" + XCTAssertEqual(url.serialized, "https://example.com/a/b?c=d&e=f#gh") + XCTAssertEqual(url.scheme, "https") + XCTAssertURLIsIdempotent(url) + checkOriginalHasNotChanged() + url = original + // Username. + url.username = "user" + XCTAssertEqual(url.serialized, "http://user@example.com/a/b?c=d&e=f#gh") + XCTAssertEqual(url.username, "user") + XCTAssertURLIsIdempotent(url) + checkOriginalHasNotChanged() + url = original + // Password. + url.password = "pass" + XCTAssertEqual(url.serialized, "http://:pass@example.com/a/b?c=d&e=f#gh") + XCTAssertEqual(url.password, "pass") + XCTAssertURLIsIdempotent(url) + checkOriginalHasNotChanged() + url = original + // Hostname. + url.hostname = "test.test" + XCTAssertEqual(url.serialized, "http://test.test/a/b?c=d&e=f#gh") + XCTAssertEqual(url.hostname, "test.test") + XCTAssertURLIsIdempotent(url) + checkOriginalHasNotChanged() + url = original + // Port. + url.port = 8080 + XCTAssertEqual(url.serialized, "http://example.com:8080/a/b?c=d&e=f#gh") + XCTAssertEqual(url.port, 8080) + XCTAssertURLIsIdempotent(url) + checkOriginalHasNotChanged() + url = original + // Path. 
+ url.path = "/foo/bar/baz" + XCTAssertEqual(url.serialized, "http://example.com/foo/bar/baz?c=d&e=f#gh") + XCTAssertEqual(url.path, "/foo/bar/baz") + XCTAssertURLIsIdempotent(url) + checkOriginalHasNotChanged() + url = original + // Query + url.query = "foo=bar&baz=qux" + XCTAssertEqual(url.serialized, "http://example.com/a/b?foo=bar&baz=qux#gh") + XCTAssertEqual(url.query, "foo=bar&baz=qux") + XCTAssertURLIsIdempotent(url) + checkOriginalHasNotChanged() + url = original + // Fragment + url.fragment = "foo" + XCTAssertEqual(url.serialized, "http://example.com/a/b?c=d&e=f#foo") + XCTAssertEqual(url.fragment, "foo") + XCTAssertURLIsIdempotent(url) + checkOriginalHasNotChanged() + } + + // Note: This is likely to be a bit fragile, since it relies on optimisations which might not happen at -Onone. + // For now, it works. + + /// Tests that setters on a uniquely referenced URL are performed in-place. + /// + func testCopyOnWrite_unique() { + + var url = WebURL("wss://user:pass@example.com:90/a/b?c=d&e=f#gh")! + XCTAssertEqual(url.serialized, "wss://user:pass@example.com:90/a/b?c=d&e=f#gh") + + // All new values must be the same length, so we can be sure we have enough capacity. + + // Scheme. + checkDoesNotCopy(&url) { + $0.scheme = "ftp" + } + XCTAssertEqual(url.serialized, "ftp://user:pass@example.com:90/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + // Username. + checkDoesNotCopy(&url) { + $0.username = "resu" + } + XCTAssertEqual(url.serialized, "ftp://resu:pass@example.com:90/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + // Password. + checkDoesNotCopy(&url) { + $0.password = "ssap" + } + XCTAssertEqual(url.serialized, "ftp://resu:ssap@example.com:90/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + // Hostname. + checkDoesNotCopy(&url) { + $0.hostname = "moc.elpmaxe" + } + XCTAssertEqual(url.serialized, "ftp://resu:ssap@moc.elpmaxe:90/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + // Port. 
+ checkDoesNotCopy(&url) { + $0.port = 42 + } + XCTAssertEqual(url.serialized, "ftp://resu:ssap@moc.elpmaxe:42/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + // Path + checkDoesNotCopy(&url) { + $0.path = "/j/k" + } + XCTAssertEqual(url.serialized, "ftp://resu:ssap@moc.elpmaxe:42/j/k?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + // Query + checkDoesNotCopy(&url) { + $0.query = "m=n&o=p" + } + XCTAssertEqual(url.serialized, "ftp://resu:ssap@moc.elpmaxe:42/j/k?m=n&o=p#gh") + XCTAssertURLIsIdempotent(url) + // Fragment + checkDoesNotCopy(&url) { + $0.fragment = "zz" + } + XCTAssertEqual(url.serialized, "ftp://resu:ssap@moc.elpmaxe:42/j/k?m=n&o=p#zz") + XCTAssertURLIsIdempotent(url) + // Chained modifying wrappers. + checkDoesNotCopy(&url) { + $0.jsModel.swiftModel.jsModel.swiftModel.jsModel.swiftModel.fragment = "aa" + } + XCTAssertEqual(url.serialized, "ftp://resu:ssap@moc.elpmaxe:42/j/k?m=n&o=p#aa") + XCTAssertURLIsIdempotent(url) + } +} + +// WebURL component tests. +// +// The behaviour of getters and setters is tested via the JS model according to the WPT test files. +// However, the JS model is in many ways not ideal for use in Swift, so these tests only cover deviations from that +// model, including errors that can be thrown by the setters. + +extension WebURLTests { + + /// Tests the WebURL scheme setter. + /// + /// The Swift model deviates from the JS model in that it does not trim or filter the new value when setting. + /// + func testSchemeSetter() { + + do { + // [Throw] Invalid scheme. + var url = WebURL("http://example.com/a/b?c=d&e=f#gh")! + XCTAssertThrowsSpecific(URLSetterError.invalidScheme) { + try url.setScheme("🤯") + } + XCTAssertEqual(url.serialized, "http://example.com/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + + // [Throw] Change of special-ness. 
+ XCTAssertThrowsSpecific(URLSetterError.changeOfSchemeSpecialness) { + try url.setScheme("foo") + } + XCTAssertEqual(url.serialized, "http://example.com/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + + // [Deviation] If there is content after the ":", the operation fails. The JS model silently discards it. + XCTAssertThrowsSpecific(URLSetterError.invalidScheme) { + try url.setScheme("http://foo/") + } + XCTAssertEqual(url.serialized, "http://example.com/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + + // ":" is allowed as the final character, but not required. + XCTAssertNoThrow(try url.setScheme("ws")) + XCTAssertEqual(url.serialized, "ws://example.com/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + + XCTAssertNoThrow(try url.setScheme("https:")) + XCTAssertEqual(url.serialized, "https://example.com/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Tabs and newlines are not ignored; they cause the setter to fail. The JS model ignores them. + XCTAssertThrowsSpecific(URLSetterError.invalidScheme) { + try url.setScheme("\th\nttp:") + } + XCTAssertEqual(url.serialized, "https://example.com/a/b?c=d&e=f#gh") + XCTAssertURLIsIdempotent(url) + } + + do { + // [Throw] URL with credentials or port changing to scheme which does not allow them. + var url = WebURL("http://user:pass@somehost/")! + XCTAssertThrowsSpecific(URLSetterError.newSchemeCannotHaveCredentialsOrPort) { + try url.setScheme("file") + } + XCTAssertNoThrow(try url.setScheme("https")) + XCTAssertEqual(url.serialized, "https://user:pass@somehost/") + XCTAssertURLIsIdempotent(url) + + url = WebURL("http://somehost:8080/")! + XCTAssertThrowsSpecific(URLSetterError.newSchemeCannotHaveCredentialsOrPort) { + try url.setScheme("file") + } + XCTAssertNoThrow(try url.setScheme("https")) + XCTAssertEqual(url.serialized, "https://somehost:8080/") + XCTAssertURLIsIdempotent(url) + } + + do { + // [Throw] URL with empty hostname changing to scheme which does not allow them. 
+ var url = WebURL("file:///")! + XCTAssertThrowsSpecific(URLSetterError.newSchemeCannotHaveEmptyHostname) { + try url.setScheme("http") + } + XCTAssertNoThrow(try url.setScheme("file")) + XCTAssertEqual(url.serialized, "file:///") + XCTAssertURLIsIdempotent(url) + } + } + + /// Tests the Swift model 'username' property. + /// + /// The Swift model deviates from the JS model in that it presents empty/not present usernames as 'nil'. + /// + func testUsername() { + + // [Deviation] Empty usernames are entirely removed (including separator), + // therefore the Swift model returns 'nil' to mean 'not present'. + var url = WebURL("http://example.com/")! + XCTAssertNil(url.username) + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + url.username = "some username" + XCTAssertEqual(url.username, "some%20username") + XCTAssertEqual(url.serialized, "http://some%20username@example.com/") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Setting the empty string is the same as setting 'nil'. + url.username = "" + XCTAssertNil(url.username) + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + url.username = "some username" + XCTAssertEqual(url.username, "some%20username") + XCTAssertEqual(url.serialized, "http://some%20username@example.com/") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Setting 'nil' is the same as setting the empty string. + url.username = nil + XCTAssertNil(url.username) + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + // [Throw] Setting credentials when the scheme does not allow them. + url = WebURL("file://somehost/p1/p2")! 
+ XCTAssertNil(url.username) + XCTAssertEqual(url.serialized, "file://somehost/p1/p2") + XCTAssertThrowsSpecific(URLSetterError.cannotHaveCredentialsOrPort) { try url.setUsername("user") } + XCTAssertEqual(url.serialized, "file://somehost/p1/p2") + XCTAssertURLIsIdempotent(url) + } + + /// Tests the Swift model 'password' property. + /// + /// The Swift model deviates from the JS model in that it presents empty/not present passwords as 'nil'. + /// + func testPassword() { + + // [Deviation] Empty passwords are entirely removed (including separator), + // therefore the Swift model returns 'nil' to mean 'not present'. + var url = WebURL("http://example.com/")! + XCTAssertNil(url.password) + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + url.password = "🤫" + XCTAssertEqual(url.password, "%F0%9F%A4%AB") + XCTAssertEqual(url.serialized, "http://:%F0%9F%A4%AB@example.com/") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Setting the empty string is the same as setting 'nil'. + url.password = "" + XCTAssertNil(url.password) + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + url.password = "🤫" + XCTAssertEqual(url.password, "%F0%9F%A4%AB") + XCTAssertEqual(url.serialized, "http://:%F0%9F%A4%AB@example.com/") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Setting 'nil' is the same as setting the empty string. + url.password = nil + XCTAssertNil(url.password) + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + // [Throw] Setting credentials when the scheme does not allow them. + url = WebURL("file://somehost/p1/p2")! 
+ XCTAssertNil(url.password) + XCTAssertEqual(url.serialized, "file://somehost/p1/p2") + XCTAssertThrowsSpecific(URLSetterError.cannotHaveCredentialsOrPort) { try url.setPassword("pass") } + XCTAssertEqual(url.serialized, "file://somehost/p1/p2") + XCTAssertURLIsIdempotent(url) + } + + /// Tests the Swift model 'hostname' property. + /// + /// The Swift model deviates from the JS model in that it does not trim or filter the new value when setting, can represent not-present hosts as 'nil', and supports + /// setting hosts to 'nil'. + /// + func testHostname() { + + // [Deviation] Hostname is not trimmed; invalid host code points such as "?", "#", or ":" cause the setter to fail. + var url = WebURL("http://example.com/")! + XCTAssertThrowsSpecific(URLSetterError.invalidHostname) { try url.setHostname("hello?") } + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + XCTAssertThrowsSpecific(URLSetterError.invalidHostname) { try url.setHostname("hello#") } + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + XCTAssertThrowsSpecific(URLSetterError.invalidHostname) { try url.setHostname("hel:lo") } + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Hostname is not filtered. Tabs and newlines are invalid host code points; they cause the setter to fail. + XCTAssertThrowsSpecific(URLSetterError.invalidHostname) { try url.setHostname("\thel\nlo") } + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Swift model can distinguish between empty and not-present hostnames. + XCTAssertNil(WebURL("unix:/some/path")!.hostname) + XCTAssertEqual(WebURL("unix:///some/path")!.hostname, "") + + // [Deviation] Swift model allows setting hostname to nil (removing it, not just making it empty). + // Special schemes do not allow 'nil' hostnames. 
+ XCTAssertEqual(url.scheme, "http") + XCTAssertThrowsSpecific(URLSetterError.schemeDoesNotSupportNilOrEmptyHostnames) { + try url.setHostname(String?.none) + } + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertURLIsIdempotent(url) + + // 'file' allows empty hostnames, but not 'nil' hostnames. + url = WebURL("file:///some/path")! + XCTAssertEqual(url.hostname, "") + XCTAssertEqual(url.scheme, "file") + XCTAssertEqual(url.serialized, "file:///some/path") + XCTAssertThrowsSpecific(URLSetterError.schemeDoesNotSupportNilOrEmptyHostnames) { + try url.setHostname(String?.none) + } + XCTAssertEqual(url.serialized, "file:///some/path") + XCTAssertURLIsIdempotent(url) + + // Non-special schemes allow 'nil' hostnames. + url = WebURL("unix:///some/path")! + XCTAssertNoThrow(try url.setHostname(String?.none)) + XCTAssertEqual(url.serialized, "unix:/some/path") + XCTAssertURLIsIdempotent(url) + // But not if they already have credentials or ports. + url = WebURL("unix://user:pass@example/some/path")! + XCTAssertEqual(url.hostname, "example") + XCTAssertEqual(url.username, "user") + XCTAssertThrowsSpecific(URLSetterError.cannotSetEmptyHostnameWithCredentialsOrPort) { + try url.setHostname(String?.none) + } + XCTAssertEqual(url.serialized, "unix://user:pass@example/some/path") + XCTAssertURLIsIdempotent(url) + + url = WebURL("unix://example:99/some/path")! + XCTAssertEqual(url.hostname, "example") + XCTAssertEqual(url.port, 99) + XCTAssertThrowsSpecific(URLSetterError.cannotSetEmptyHostnameWithCredentialsOrPort) { + try url.setHostname(String?.none) + } + XCTAssertEqual(url.serialized, "unix://example:99/some/path") + XCTAssertURLIsIdempotent(url) + // When setting a hostname to/from 'nil', we may need to add/remove a path sigil. 
+ do { + func check_has_path_sigil(url: WebURL) { + XCTAssertEqual(url.serialized, "web+demo:/.//not-a-host/test") + XCTAssertEqual(url.storage.structure.sigil, .path) + XCTAssertEqual(url.hostname, nil) + XCTAssertEqual(url.path, "//not-a-host/test") + XCTAssertURLIsIdempotent(url) + } + func check_has_auth_sigil(url: WebURL, hostname: String) { + XCTAssertEqual(url.serialized, "web+demo://\(hostname)//not-a-host/test") + XCTAssertEqual(url.storage.structure.sigil, .authority) + XCTAssertEqual(url.hostname, hostname) + XCTAssertEqual(url.path, "//not-a-host/test") + XCTAssertURLIsIdempotent(url) + } + // Start with a 'nil' host, path sigil. + var test_url = WebURL("web+demo:/.//not-a-host/test")! + check_has_path_sigil(url: test_url) + // Switch to a non-empty host. We should gain an authority sigil. + test_url.hostname = "host" + check_has_auth_sigil(url: test_url, hostname: "host") + // Switch to an empty host. We should still have an authority sigil. + test_url.hostname = "" + check_has_auth_sigil(url: test_url, hostname: "") + // Switch to a 'nil' host. We should change the authority sigil to a path sigil. + test_url.hostname = nil + check_has_path_sigil(url: test_url) + } + + // [Throw] Cannot set hostname on cannot-be-a-base URLs. + url = WebURL("mailto:bob")! + XCTAssertNil(url.hostname) + XCTAssertTrue(url.cannotBeABase) + XCTAssertEqual(url.serialized, "mailto:bob") + XCTAssertThrowsSpecific(URLSetterError.cannotSetHostOnCannotBeABaseURL) { + try url.setHostname("somehost") + } + XCTAssertEqual(url.serialized, "mailto:bob") + XCTAssertURLIsIdempotent(url) + + // [Throw] Cannot set empty hostname on special schemes. + url = WebURL("http://example.com/p1/p2")! 
+ XCTAssertEqual(url.hostname, "example.com") + XCTAssertEqual(url.serialized, "http://example.com/p1/p2") + XCTAssertThrowsSpecific(URLSetterError.schemeDoesNotSupportNilOrEmptyHostnames) { + try url.setHostname("") + } + XCTAssertEqual(url.serialized, "http://example.com/p1/p2") + XCTAssertURLIsIdempotent(url) + + // [Throw] Cannot set empty hostname if the URL contains credentials or port. + url = WebURL("foo://user@example.com/p1/p2")! + XCTAssertEqual(url.username, "user") + XCTAssertEqual(url.hostname, "example.com") + XCTAssertEqual(url.serialized, "foo://user@example.com/p1/p2") + XCTAssertThrowsSpecific(URLSetterError.cannotSetEmptyHostnameWithCredentialsOrPort) { + try url.setHostname("") + } + XCTAssertEqual(url.serialized, "foo://user@example.com/p1/p2") + XCTAssertURLIsIdempotent(url) + + url = WebURL("foo://example.com:8080/p1/p2")! + XCTAssertEqual(url.port, 8080) + XCTAssertEqual(url.hostname, "example.com") + XCTAssertEqual(url.serialized, "foo://example.com:8080/p1/p2") + XCTAssertThrowsSpecific(URLSetterError.cannotSetEmptyHostnameWithCredentialsOrPort) { + try url.setHostname("") + } + XCTAssertEqual(url.serialized, "foo://example.com:8080/p1/p2") + XCTAssertURLIsIdempotent(url) + + // [Throw] Invalid hostnames. + url = WebURL("foo://example.com/")! + XCTAssertEqual(url.hostname, "example.com") + XCTAssertEqual(url.serialized, "foo://example.com/") + XCTAssertThrowsSpecific(URLSetterError.invalidHostname) { try url.setHostname("@") } + XCTAssertEqual(url.serialized, "foo://example.com/") + XCTAssertURLIsIdempotent(url) + + XCTAssertThrowsSpecific(URLSetterError.invalidHostname) { try url.setHostname("/a/b/c") } + XCTAssertEqual(url.serialized, "foo://example.com/") + XCTAssertURLIsIdempotent(url) + + XCTAssertThrowsSpecific(URLSetterError.invalidHostname) { try url.setHostname("[:::]") } + XCTAssertEqual(url.serialized, "foo://example.com/") + XCTAssertURLIsIdempotent(url) + } + + /// Tests the Swift model 'port' property. 
+ /// + /// The Swift model deviates from the JS model in that it takes an `Int?` rather than a string. + /// + func testPort() { + + // [Throw] Adding a port to a URL which does not allow them. + var url = WebURL("file://somehost/p1/p2")! + XCTAssertThrowsSpecific(URLSetterError.cannotHaveCredentialsOrPort) { try url.setPort(99) } + XCTAssertEqual(url.serialized, "file://somehost/p1/p2") + XCTAssertURLIsIdempotent(url) + + // [Throw] Setting a port to a value which is not a valid UInt16 (negative, or too large). + url = WebURL("http://example.com/p1/p2")! + XCTAssertThrowsSpecific(URLSetterError.portValueOutOfBounds) { try url.setPort(-99) } + XCTAssertEqual(url.serialized, "http://example.com/p1/p2") + XCTAssertURLIsIdempotent(url) + XCTAssertThrowsSpecific(URLSetterError.portValueOutOfBounds) { try url.setPort(Int(UInt32.max)) } + XCTAssertEqual(url.serialized, "http://example.com/p1/p2") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Non-present port is represented as 'nil', rather than empty string. + XCTAssertNil(url.port) + // Set the port to a non-nil value. + XCTAssertNoThrow(try url.setPort(42)) + XCTAssertEqual(url.port, 42) + XCTAssertEqual(url.serialized, "http://example.com:42/p1/p2") + XCTAssertURLIsIdempotent(url) + // And back to nil. + XCTAssertNoThrow(try url.setPort(nil)) + XCTAssertNil(url.port) + XCTAssertEqual(url.serialized, "http://example.com/p1/p2") + XCTAssertURLIsIdempotent(url) + } + + /// Tests the Swift model 'path' property. + /// + /// The Swift model deviates from the JS model in that it does not filter the new value when setting. + /// + func testPath() { + + // [Throw] Cannot set path on cannot-be-a-base URLs. + var url = WebURL("mailto:bob")! + XCTAssertEqual(url.path, "bob") + XCTAssertTrue(url.cannotBeABase) + XCTAssertThrowsSpecific(URLSetterError.cannotSetPathOnCannotBeABaseURL) { try url.setPath("frank") } + XCTAssertEqual(url.serialized, "mailto:bob") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Tabs and newlines are not trimmed or filtered; they are percent-encoded. 
+ url = WebURL("file:///hello/world?someQuery")! + XCTAssertNoThrow(try url.setPath("\t\n\t")) + XCTAssertEqual(url.path, "/%09%0A%09") + XCTAssertEqual(url.serialized, "file:///%09%0A%09?someQuery") + XCTAssertURLIsIdempotent(url) + } + + /// Tests the Swift model 'query' property. + /// + /// The Swift model deviates from the JS model in that it does not trim the leading "?" or filter the new value when setting. It is also able to distinguish between + /// not-present and empty query strings using 'nil'. + /// + func testQuery() { + + // [Deviation] The Swift model does not include the leading "?" in the getter, uses 'nil' to mean 'not present'. + var url = WebURL("http://example.com/hello")! + XCTAssertEqual(url.serialized, "http://example.com/hello") + XCTAssertNil(url.query) + + url.query = "" + XCTAssertEqual(url.serialized, "http://example.com/hello?") + XCTAssertEqual(url.query, "") + XCTAssertURLIsIdempotent(url) + + url.query = "a=b&c=d" + XCTAssertEqual(url.serialized, "http://example.com/hello?a=b&c=d") + XCTAssertEqual(url.query, "a=b&c=d") + XCTAssertURLIsIdempotent(url) + + url.query = nil + XCTAssertEqual(url.serialized, "http://example.com/hello") + XCTAssertNil(url.query) + XCTAssertURLIsIdempotent(url) + + // [Deviation] The Swift model does not trim the leading "?" from the new value when setting. + url.query = "?e=f&g=h" + XCTAssertEqual(url.serialized, "http://example.com/hello??e=f&g=h") + XCTAssertEqual(url.query, "?e=f&g=h") + XCTAssertURLIsIdempotent(url) + + // [Deviation] Newlines and tabs are not filtered. + url.query = "\tso\nmething" + XCTAssertEqual(url.serialized, "http://example.com/hello?%09so%0Amething") + XCTAssertEqual(url.query, "%09so%0Amething") + XCTAssertURLIsIdempotent(url) + } + + /// Tests the Swift model 'fragment' property. + /// + /// The Swift model deviates from the JS model in that it does not trim the leading "#" or filter the new value when setting. 
It is also able to distinguish between + /// not-present and empty fragment strings using 'nil'. + /// + func testFragment() { + + // [Deviation]: The Swift model does not include the leading "#" in the getter, uses 'nil' to mean 'not present'. + var url = WebURL("http://example.com/hello")! + XCTAssertEqual(url.serialized, "http://example.com/hello") + XCTAssertNil(url.fragment) + + url.fragment = "" + XCTAssertEqual(url.serialized, "http://example.com/hello#") + XCTAssertEqual(url.fragment, "") + XCTAssertURLIsIdempotent(url) + + url.fragment = "test" + XCTAssertEqual(url.serialized, "http://example.com/hello#test") + XCTAssertEqual(url.fragment, "test") + XCTAssertURLIsIdempotent(url) + + url.fragment = nil + XCTAssertEqual(url.serialized, "http://example.com/hello") + XCTAssertNil(url.fragment) + XCTAssertURLIsIdempotent(url) + + // [Deviation]: The Swift model does not trim the leading "#" from the new value when setting. + url.fragment = "#test" + XCTAssertEqual(url.serialized, "http://example.com/hello##test") + XCTAssertEqual(url.fragment, "#test") + XCTAssertURLIsIdempotent(url) + + // [Deviation]: Newlines and tabs are not filtered. + url.fragment = "\tso\nmething" + XCTAssertEqual(url.serialized, "http://example.com/hello#%09so%0Amething") + XCTAssertEqual(url.fragment, "%09so%0Amething") + XCTAssertURLIsIdempotent(url) + } +} + +extension WebURLTests { + + func testSerializedExcludingFragment() { + do { + let url = WebURL("http://example.com/some/path?and&a&query#withAFragment")! + XCTAssertEqual(url.serialized, "http://example.com/some/path?and&a&query#withAFragment") + XCTAssertEqual(url.serializedExcludingFragment, "http://example.com/some/path?and&a&query") + } + // Fragment with a bunch of extra '#'s. + do { + let url = WebURL("http://example.com/some/path?and&a&query######withAFragment")! 
+ XCTAssertEqual(url.serialized, "http://example.com/some/path?and&a&query######withAFragment") + XCTAssertEqual(url.serializedExcludingFragment, "http://example.com/some/path?and&a&query") + } + // No fragment. + do { + let url = WebURL("http://example.com/some/path?and&a&query")! + XCTAssertEqual(url.serialized, "http://example.com/some/path?and&a&query") + XCTAssertEqual(url.serializedExcludingFragment, "http://example.com/some/path?and&a&query") + } + } + + func testPortOrKnownDefault() { + // Special schemes. + do { + let url = WebURL("file:///usr/bin/swift")! + XCTAssertEqual(url.serialized, "file:///usr/bin/swift") + XCTAssertNil(url.port) + XCTAssertNil(url.portOrKnownDefault) + } + do { + var url = WebURL("http://example.com/")! + XCTAssertEqual(url.serialized, "http://example.com/") + XCTAssertNil(url.port) + XCTAssertEqual(url.portOrKnownDefault, 80) + + url.port = 999 + XCTAssertEqual(url.serialized, "http://example.com:999/") + XCTAssertEqual(url.port, 999) + XCTAssertEqual(url.portOrKnownDefault, 999) + } + do { + var url = WebURL("ws://example.com/")! + XCTAssertEqual(url.serialized, "ws://example.com/") + XCTAssertNil(url.port) + XCTAssertEqual(url.portOrKnownDefault, 80) + + url.port = 999 + XCTAssertEqual(url.serialized, "ws://example.com:999/") + XCTAssertEqual(url.port, 999) + XCTAssertEqual(url.portOrKnownDefault, 999) + } + do { + var url = WebURL("https://example.com/")! + XCTAssertEqual(url.serialized, "https://example.com/") + XCTAssertNil(url.port) + XCTAssertEqual(url.portOrKnownDefault, 443) + + url.port = 999 + XCTAssertEqual(url.serialized, "https://example.com:999/") + XCTAssertEqual(url.port, 999) + XCTAssertEqual(url.portOrKnownDefault, 999) + } + do { + var url = WebURL("wss://example.com/")! 
+ XCTAssertEqual(url.serialized, "wss://example.com/") + XCTAssertNil(url.port) + XCTAssertEqual(url.portOrKnownDefault, 443) + + url.port = 999 + XCTAssertEqual(url.serialized, "wss://example.com:999/") + XCTAssertEqual(url.port, 999) + XCTAssertEqual(url.portOrKnownDefault, 999) + } + do { + var url = WebURL("ftp://example.com/")! + XCTAssertEqual(url.serialized, "ftp://example.com/") + XCTAssertNil(url.port) + XCTAssertEqual(url.portOrKnownDefault, 21) + + url.port = 999 + XCTAssertEqual(url.serialized, "ftp://example.com:999/") + XCTAssertEqual(url.port, 999) + XCTAssertEqual(url.portOrKnownDefault, 999) + } + // Non-special scheme. + do { + var url = WebURL("foo://example.com/")! + XCTAssertEqual(url.serialized, "foo://example.com/") + XCTAssertNil(url.port) + XCTAssertNil(url.portOrKnownDefault) + + url.port = 999 + XCTAssertEqual(url.serialized, "foo://example.com:999/") + XCTAssertEqual(url.port, 999) + XCTAssertEqual(url.portOrKnownDefault, 999) + } + } +} + +extension WebURLTests { + + /// Tests that URL setters do not inadvertently create strings that would be re-parsed as 'cannot-be-a-base' URLs. + /// + /// There are 2 situations where this could happen: + /// + /// 1. Setting a `nil` host when there is no path + /// 2. Setting an empty path when there is no host + /// + func testDoesNotCreateCannotBeABaseURLs() { + + // Check that we require a path in order to remove a host. + var url = WebURL("foo://somehost")! + XCTAssertEqual(url.serialized, "foo://somehost") + XCTAssertEqual(url.path, "") + XCTAssertNotNil(url.hostname) + XCTAssertThrowsSpecific(URLSetterError.cannotRemoveHostnameWithoutPath) { + try url.setHostname(String?.none) + } + XCTAssertEqual(url.serialized, "foo://somehost") + XCTAssertURLIsIdempotent(url) + + // Check that we require a host in order to remove a path. + + // [Bug] This seems to be a bug in the standard, which our implementation also exhibits (yay for accuracy?). 
+ // The path of non-special URLs can be set to empty, even if they don't have a host (path-only URLs). + // This makes them 'cannot-be-a-base' URLs, but without the flag. Re-parsing the URL sets the flag, + // but it means the set of operations which are allowed depends on how you got the URL (not idempotent). + // See: https://github.com/whatwg/url/issues/581 + + url = WebURL("foo:/hello/world?someQuery")! + XCTAssertEqual(url.serialized, "foo:/hello/world?someQuery") + XCTAssertEqual(url.path, "/hello/world") + XCTAssertNil(url.hostname) + XCTAssertFalse(url.cannotBeABase) + + url.path = "" + XCTAssertEqual(url.serialized, "foo:?someQuery") + XCTAssertEqual(url.path, "") + XCTAssertNil(url.hostname) + XCTAssertFalse(url.cannotBeABase) + XCTAssertTrue(WebURL(url.serialized)!.cannotBeABase) + // XCTAssertURLIsIdempotent(url) - see comment above. + + url.path = "test" + XCTAssertEqual(url.serialized, "foo:/test?someQuery") + XCTAssertEqual(url.path, "/test") + XCTAssertNil(url.hostname) + XCTAssertFalse(url.cannotBeABase) + XCTAssertURLIsIdempotent(url) + } + + func testJSModelSetters() { + // Check that 'pathname' setter does not remove leading slashes. + do { + var x = WebURL("sc://x?hello")!.jsModel + XCTAssertEqual(x.href, "sc://x?hello") + XCTAssertEqual(x.pathname, "") + x.pathname = #"/"# + XCTAssertEqual(x.href, "sc://x/?hello") + XCTAssertEqual(x.pathname, "/") + x.pathname = #"/s"# + XCTAssertEqual(x.href, "sc://x/s?hello") + XCTAssertEqual(x.pathname, "/s") + x.pathname = #""# + XCTAssertEqual(x.href, "sc://x?hello") + XCTAssertEqual(x.pathname, "") + } + + + // TODO [testing]: This test needs to be more comprehensive, and we need tests like this exercising all major + // paths in all setters. + + // Checks what happens when a scheme change requires the port to be removed, and the resulting URL + // has a different optimal storage type to the original (i.e. suddenly becomes less than 255 bytes). 
+ // It's such a niche case that we'd otherwise never see it. + do { + var url = WebURL( + "ws://hostnamewhichtakesustotheedge:443?hellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellop" + )!.jsModel + switch url.storage { + case .large(_): break + default: XCTFail("Unexpected storage type") + } + url.scheme = "wss" + switch url.storage { + case .small(_): break + default: XCTFail("Unexpected storage type") + } + XCTAssertEqual( + url.description, + "wss://hostnamewhichtakesustotheedge/?hellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellohellop" + ) + } + } +} + + +// MARK: - Host and Origin + + +extension WebURLTests { + + func testHost() { + + // Non-IP hostnames in special URLs are always domains. + do { + let url = WebURL("http://example.com/aPath?aQuery#andFragment, too")! + if case .domain("example.com") = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + + // Non-IP hostnames in non-special URLs are always opaque hostnames. + do { + let url = WebURL("foo://example.com/aPath?aQuery#andFragment, too")! + if case .opaque("example.com") = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + + // Special URLs detect IPv4 addresses. + do { + let url = WebURL("http://0xbadf00d/aPath?aQuery#andFragment, too")! + if case .ipv4Address(.init(octets: (11, 173, 240, 13))) = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + // Non-special URLs do not. 
+ do { + let url = WebURL("foo://11.173.240.13/aPath?aQuery#andFragment, too")! + if case .opaque("11.173.240.13") = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + + // Both special and non-special URLs detect IPv6 addresses. + do { + let url = WebURL("http://[::127.0.0.1]/aPath?aQuery#andFragment, too")! + if case .ipv6Address(.init(pieces: (0, 0, 0, 0, 0, 0, 0x7f00, 0x0001), .numeric)) = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + do { + let url = WebURL("foo://[::127.0.0.1]/aPath?aQuery#andFragment, too")! + if case .ipv6Address(.init(pieces: (0, 0, 0, 0, 0, 0, 0x7f00, 0x0001), .numeric)) = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + + // File and non-special URLs may have empty hostnames. + do { + let url = WebURL("file:///usr/bin/swift")! + if case .empty = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + XCTAssertEqual(url.host?.serialized, "") + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + do { + let url = WebURL("foo:///some/path")! + if case .empty = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + XCTAssertEqual(url.host?.serialized, "") + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + + // Path-only and cannot-be-a-base-path URLs do not have hosts. + do { + let url = WebURL("foo:/path/only")! + if case .none = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + XCTAssertFalse(url.cannotBeABase) + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + do { + let url = WebURL("foo:some non-path")! 
+ if case .none = url.host { + XCTAssertEqual(url.host?.serialized, url.hostname) + XCTAssertTrue(url.cannotBeABase) + } else { + XCTFail("Unexpected host: \(String(describing: url.host))") + } + } + } + + func testOrigin() { + + // Special URLs return non-opaque origins. + // Are same-origin WRT other paths, queries, fragments at... err... the same origin. + if let origin = WebURL("https://example.com/index.html")?.origin { + XCTAssertEqual(origin.serialized, "https://example.com") + XCTAssertFalse(origin.isOpaque) + XCTAssertEqual(origin, WebURL("https://example.com:443/some_resource.png?the_answer=42#test")?.origin) + XCTAssertNotEqual(origin, WebURL("https://test.com/")?.origin) + } else { + XCTFail("Failed to parse valid URL") + } + + // Port number included if not the default. Different port numbers are not same-origin. + if let origin = WebURL("http://localhost:8080/index.html")?.origin { + XCTAssertEqual(origin.serialized, "http://localhost:8080") + XCTAssertFalse(origin.isOpaque) + XCTAssertEqual(origin, WebURL("http://localhost:8080/some_resource.png?query=true#frag-it")?.origin) + XCTAssertNotEqual(origin, WebURL("http://localhost:80/")?.origin) + } else { + XCTFail("Failed to parse valid URL") + } + + // Cannot-be-a-base 'blob:' URLs have the same origin as the URL parsed from their path. + if let origin = WebURL("blob:https://example.com:443/index.html")?.origin { + XCTAssertEqual(origin.serialized, "https://example.com") + XCTAssertFalse(origin.isOpaque) + XCTAssertEqual(origin, WebURL("https://example.com/some_resource.txt?q=🐟#🦆=👹")?.origin) + } else { + XCTFail("Failed to parse valid URL") + } + + // Non-cannot-be-a-base 'blob:' URLs are always opaque. 
+ if let origin = WebURL("blob:///https://example.com:443/index.html")?.origin { + XCTAssertEqual(origin.serialized, "null") + XCTAssertTrue(origin.isOpaque) + XCTAssertNotEqual(origin, origin) + XCTAssertNotEqual(origin, WebURL("blob:https://example.com")?.origin) + } else { + XCTFail("Failed to parse valid URL") + } + + // Cannot-be-a-base 'blob:' URLs have opaque origins if their path is not a valid URL string. + if let origin = WebURL("blob:this is not a URL")?.origin { + XCTAssertEqual(origin.serialized, "null") + XCTAssertTrue(origin.isOpaque) + XCTAssertNotEqual(origin, origin) + XCTAssertNotEqual(origin, WebURL("blob:also not a URL")?.origin) + } else { + XCTFail("Failed to parse valid URL") + } + + // 'file' URLs have opaque origins. + if let origin = WebURL("file:///usr/bin/swift")?.origin { + XCTAssertEqual(origin.serialized, "null") + XCTAssertTrue(origin.isOpaque) + XCTAssertNotEqual(origin, origin) + XCTAssertNotEqual(origin, WebURL("file:///var/tmp/somefile")?.origin) + } else { + XCTFail("Failed to parse valid URL") + } + + // Opaque hosts are not equal to each other. + do { + let myURL = WebURL("foo://exampleHost:4567/")! + XCTAssertTrue(myURL.origin.isOpaque) + XCTAssertFalse(myURL.origin == myURL.origin) + + var seenOrigins: Set = [myURL.origin] + XCTAssertFalse(seenOrigins.contains(myURL.origin)) + XCTAssertTrue(seenOrigins.insert(myURL.origin).inserted) + XCTAssertTrue(seenOrigins.insert(myURL.origin).inserted) + XCTAssertTrue(seenOrigins.insert(myURL.origin).inserted) + XCTAssertFalse(seenOrigins.contains(myURL.origin)) + } + } +} diff --git a/Tests/WebURLTests/_ReportGeneratingTestCase.swift b/Tests/WebURLTests/_ReportGeneratingTestCase.swift new file mode 100644 index 000000000..1314a2412 --- /dev/null +++ b/Tests/WebURLTests/_ReportGeneratingTestCase.swift @@ -0,0 +1,29 @@ +// Copyright The swift-url Contributors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation +import XCTest + +/// Base class for test cases which produce report files. +/// +/// It resolves the directory in which reports are placed, creates that directory before the tests run, +/// and builds file URLs inside it. +/// +class ReportGeneratingTestCase: XCTestCase { + + /// The report directory: the value of the `SWIFT_URL_REPORT_PATH` environment variable, + /// or `NSTemporaryDirectory()` if the variable is not set. + private static let reportDir = ProcessInfo.processInfo.environment["SWIFT_URL_REPORT_PATH"] ?? NSTemporaryDirectory() + + /// Creates the report directory, including any intermediate directories. + /// Any error thrown while creating it is discarded by `try?`. + override class func setUp() { + try? FileManager.default.createDirectory(atPath: reportDir, withIntermediateDirectories: true, attributes: nil) + } + + /// Returns the file URL formed by appending `reportName` to the report directory. 
+ func fileURLForReport(named reportName: String) -> URL { + URL(fileURLWithPath: ReportGeneratingTestCase.reportDir).appendingPathComponent(reportName) + } +} diff --git a/docs-excluded-symbols b/docs-excluded-symbols new file mode 100644 index 000000000..6e3e6d9ec --- /dev/null +++ b/docs-excluded-symbols @@ -0,0 +1,17 @@ +LazilyPercentEncodedGroups +LazilyPercentEncodedGroups.index(before:) +LazilyPercentEncodedGroups.formIndex(before:) +_PercentEncodedByte +LazilyPercentDecodedUTF8.Index +PercentEncodeSet.C0Control +PercentEncodeSet.Fragment +PercentEncodeSet.Query_NotSpecial +PercentEncodeSet.Query_Special +PercentEncodeSet.Path +PercentEncodeSet.UserInfo +PercentEncodeSet.Component +PercentEncodeSet.FormEncoded +PercentEncodeSet._Passthrough +WebURL.FormEncodedQueryParameters.KeyValuePairs.Iterator +WebURL.PathComponents.Index +Collection._longestSubrange(equalTo:)