From a06a80162bb9bdf6f7e91dc18e7ccf5c12673ca4 Mon Sep 17 00:00:00 2001 From: Timothy Date: Wed, 25 Sep 2024 03:15:48 +0800 Subject: [PATCH] Add filesystem func to transform a path to a URI (#55454) In a few places across Base and the stdlib, we emit paths that we like people to be able to click on in their terminal and editor. Up to this point, we have relied on auto-filepath detection, but this does not allow for alternative link text, such as contracted paths. Doing so (via OSC 8 terminal links for example) requires filepath URI encoding. This functionality was previously part of a PR modifying stacktrace printing (#51816), but after that became held up for unrelated reasons and another PR appeared that would benefit from this utility (#55335), I've split out this functionality so it can be used before the stacktrace printing PR is resolved. --- base/path.jl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ test/path.jl | 13 ++++++++++++ 2 files changed, 69 insertions(+) diff --git a/base/path.jl b/base/path.jl index 3b8124f34f174..f6d3266d9738c 100644 --- a/base/path.jl +++ b/base/path.jl @@ -613,3 +613,59 @@ relpath(path::AbstractString, startpath::AbstractString) = for f in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath) @eval $f(path::AbstractString) = $f(String(path)) end + +""" + uripath(path::AbstractString) + +Encode `path` as a URI as per [RFC8089: The "file" URI +Scheme](https://www.rfc-editor.org/rfc/rfc8089), [RFC3986: Uniform Resource +Identifier (URI): Generic Syntax](https://www.rfc-editor.org/rfc/rfc3986), and +the [Freedesktop File URI spec](https://www.freedesktop.org/wiki/Specifications/file-uri-spec/). + +## Examples + +```julia-repl +julia> uripath("/home/user/example file.jl") # On a unix machine +"file:///home/user/example%20file.jl" + +juila> uripath("C:\\Users\\user\\example file.jl") # On a windows machine +"file:///C:/Users/user/example%20file.jl" +``` +""" +function uripath end + +@static if Sys.iswindows() + function uripath(path::String) + percent_escape(s) = # RFC3986 Section 2.1 + '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%') + encode_uri_component(s) = # RFC3986 Section 2.3 + replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape) + path = abspath(path) + if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3 + unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/') + string("file://", encode_uri_component(unixpath)) # RFC8089 Section 2 + else + drive, localpath = splitdrive(path) # Assuming that non-UNC absolute paths on Windows always have a drive component + unixpath = join(eachsplit(localpath, path_separator_re, keepempty=false), '/') + encdrive = replace(encode_uri_component(drive), "%3A" => ':', "%7C" => '|') # RFC8089 Appendices D.2, E.2.1, and E.2.2 + string("file:///", encdrive, '/', encode_uri_component(unixpath)) # RFC8089 Section 2 + end + end +else + function uripath(path::String) + percent_escape(s) = # RFC3986 Section 2.1 + '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%') + encode_uri_component(s) = # RFC3986 Section 2.3 + replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape) + localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/') + host = if ispath("/proc/sys/fs/binfmt_misc/WSLInterop") # WSL sigil + distro = get(ENV, "WSL_DISTRO_NAME", "") # See + "wsl\$/$distro" # See and + else + gethostname() # Freedesktop File URI Spec, Hostnames section + end + string("file://", encode_uri_component(host), '/', encode_uri_component(localpath)) # RFC8089 Section 2 + end +end + +uripath(path::AbstractString) = uripath(String(path)) diff --git a/test/path.jl b/test/path.jl index 2f4f2d0983a58..4c2c7034577d5 100644 --- a/test/path.jl +++ b/test/path.jl @@ -311,6 +311,19 @@ test_relpath() end + @testset "uripath" begin + host = if Sys.iswindows() "" else gethostname() end + sysdrive, uridrive = if Sys.iswindows() "C:\\", "C:/" else "/", "" end + @test Base.Filesystem.uripath("$(sysdrive)some$(sep)file.txt") == "file://$host/$(uridrive)some/file.txt" + @test Base.Filesystem.uripath("$(sysdrive)another$(sep)$(sep)folder$(sep)file.md") == "file://$host/$(uridrive)another/folder/file.md" + @test Base.Filesystem.uripath("$(sysdrive)some file with ^odd% chars") == "file://$host/$(uridrive)some%20file%20with%20%5Eodd%25%20chars" + @test Base.Filesystem.uripath("$(sysdrive)weird chars like @#&()[]{}") == "file://$host/$(uridrive)weird%20chars%20like%20%40%23%26%28%29%5B%5D%7B%7D" + @test Base.Filesystem.uripath("$sysdrive") == "file://$host/$uridrive" + @test Base.Filesystem.uripath(".") == Base.Filesystem.uripath(pwd()) + @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)Δεδομένα") == "file://$host/$(uridrive)unicode/%CE%94%CE%B5%CE%B4%CE%BF%CE%BC%CE%AD%CE%BD%CE%B1" + @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)🧮🐛🔨") == "file://$host/$(uridrive)unicode/%F0%9F%A7%AE%F0%9F%90%9B%F0%9F%94%A8" + end + if Sys.iswindows() @testset "issue #23646" begin @test lowercase(relpath("E:\\a\\b", "C:\\c")) == "e:\\a\\b"