Skip to content

Commit

Permalink
Add filesystem func to transform a path to a URI (#55454)
Browse files Browse the repository at this point in the history
In a few places across Base and the stdlib, we emit paths that we would
like people to be able to click on in their terminal or editor. Up to this
point, we have relied on automatic file path detection, but this does not
allow for alternative link text, such as contracted paths.

Doing so (via OSC 8 terminal links for example) requires filepath URI
encoding.

This functionality was previously part of a PR modifying stacktrace
printing (#51816), but after that became held up for unrelated reasons
and another PR appeared that would benefit from this utility (#55335),
I've split out this functionality so it can be used before the
stacktrace printing PR is resolved.
  • Loading branch information
tecosaur authored Sep 24, 2024
1 parent 2943833 commit a06a801
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
56 changes: 56 additions & 0 deletions base/path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -613,3 +613,59 @@ relpath(path::AbstractString, startpath::AbstractString) =
# Give each of these path functions an `AbstractString` method that converts
# its argument to `String` and calls the same function again on the result.
for pathfn in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath)
    @eval function $pathfn(path::AbstractString)
        return $pathfn(String(path))
    end
end

"""
uripath(path::AbstractString)
Encode `path` as a URI as per [RFC8089: The "file" URI
Scheme](https://www.rfc-editor.org/rfc/rfc8089), [RFC3986: Uniform Resource
Identifier (URI): Generic Syntax](https://www.rfc-editor.org/rfc/rfc3986), and
the [Freedesktop File URI spec](https://www.freedesktop.org/wiki/Specifications/file-uri-spec/).
## Examples
```julia-repl
julia> uripath("/home/user/example file.jl") # On a unix machine
"file://<hostname>/home/user/example%20file.jl"
juila> uripath("C:\\Users\\user\\example file.jl") # On a windows machine
"file:///C:/Users/user/example%20file.jl"
```
"""
function uripath end

@static if Sys.iswindows()
    # Windows: UNC paths become `file://host/share/...`, and drive-letter
    # paths become `file:///C:/...`.
    function uripath(path::String)
        # RFC3986 Section 2.1: each octet is written as `%` followed by exactly
        # two uppercase hex digits, so octets below 0x10 must be zero-padded
        # (e.g. a tab is `%09`, not `%9`).
        percent_escape(s) =
            '%' * join(map(b -> uppercase(string(b, base=16, pad=2)), codeunits(s)), '%')
        # RFC3986 Section 2.3: everything except unreserved characters (and the
        # `/` separator) is percent-escaped, one run of such characters at a time.
        encode_uri_component(s) =
            replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
        path = abspath(path)
        if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3
            unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/')
            return string("file://", encode_uri_component(unixpath)) # RFC8089 Section 2
        else
            # Assuming that non-UNC absolute paths on Windows always have a drive component
            drive, localpath = splitdrive(path)
            unixpath = join(eachsplit(localpath, path_separator_re, keepempty=false), '/')
            # Keep `:` and `|` literal in the drive, RFC8089 Appendices D.2, E.2.1, and E.2.2
            encdrive = replace(encode_uri_component(drive), "%3A" => ':', "%7C" => '|')
            return string("file:///", encdrive, '/', encode_uri_component(unixpath)) # RFC8089 Section 2
        end
    end
else
    # Non-Windows: `file://<host>/<absolute path>`, where the host is the
    # machine's hostname, or a `wsl$/<distro>` prefix when running under WSL.
    function uripath(path::String)
        # RFC3986 Section 2.1: each octet is written as `%` followed by exactly
        # two uppercase hex digits, so octets below 0x10 must be zero-padded
        # (e.g. a tab is `%09`, not `%9`).
        percent_escape(s) =
            '%' * join(map(b -> uppercase(string(b, base=16, pad=2)), codeunits(s)), '%')
        # RFC3986 Section 2.3: everything except unreserved characters (and the
        # `/` separator) is percent-escaped, one run of such characters at a time.
        encode_uri_component(s) =
            replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
        # Splitting with `keepempty=false` drops the leading separator and
        # collapses repeated ones; the leading `/` is re-added below.
        localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/')
        host = if ispath("/proc/sys/fs/binfmt_misc/WSLInterop") # WSL sigil
            distro = get(ENV, "WSL_DISTRO_NAME", "") # See <https://patrickwu.space/wslconf/>
            "wsl\$/$distro" # See <https://github.com/microsoft/terminal/pull/14993> and <https://learn.microsoft.com/en-us/windows/wsl/filesystems>
        else
            gethostname() # Freedesktop File URI Spec, Hostnames section
        end
        return string("file://", encode_uri_component(host), '/', encode_uri_component(localpath)) # RFC8089 Section 2
    end
end

# Generic entry point: convert any `AbstractString` to `String` and defer to
# the `uripath(::String)` method.
function uripath(path::AbstractString)
    return uripath(String(path))
end
13 changes: 13 additions & 0 deletions test/path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,19 @@
test_relpath()
end

@testset "uripath" begin
    # Alias the fully-qualified name to keep the assertions short.
    touri = Base.Filesystem.uripath
    # Expected authority component: empty on Windows, the hostname elsewhere.
    host = Sys.iswindows() ? "" : gethostname()
    # Filesystem root as passed to `uripath`, and how it should appear in the URI.
    sysdrive, uridrive = Sys.iswindows() ? ("C:\\", "C:/") : ("/", "")
    # Each pair maps an input path to the URI it is expected to produce.
    expectations = [
        "$(sysdrive)some$(sep)file.txt" => "file://$host/$(uridrive)some/file.txt",
        "$(sysdrive)another$(sep)$(sep)folder$(sep)file.md" => "file://$host/$(uridrive)another/folder/file.md",
        "$(sysdrive)some file with ^odd% chars" => "file://$host/$(uridrive)some%20file%20with%20%5Eodd%25%20chars",
        "$(sysdrive)weird chars like @#&()[]{}" => "file://$host/$(uridrive)weird%20chars%20like%20%40%23%26%28%29%5B%5D%7B%7D",
        "$sysdrive" => "file://$host/$uridrive",
        "$(sysdrive)unicode$(sep)Δεδομένα" => "file://$host/$(uridrive)unicode/%CE%94%CE%B5%CE%B4%CE%BF%CE%BC%CE%AD%CE%BD%CE%B1",
        "$(sysdrive)unicode$(sep)🧮🐛🔨" => "file://$host/$(uridrive)unicode/%F0%9F%A7%AE%F0%9F%90%9B%F0%9F%94%A8",
    ]
    for (input, expected) in expectations
        @test touri(input) == expected
    end
    # `.` must encode to the same URI as the current working directory.
    @test touri(".") == touri(pwd())
end

if Sys.iswindows()
@testset "issue #23646" begin
@test lowercase(relpath("E:\\a\\b", "C:\\c")) == "e:\\a\\b"
Expand Down

0 comments on commit a06a801

Please sign in to comment.