diff --git a/lychee-bin/src/formatters/response/color.rs b/lychee-bin/src/formatters/response/color.rs
index 0a84f2a393..9aa12df40f 100644
--- a/lychee-bin/src/formatters/response/color.rs
+++ b/lychee-bin/src/formatters/response/color.rs
@@ -65,13 +65,19 @@ mod tests {
}
}
+ #[cfg(test)]
+ /// Strips ANSI escape codes from `s` so tests can assert on the plain text.
+ fn strip_ansi_codes(s: &str) -> String {
+ console::strip_ansi_codes(s).to_string()
+ }
+
#[test]
fn test_format_response_with_ok_status() {
let formatter = ColorFormatter;
let body = mock_response_body(Status::Ok(StatusCode::OK), "https://example.com");
assert_eq!(
- formatter.format_response(&body),
- "\u{1b}[38;5;2m\u{1b}[1m [200]\u{1b}[0m https://example.com/"
+ strip_ansi_codes(&formatter.format_response(&body)),
+ " [200] https://example.com/"
);
}
@@ -83,8 +89,8 @@ mod tests {
"https://example.com/404",
);
assert_eq!(
- formatter.format_response(&body),
- "\u{1b}[38;5;197m [ERROR]\u{1b}[0m https://example.com/404"
+ strip_ansi_codes(&formatter.format_response(&body)),
+ " [ERROR] https://example.com/404"
);
}
diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs
index 9dbae37d34..261b77b0d7 100644
--- a/lychee-bin/tests/cli.rs
+++ b/lychee-bin/tests/cli.rs
@@ -266,8 +266,6 @@ mod cli {
#[test]
fn test_resolve_paths() {
- // TODO: Refactor the code to clean up base path handling
-
let mut cmd = main_command();
let dir = fixtures_path().join("resolve_paths");
diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs
index d0f1f6c3a2..f972bee09a 100644
--- a/lychee-lib/src/checker/file.rs
+++ b/lychee-lib/src/checker/file.rs
@@ -1,13 +1,26 @@
-use crate::{utils::fragment_checker::FragmentChecker, Base, ErrorKind, Status, Uri};
use http::StatusCode;
use log::warn;
use std::path::{Path, PathBuf};
+use crate::{utils::fragment_checker::FragmentChecker, Base, ErrorKind, Status, Uri};
+
+/// A utility for checking the existence and validity of file-based URIs.
+///
+/// `FileChecker` is responsible for resolving and validating file paths,
+/// handling both absolute and relative paths. It supports base path resolution,
+/// fallback extensions for paths that don't exist as given, and optional fragment checking.
+///
+/// A `FileChecker` holds an optional base, the fallback extensions to try,
+/// a flag enabling fragment checks, and the `FragmentChecker` that performs them.
#[derive(Debug, Clone)]
pub(crate) struct FileChecker {
+ /// An optional base path or URL used for resolving relative paths.
base: Option,
+ /// A list of file extensions to try if the original path doesn't exist.
fallback_extensions: Vec,
+ /// Whether to check for the existence of fragments (e.g., #section-id) in HTML files.
include_fragments: bool,
+ /// A utility for performing fragment checks in HTML files.
fragment_checker: FragmentChecker,
}
@@ -30,23 +43,28 @@ impl FileChecker {
return ErrorKind::InvalidFilePath(uri.clone()).into();
};
- if path.is_absolute() {
- let resolved_path = self.resolve_absolute_path(&path);
- return self.check_resolved_path(&resolved_path, uri).await;
- }
-
- self.check_path(&path, uri).await
+ let resolved_path = self.resolve_path(&path);
+ self.check_path(&resolved_path, uri).await
}
- async fn check_resolved_path(&self, path: &Path, uri: &Uri) -> Status {
- if path.exists() {
- if self.include_fragments {
- self.check_fragment(path, uri).await
+ fn resolve_path(&self, path: &Path) -> PathBuf {
+ if let Some(Base::Local(base_path)) = &self.base {
+ if path.is_absolute() {
+ let absolute_base_path = if base_path.is_relative() {
+ std::env::current_dir()
+ .unwrap_or_else(|_| PathBuf::new()) // empty path if the current dir is unavailable
+ .join(base_path)
+ } else {
+ base_path.clone()
+ };
+ // Drop the leading `/`: joining an absolute path would replace the base entirely.
+ let stripped = path.strip_prefix("/").unwrap_or(path);
+ absolute_base_path.join(stripped)
} else {
- Status::Ok(StatusCode::OK)
+ base_path.join(path) // relative paths are joined onto the base as-is
}
} else {
- ErrorKind::InvalidFilePath(uri.clone()).into()
+ path.to_path_buf() // no local base configured: use the path unchanged
}
}
@@ -55,10 +73,6 @@ impl FileChecker {
return self.check_existing_path(path, uri).await;
}
- if path.extension().is_some() {
- return ErrorKind::InvalidFilePath(uri.clone()).into();
- }
-
self.check_with_fallback_extensions(path, uri).await
}
@@ -72,30 +86,21 @@ impl FileChecker {
async fn check_with_fallback_extensions(&self, path: &Path, uri: &Uri) -> Status {
let mut path_buf = path.to_path_buf();
+
+ // If the path already has an extension and exists, check it as-is first
+ if path_buf.extension().is_some() && path_buf.exists() {
+ return self.check_existing_path(&path_buf, uri).await;
+ }
+
+ // Try fallback extensions; `set_extension` replaces any extension the path already has
for ext in &self.fallback_extensions {
path_buf.set_extension(ext);
if path_buf.exists() {
return self.check_existing_path(&path_buf, uri).await;
}
}
- ErrorKind::InvalidFilePath(uri.clone()).into()
- }
-
- fn resolve_absolute_path(&self, path: &Path) -> PathBuf {
- if let Some(Base::Local(base_path)) = &self.base {
- let absolute_base_path = if base_path.is_relative() {
- std::env::current_dir()
- .unwrap_or_else(|_| PathBuf::new())
- .join(base_path)
- } else {
- base_path.to_path_buf()
- };
- let stripped = path.strip_prefix("/").unwrap_or(path);
- absolute_base_path.join(stripped)
- } else {
- path.to_path_buf()
- }
+ ErrorKind::InvalidFilePath(uri.clone()).into()
}
async fn check_fragment(&self, path: &Path, uri: &Uri) -> Status {
diff --git a/lychee-lib/src/checker/mail.rs b/lychee-lib/src/checker/mail.rs
new file mode 100644
index 0000000000..1c3954568a
--- /dev/null
+++ b/lychee-lib/src/checker/mail.rs
@@ -0,0 +1,53 @@
+use crate::{ErrorKind, Status, Uri};
+use http::StatusCode;
+
+#[cfg(all(feature = "email-check", feature = "native-tls"))]
+use check_if_email_exists::{check_email, CheckEmailInput, Reachable};
+
+#[cfg(all(feature = "email-check", feature = "native-tls"))]
+use crate::types::mail;
+
+/// A utility for checking the validity of email addresses.
+///
+/// `MailChecker` checks whether an email address is reachable when both the
+/// `email-check` and `native-tls` features are enabled; otherwise every
+/// address is reported as excluded.
+#[derive(Debug, Clone)]
+pub(crate) struct MailChecker {}
+
+impl MailChecker {
+ /// Creates a new `MailChecker`.
+ pub(crate) const fn new() -> Self {
+ Self {}
+ }
+
+ /// Check a mail address, or equivalently a `mailto` URI.
+ ///
+ /// Only the URI path is used as the address, so query parameters
+ /// (e.g. `contact@example.com?subject="Hello"`) are ignored. Unless both the
+ /// `email-check` and `native-tls` features are enabled, this returns `Status::Excluded`.
+ pub(crate) async fn check_mail(&self, uri: &Uri) -> Status {
+ #[cfg(all(feature = "email-check", feature = "native-tls"))]
+ {
+ self.perform_email_check(uri).await
+ }
+
+ #[cfg(not(all(feature = "email-check", feature = "native-tls")))]
+ {
+ Status::Excluded
+ }
+ }
+
+ #[cfg(all(feature = "email-check", feature = "native-tls"))]
+ async fn perform_email_check(&self, uri: &Uri) -> Status {
+ let address = uri.url.path().to_string();
+ let input = CheckEmailInput::new(address);
+ let result = &(check_email(&input).await);
+
+ if let Reachable::Invalid = result.is_reachable {
+ ErrorKind::UnreachableEmailAddress(uri.clone(), mail::error_from_output(result)).into()
+ } else {
+ Status::Ok(StatusCode::OK)
+ }
+ }
+}
diff --git a/lychee-lib/src/checker/mod.rs b/lychee-lib/src/checker/mod.rs
index eaf206e21a..bfbef9de51 100644
--- a/lychee-lib/src/checker/mod.rs
+++ b/lychee-lib/src/checker/mod.rs
@@ -1,7 +1,7 @@
//! Checker Module
//!
//! This module contains all checkers, which are responsible for checking the status of a URL.
-//! Each checker implements [Handler](crate::chain::Handler).
pub(crate) mod file;
+pub(crate) mod mail;
pub(crate) mod website;
diff --git a/lychee-lib/src/checker/website.rs b/lychee-lib/src/checker/website.rs
index f2cffd4298..668cf827e2 100644
--- a/lychee-lib/src/checker/website.rs
+++ b/lychee-lib/src/checker/website.rs
@@ -9,14 +9,14 @@ use reqwest::Request;
use std::{collections::HashSet, time::Duration};
#[derive(Debug, Clone)]
-pub(crate) struct Checker {
+pub(crate) struct WebsiteChecker {
retry_wait_time: Duration,
max_retries: u64,
reqwest_client: reqwest::Client,
accepted: Option>,
}
-impl Checker {
+impl WebsiteChecker {
pub(crate) const fn new(
retry_wait_time: Duration,
max_retries: u64,
@@ -73,8 +73,8 @@ fn clone_unwrap(request: &Request) -> Request {
}
#[async_trait]
-impl Handler for Checker {
+impl Handler for WebsiteChecker {
async fn handle(&mut self, input: Request) -> ChainResult {
ChainResult::Done(self.retry_request(input).await)
}
-}
\ No newline at end of file
+}
diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs
index c78d9f863f..7f59c2ce39 100644
--- a/lychee-lib/src/client.rs
+++ b/lychee-lib/src/client.rs
@@ -15,8 +15,6 @@
)]
use std::{collections::HashSet, path::Path, sync::Arc, time::Duration};
-#[cfg(all(feature = "email-check", feature = "native-tls"))]
-use check_if_email_exists::{check_email, CheckEmailInput, Reachable};
use http::{
header::{HeaderMap, HeaderValue},
StatusCode,
@@ -32,18 +30,15 @@ use typed_builder::TypedBuilder;
use crate::{
chain::{Chain, ClientRequestChains, RequestChain},
checker::file::FileChecker,
- checker::website::Checker,
+ checker::{mail::MailChecker, website::WebsiteChecker},
filter::{Excludes, Filter, Includes},
quirks::Quirks,
remap::Remaps,
types::uri::github::GithubUri,
utils::fragment_checker::FragmentChecker,
- Base, ErrorKind, Request, Response, Result, Status, Uri,
+ Base, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
};
-#[cfg(all(feature = "email-check", feature = "native-tls"))]
-use crate::types::mail;
-
/// Default number of redirects before a request is deemed as failed, 5.
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
/// Default number of retries before a request is deemed as failed, 3.
@@ -401,6 +396,7 @@ impl ClientBuilder {
accepted: self.accepted,
require_https: self.require_https,
fragment_checker: FragmentChecker::new(),
+ email_checker: MailChecker::new(),
file_checker: FileChecker::new(
self.base,
self.fallback_extensions,
@@ -457,6 +453,8 @@ pub struct Client {
plugin_request_chain: RequestChain,
file_checker: FileChecker,
+
+ email_checker: MailChecker,
}
impl Client {
@@ -500,23 +498,11 @@ impl Client {
}
let status = match uri.scheme() {
+ // We don't check tel: URIs
+ _ if uri.is_tel() => Status::Excluded,
_ if uri.is_file() => self.check_file(uri).await,
_ if uri.is_mail() => self.check_mail(uri).await,
- _ if uri.is_tel() => Status::Excluded,
- _ => {
- let default_chain: RequestChain = Chain::new(vec![
- Box::::default(),
- Box::new(credentials),
- Box::new(Checker::new(
- self.retry_wait_time,
- self.max_retries,
- self.reqwest_client.clone(),
- self.accepted.clone(),
- )),
- ]);
-
- self.check_website(uri, default_chain).await?
- }
+ _ => self.check_website(uri, credentials).await?,
};
Ok(Response::new(uri.clone(), status, source))
@@ -554,7 +540,22 @@ impl Client {
/// - The request failed.
/// - The response status code is not accepted.
/// - The URI cannot be converted to HTTPS.
- pub async fn check_website(&self, uri: &Uri, default_chain: RequestChain) -> Result {
+ pub async fn check_website(
+ &self,
+ uri: &Uri,
+ credentials: Option,
+ ) -> Result {
+ let default_chain: RequestChain = Chain::new(vec![
+ Box::::default(),
+ Box::new(credentials),
+ Box::new(WebsiteChecker::new(
+ self.retry_wait_time,
+ self.max_retries,
+ self.reqwest_client.clone(),
+ self.accepted.clone(),
+ )),
+ ]);
+
match self.check_website_inner(uri, &default_chain).await {
Status::Ok(code) if self.require_https && uri.scheme() == "http" => {
if self
@@ -577,6 +578,8 @@ impl Client {
///
/// Unsupported schemes will be ignored
///
+ /// Note: we use `inner` to improve compile times by avoiding monomorphization
+ ///
/// # Errors
///
/// This returns an `Err` if
@@ -617,7 +620,7 @@ impl Client {
// Pull out the heavy machinery in case of a failed normal request.
// This could be a GitHub URL and we ran into the rate limiter.
- // TODO: We should first try to parse the URI as GitHub URI first (Lucius, Jan 2023)
+ // TODO: We should try to parse the URI as GitHub URI first (Lucius, Jan 2023)
async fn handle_github(&self, status: Status, uri: &Uri) -> Status {
if status.is_success() {
return status;
@@ -670,6 +673,11 @@ impl Client {
Status::Ok(StatusCode::OK)
}
+ /// Checks a `mailto` URI by delegating to the client's `MailChecker`.
+ pub async fn check_mail(&self, uri: &Uri) -> Status {
+ self.email_checker.check_mail(uri).await
+ }
+
/// Checks a `file` URI's fragment.
pub async fn check_fragment(&self, path: &Path, uri: &Uri) -> Status {
match self.fragment_checker.check(path, &uri.url).await {
@@ -681,33 +689,6 @@ impl Client {
}
}
}
-
- /// Check a mail address, or equivalently a `mailto` URI.
- ///
- /// URIs may contain query parameters (e.g. `contact@example.com?subject="Hello"`),
- /// which are ignored by this check. The are not part of the mail address
- /// and instead passed to a mail client.
- #[cfg(all(feature = "email-check", feature = "native-tls"))]
- pub async fn check_mail(&self, uri: &Uri) -> Status {
- let address = uri.url.path().to_string();
- let input = CheckEmailInput::new(address);
- let result = &(check_email(&input).await);
-
- if let Reachable::Invalid = result.is_reachable {
- ErrorKind::UnreachableEmailAddress(uri.clone(), mail::error_from_output(result)).into()
- } else {
- Status::Ok(StatusCode::OK)
- }
- }
-
- /// Check a mail address, or equivalently a `mailto` URI.
- ///
- /// This implementation simply excludes all email addresses.
- #[cfg(not(all(feature = "email-check", feature = "native-tls")))]
- #[allow(clippy::unused_async)]
- pub async fn check_mail(&self, _uri: &Uri) -> Status {
- Status::Excluded
- }
}
// Check if the given `Url` would cause `reqwest` to panic.