Skip to content

Commit

Permalink
refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
mre committed Oct 16, 2024
1 parent 3b03dc1 commit 453cd14
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 96 deletions.
14 changes: 10 additions & 4 deletions lychee-bin/src/formatters/response/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,19 @@ mod tests {
}
}

#[cfg(test)]
/// Test helper: returns `s` with all ANSI escape sequences removed, so that
/// assertions can compare plain text regardless of color support.
fn strip_ansi_codes(s: &str) -> String {
    let plain = console::strip_ansi_codes(s);
    plain.into_owned()
}

#[test]
fn test_format_response_with_ok_status() {
let formatter = ColorFormatter;
let body = mock_response_body(Status::Ok(StatusCode::OK), "https://example.com");
assert_eq!(
formatter.format_response(&body),
"\u{1b}[38;5;2m\u{1b}[1m [200]\u{1b}[0m https://example.com/"
strip_ansi_codes(&formatter.format_response(&body)),
" [200] https://example.com/"
);
}

Expand All @@ -83,8 +89,8 @@ mod tests {
"https://example.com/404",
);
assert_eq!(
formatter.format_response(&body),
"\u{1b}[38;5;197m [ERROR]\u{1b}[0m https://example.com/404"
strip_ansi_codes(&formatter.format_response(&body)),
" [ERROR] https://example.com/404"
);
}

Expand Down
2 changes: 0 additions & 2 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,6 @@ mod cli {

#[test]
fn test_resolve_paths() {
// TODO: Refactor the code to clean up base path handling

let mut cmd = main_command();
let dir = fixtures_path().join("resolve_paths");

Expand Down
73 changes: 39 additions & 34 deletions lychee-lib/src/checker/file.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
use crate::{utils::fragment_checker::FragmentChecker, Base, ErrorKind, Status, Uri};
use http::StatusCode;
use log::warn;
use std::path::{Path, PathBuf};

use crate::{utils::fragment_checker::FragmentChecker, Base, ErrorKind, Status, Uri};

/// A utility for checking the existence and validity of file-based URIs.
///
/// `FileChecker` is responsible for resolving and validating file paths,
/// handling both absolute and relative paths. It supports base path resolution,
/// fallback extensions that are tried when the original path does not exist,
/// and optional fragment checking.
#[derive(Debug, Clone)]
pub(crate) struct FileChecker {
/// An optional base path or URL used for resolving paths.
base: Option<Base>,
/// A list of file extensions to try if the original path doesn't exist.
fallback_extensions: Vec<String>,
/// Whether to check for the existence of fragments (e.g., #section-id) in the checked file.
include_fragments: bool,
/// A utility for performing the fragment checks.
fragment_checker: FragmentChecker,
}

Expand All @@ -30,23 +43,28 @@ impl FileChecker {
return ErrorKind::InvalidFilePath(uri.clone()).into();
};

if path.is_absolute() {
let resolved_path = self.resolve_absolute_path(&path);
return self.check_resolved_path(&resolved_path, uri).await;
}

self.check_path(&path, uri).await
let resolved_path = self.resolve_path(&path);
self.check_path(&resolved_path, uri).await
}

async fn check_resolved_path(&self, path: &Path, uri: &Uri) -> Status {
if path.exists() {
if self.include_fragments {
self.check_fragment(path, uri).await
fn resolve_path(&self, path: &Path) -> PathBuf {
if let Some(Base::Local(base_path)) = &self.base {
if path.is_absolute() {
let absolute_base_path = if base_path.is_relative() {
std::env::current_dir()
.unwrap_or_else(|_| PathBuf::new())
.join(base_path)
} else {
base_path.clone()
};

let stripped = path.strip_prefix("/").unwrap_or(path);
absolute_base_path.join(stripped)
} else {
Status::Ok(StatusCode::OK)
base_path.join(path)
}
} else {
ErrorKind::InvalidFilePath(uri.clone()).into()
path.to_path_buf()
}
}

Expand All @@ -55,10 +73,6 @@ impl FileChecker {
return self.check_existing_path(path, uri).await;
}

if path.extension().is_some() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}

self.check_with_fallback_extensions(path, uri).await
}

Expand All @@ -72,30 +86,21 @@ impl FileChecker {

async fn check_with_fallback_extensions(&self, path: &Path, uri: &Uri) -> Status {
let mut path_buf = path.to_path_buf();

// If the path already has an extension, try it first
if path_buf.extension().is_some() && path_buf.exists() {
return self.check_existing_path(&path_buf, uri).await;
}

// Try fallback extensions
for ext in &self.fallback_extensions {
path_buf.set_extension(ext);
if path_buf.exists() {
return self.check_existing_path(&path_buf, uri).await;
}
}
ErrorKind::InvalidFilePath(uri.clone()).into()
}

fn resolve_absolute_path(&self, path: &Path) -> PathBuf {
if let Some(Base::Local(base_path)) = &self.base {
let absolute_base_path = if base_path.is_relative() {
std::env::current_dir()
.unwrap_or_else(|_| PathBuf::new())
.join(base_path)
} else {
base_path.to_path_buf()
};

let stripped = path.strip_prefix("/").unwrap_or(path);
absolute_base_path.join(stripped)
} else {
path.to_path_buf()
}
ErrorKind::InvalidFilePath(uri.clone()).into()
}

async fn check_fragment(&self, path: &Path, uri: &Uri) -> Status {
Expand Down
53 changes: 53 additions & 0 deletions lychee-lib/src/checker/mail.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use crate::{ErrorKind, Status, Uri};
use http::StatusCode;

#[cfg(all(feature = "email-check", feature = "native-tls"))]
use check_if_email_exists::{check_email, CheckEmailInput, Reachable};

#[cfg(all(feature = "email-check", feature = "native-tls"))]
use crate::types::mail;

/// A utility for checking the validity of email addresses.
///
/// `MailChecker` is responsible for validating email addresses,
/// optionally performing reachability checks when the appropriate
/// features (`email-check` together with `native-tls`) are enabled.
/// It carries no state of its own.
#[derive(Debug, Clone)]
pub(crate) struct MailChecker {}

impl MailChecker {
    /// Creates a new `MailChecker`.
    ///
    /// The checker holds no state, so this is usable in `const` contexts.
    pub(crate) const fn new() -> Self {
        Self {}
    }

    /// Check a mail address, or equivalently a `mailto` URI.
    ///
    /// URIs may contain query parameters (e.g. `contact@example.com?subject="Hello"`),
    /// which are ignored by this check. They are not part of the mail address
    /// and instead passed to a mail client.
    ///
    /// The actual check only runs when both the `email-check` and `native-tls`
    /// features are enabled; otherwise the URI is reported as
    /// `Status::Excluded` without being checked.
    pub(crate) async fn check_mail(&self, uri: &Uri) -> Status {
        #[cfg(all(feature = "email-check", feature = "native-tls"))]
        {
            self.perform_email_check(uri).await
        }

        #[cfg(not(all(feature = "email-check", feature = "native-tls")))]
        {
            Status::Excluded
        }
    }

    /// Runs the reachability check for the address contained in `uri`.
    ///
    /// Returns an `ErrorKind::UnreachableEmailAddress` status when the address
    /// is reported as `Reachable::Invalid`, and `Status::Ok` for every other
    /// reachability verdict.
    #[cfg(all(feature = "email-check", feature = "native-tls"))]
    async fn perform_email_check(&self, uri: &Uri) -> Status {
        // Only the URL path is used as the address, which drops any query
        // parameters such as `?subject=...`.
        let address = uri.url.path().to_string();
        let input = CheckEmailInput::new(address);
        let result = check_email(&input).await;

        // Only a definite `Invalid` verdict is treated as a failure.
        if matches!(result.is_reachable, Reachable::Invalid) {
            ErrorKind::UnreachableEmailAddress(uri.clone(), mail::error_from_output(&result)).into()
        } else {
            Status::Ok(StatusCode::OK)
        }
    }
}
2 changes: 1 addition & 1 deletion lychee-lib/src/checker/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Checker Module
//!
//! This module contains all checkers, which are responsible for checking the status of a URL.
//! Each checker implements [Handler](crate::chain::Handler).

pub(crate) mod file;
pub(crate) mod mail;
pub(crate) mod website;
8 changes: 4 additions & 4 deletions lychee-lib/src/checker/website.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ use reqwest::Request;
use std::{collections::HashSet, time::Duration};

#[derive(Debug, Clone)]
pub(crate) struct Checker {
pub(crate) struct WebsiteChecker {
retry_wait_time: Duration,
max_retries: u64,
reqwest_client: reqwest::Client,
accepted: Option<HashSet<StatusCode>>,
}

impl Checker {
impl WebsiteChecker {
pub(crate) const fn new(
retry_wait_time: Duration,
max_retries: u64,
Expand Down Expand Up @@ -73,8 +73,8 @@ fn clone_unwrap(request: &Request) -> Request {
}

#[async_trait]
impl Handler<Request, Status> for Checker {
impl Handler<Request, Status> for WebsiteChecker {
async fn handle(&mut self, input: Request) -> ChainResult<Request, Status> {
ChainResult::Done(self.retry_request(input).await)
}
}
}
Loading

0 comments on commit 453cd14

Please sign in to comment.