Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for ranges in the --accept option / config field #1167

Merged
merged 23 commits into from
Sep 17, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8528b8a
Foundation work for accept ranges
Techassi Jul 17, 2023
5e58638
Apply suggestions
Techassi Jul 17, 2023
c11e41d
Cleanup suggestions, fix clippy errors
Techassi Jul 17, 2023
7866393
Add more tests, fix edge cases
Techassi Jul 17, 2023
f3a7101
Fix clippy error
Techassi Jul 17, 2023
253d212
Merge remote-tracking branch 'origin/master' into feat/accept-range-s…
Techassi Jul 17, 2023
6645927
Integrate accept selector into CLI
Techassi Aug 1, 2023
48b2f82
Merge remote-tracking branch 'origin/master' into feat/accept-range-s…
Techassi Aug 1, 2023
4f45d52
Fix clippy errors
Techassi Aug 1, 2023
32f654a
Add support for comma-separated string and sequence of strings for ac…
Techassi Aug 6, 2023
f8aa68b
Merge branch 'master' into feat/accept-range-selectors
mre Aug 17, 2023
4bdf33d
Merge branch 'master' into feat/accept-range-selectors
mre Aug 22, 2023
53323f4
Implement `Default` and `Display` for `AcceptSelector`
Techassi Sep 3, 2023
f352a78
Update deps
Techassi Sep 3, 2023
e1a6aca
Fix cargo fmt errors
Techassi Sep 3, 2023
939d044
Merge branch 'master' into feat/accept-range-selectors
Techassi Sep 3, 2023
359bb2e
Fix clippy errors
Techassi Sep 3, 2023
ed2faab
Fix tests
Techassi Sep 9, 2023
1dbb9ad
Merge branch 'master' into feat/accept-range-selectors
Techassi Sep 9, 2023
1e43493
Print more specific error message when parsing TOML config
mre Sep 9, 2023
67fa627
Set serde default for `AcceptSelector`
mre Sep 9, 2023
430a29b
Merge branch 'master' into feat/accept-range-selectors
Techassi Sep 17, 2023
24e69c8
Fix tests
Techassi Sep 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lychee-lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ pub use crate::{
collector::Collector,
filter::{Excludes, Filter, Includes},
types::{
uri::valid::Uri, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar,
ErrorKind, FileType, Input, InputContent, InputSource, Request, Response, ResponseBody,
Result, Status,
uri::valid::Uri, AcceptRange, AcceptRangeParseError, AcceptSelector, Base,
BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileType,
Input, InputContent, InputSource, Request, Response, ResponseBody, Result, Status,
},
};
5 changes: 5 additions & 0 deletions lychee-lib/src/types/accept/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//! Types describing which HTTP status codes are accepted.
//!
//! Everything public in the private `range` and `selector` submodules is
//! re-exported here, so their items are reachable directly from this module.
mod range;
mod selector;

pub use range::*;
pub use selector::*;
133 changes: 133 additions & 0 deletions lychee-lib/src/types/accept/range.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
use std::{num::ParseIntError, ops::RangeInclusive, str::FromStr};

use once_cell::sync::Lazy;
use regex::Regex;
use thiserror::Error;

/// Matches a textual range such as `200..300`, `200..=299` or `..=299`,
/// optionally surrounded by whitespace.
///
/// Capture groups:
/// 1. the start digits (empty when the start is omitted),
/// 2. `=` when the end bound is inclusive (empty otherwise),
/// 3. the end digits.
///
/// The dots are escaped so that only a literal `..` separates the two bounds,
/// and the pattern is anchored so that input with leading or trailing garbage
/// is rejected instead of being partially matched.
static RANGE_PATTERN: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^\s*([0-9]*)\.\.(=?)([0-9]+)\s*$").expect("range pattern is a valid regex")
});

/// The [`AcceptRangeParseError`] indicates that the parsing process of an
/// [`AcceptRange`] from a string failed due to various underlying reasons.
///
/// The type derives `PartialEq`, so a parse result can be compared directly
/// against an expected variant.
#[derive(Debug, Error, PartialEq)]
pub enum AcceptRangeParseError {
/// The string input didn't contain any range pattern, i.e. the range regex
/// did not match the input.
#[error("No range pattern found")]
NoRangePattern,

/// The start or end index could not be parsed as an integer. Wraps the
/// underlying [`ParseIntError`], which is converted through `#[from]`.
#[error("Failed to parse str as integer")]
ParseIntError(#[from] ParseIntError),

/// The start index is not strictly smaller than the end index; the parser
/// rejects every input where `start >= end`.
#[error("Invalid range indices, only start < end supported")]
InvalidRangeIndices,
}

/// [`AcceptRange`] specifies which HTTP status codes are accepted and
/// considered successful when checking a remote URL.
///
/// It is a thin wrapper around a [`RangeInclusive<usize>`], so both the start
/// and the end value belong to the range.
#[derive(Debug, PartialEq)]
pub struct AcceptRange(RangeInclusive<usize>);

impl FromStr for AcceptRange {
    type Err = AcceptRangeParseError;

    /// Parses an [`AcceptRange`] from its textual form.
    ///
    /// Supported forms are `start..end` (end excluded) and `start..=end`
    /// (end included). The start may be omitted (`..end`, `..=end`), in which
    /// case it defaults to `0`.
    ///
    /// # Errors
    ///
    /// - [`AcceptRangeParseError::NoRangePattern`] if `s` does not match the
    ///   range pattern.
    /// - [`AcceptRangeParseError::ParseIntError`] if the start or end digits
    ///   cannot be parsed as `usize`.
    /// - [`AcceptRangeParseError::InvalidRangeIndices`] if `start >= end`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let captures = RANGE_PATTERN
            .captures(s)
            .ok_or(AcceptRangeParseError::NoRangePattern)?;

        let inclusive = !captures[2].is_empty();

        // Only an omitted start falls back to zero. Digits that are present
        // but cannot be parsed are reported instead of being silently
        // replaced by the default.
        let start: usize = match captures.get(1).map_or("", |m| m.as_str()) {
            "" => 0,
            digits => digits.parse()?,
        };
        let end: usize = captures[3].parse()?;

        if start >= end {
            return Err(AcceptRangeParseError::InvalidRangeIndices);
        }

        // `start >= end` was rejected above, so `end >= 1` holds here and
        // `end - 1` cannot underflow.
        let last = if inclusive { end } else { end - 1 };
        Ok(Self::new(start, last))
    }
}

impl AcceptRange {
/// Creates a new [`AcceptRange`] which matches values between `start` and
/// `end` (both inclusive).
pub fn new(start: usize, end: usize) -> Self {
Self(RangeInclusive::new(start, end))
}

/// Returns the `start` value of this [`AcceptRange`].
pub fn start(&self) -> &usize {
self.0.start()
}

/// Returns the `end` value of this [`AcceptRange`].
pub fn end(&self) -> &usize {
self.0.end()
}

/// Returns wether this [`AcceptRange`] contains `value`.
Techassi marked this conversation as resolved.
Show resolved Hide resolved
pub fn contains(&self, value: usize) -> bool {
self.0.contains(&value)
}

pub(crate) fn update_start(&mut self, new_start: usize) {
self.0 = RangeInclusive::new(new_start, *self.end());
Techassi marked this conversation as resolved.
Show resolved Hide resolved
}

pub(crate) fn update_end(&mut self, new_end: usize) {
self.0 = RangeInclusive::new(*self.start(), new_end);
}
}

#[cfg(test)]
mod test {
    use super::*;

    /// `a..b` excludes the end value.
    #[test]
    fn test_from_str() {
        let range = AcceptRange::from_str("0..10").unwrap();

        assert!(range.contains(0));
        assert!(range.contains(9));
        assert!(!range.contains(10));
    }

    /// `a..=b` includes the end value.
    #[test]
    fn test_from_str_inclusive() {
        let range = AcceptRange::from_str("0..=10").unwrap();

        assert!(range.contains(0));
        assert!(range.contains(9));
        assert!(range.contains(10));
        assert!(!range.contains(11));
    }

    /// An omitted start defaults to zero.
    #[test]
    fn test_from_str_open_start() {
        let range = AcceptRange::from_str("..10").unwrap();

        assert!(range.contains(0));
        assert!(range.contains(9));
        assert!(!range.contains(10));
    }

    /// An omitted start combined with an inclusive end.
    #[test]
    fn test_from_str_open_start_inclusive() {
        let range = AcceptRange::from_str("..=10").unwrap();

        assert!(range.contains(0));
        assert!(range.contains(9));
        assert!(range.contains(10));
        assert!(!range.contains(11));
    }

    /// A start larger than the end is rejected.
    #[test]
    fn test_from_str_invalid_indices() {
        let range = AcceptRange::from_str("10..=0");
        assert_eq!(range, Err(AcceptRangeParseError::InvalidRangeIndices));
    }

    /// Input without any range syntax is rejected.
    #[test]
    fn test_from_str_no_range_pattern() {
        let range = AcceptRange::from_str("foo");
        assert_eq!(range, Err(AcceptRangeParseError::NoRangePattern));
    }

    /// An end value that does not fit into `usize` surfaces the integer
    /// parsing error.
    #[test]
    fn test_from_str_end_overflow() {
        let range = AcceptRange::from_str("0..99999999999999999999999999999999999999999");
        assert!(matches!(
            range,
            Err(AcceptRangeParseError::ParseIntError(_))
        ));
    }
}
115 changes: 115 additions & 0 deletions lychee-lib/src/types/accept/selector.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
use crate::types::accept::AcceptRange;

/// A [`AcceptSelector`] determines if a returned HTTP status code should be
Techassi marked this conversation as resolved.
Show resolved Hide resolved
/// accepted and thus counted as a valid (not broken) link.
#[derive(Debug)]
pub struct AcceptSelector {
ranges: Vec<AcceptRange>,
}

impl AcceptSelector {
/// Creates a new empty [`AcceptSelector`].
pub fn new() -> Self {
Self { ranges: Vec::new() }
}

/// Adds a range of accepted HTTP status codes to this [`AcceptSelector`].
/// This method merges the new and existing ranges if they overlap.
pub fn add_range(&mut self, range: AcceptRange) -> &mut Self {
// Merge with previous range if possible
if let Some(last) = self.ranges.last_mut() {
// Merge when there is an overlap between the last end value and the
// to be inserted new range start value.
if last.end() >= range.start() {
last.update_end(*range.end());
return self;
}

// Merge when there is an overlap between the last start value and
// the to be inserted new range end value. Only do this, if the new
// start value is smaller than the last start value.
if last.start() <= range.end() && range.start() <= last.start() {
last.update_start(*range.start());
return self;
}
}

// If neither is the case, the ranges have no overlap at all. Just add
// to the list of ranges.
self.ranges.push(range);
self
Techassi marked this conversation as resolved.
Show resolved Hide resolved
}

/// Returns wether this [`AcceptSelector`] contains `value`.
Techassi marked this conversation as resolved.
Show resolved Hide resolved
pub fn contains(&self, value: usize) -> bool {
for range in &self.ranges {
if range.contains(value) {
return true;
}
}

false
Techassi marked this conversation as resolved.
Show resolved Hide resolved
}

pub(crate) fn len(&self) -> usize {
self.ranges.len()
}
}

#[cfg(test)]
mod test {
    use super::*;

    /// Two ranges without any shared value are stored as two separate
    /// entries.
    #[test]
    fn test_non_overlapping_ranges() {
        let mut selector = AcceptSelector::new();
        selector
            .add_range(AcceptRange::new(0, 10))
            .add_range(AcceptRange::new(20, 30));

        assert!(selector.contains(0));
        assert!(selector.contains(10));
        assert!(selector.contains(20));
        assert!(selector.contains(30));

        assert!(!selector.contains(15));
        assert!(!selector.contains(35));

        assert_eq!(selector.len(), 2);
    }

    /// A range added second that reaches into the start of the first one is
    /// expected to be merged into a single entry.
    #[test]
    fn test_overlapping_start_ranges() {
        let mut selector = AcceptSelector::new();
        selector
            .add_range(AcceptRange::new(8, 20))
            .add_range(AcceptRange::new(0, 10));

        assert!(selector.contains(0));
        assert!(selector.contains(10));
        assert!(selector.contains(20));

        assert!(!selector.contains(35));

        assert_eq!(selector.len(), 1);
    }

    /// A range added second that starts inside the first one is expected to
    /// be merged into a single entry.
    #[test]
    fn test_overlapping_end_ranges() {
        let mut selector = AcceptSelector::new();
        selector
            .add_range(AcceptRange::new(0, 10))
            .add_range(AcceptRange::new(8, 20));

        assert!(selector.contains(0));
        assert!(selector.contains(10));
        assert!(selector.contains(20));

        assert!(!selector.contains(35));

        assert_eq!(selector.len(), 1);
    }
}
2 changes: 2 additions & 0 deletions lychee-lib/src/types/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![allow(unreachable_pub)]

mod accept;
mod base;
mod basic_auth;
mod cache;
Expand All @@ -13,6 +14,7 @@ mod response;
mod status;
pub(crate) mod uri;

pub use accept::*;
pub use base::Base;
pub use basic_auth::{BasicAuthCredentials, BasicAuthSelector};
pub use cache::CacheStatus;
Expand Down
Loading