Skip to content

Commit

Permalink
Respect timeout when retrieving archived link (#1526)
Browse files Browse the repository at this point in the history
  • Loading branch information
thomas-zahner authored Oct 12, 2024
1 parent 3d414c2 commit 17f62ae
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 9 deletions.
10 changes: 7 additions & 3 deletions lychee-bin/src/archive/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use reqwest::{Error, Url};
use serde::{Deserialize, Serialize};
-use std::fmt::Display;
+use std::{fmt::Display, time::Duration};
use strum::{Display, EnumIter, EnumString, VariantNames};

use crate::color::{color, GREEN, PINK};
Expand Down Expand Up @@ -32,11 +32,15 @@ pub(crate) enum Archive {
}

impl Archive {
-pub(crate) async fn get_link(&self, original: &Url) -> Result<Option<Url>, Error> {
+pub(crate) async fn get_link(
+&self,
+original: &Url,
+timeout: Duration,
+) -> Result<Option<Url>, Error> {
let function = match self {
Archive::WaybackMachine => wayback::get_wayback_link,
};

-function(original).await
+function(original, timeout).await
}
}
16 changes: 11 additions & 5 deletions lychee-bin/src/archive/wayback/mod.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
+use std::time::Duration;
+
use once_cell::sync::Lazy;
use serde::de::Error as SerdeError;
use serde::{Deserialize, Deserializer};

use http::StatusCode;
-use reqwest::{Error, Url};
+use reqwest::{Client, Error, Url};
static WAYBACK_URL: Lazy<Url> =
Lazy::new(|| Url::parse("https://archive.org/wayback/available").unwrap());

-pub(crate) async fn get_wayback_link(url: &Url) -> Result<Option<Url>, Error> {
+pub(crate) async fn get_wayback_link(url: &Url, timeout: Duration) -> Result<Option<Url>, Error> {
let mut archive_url: Url = WAYBACK_URL.clone();
archive_url.set_query(Some(&format!("url={url}")));

-let response = reqwest::get(archive_url)
+let response = Client::builder()
+.timeout(timeout)
+.build()?
+.get(archive_url)
+.send()
.await?
.json::<InternetArchiveResponse>()
.await?;
Expand Down Expand Up @@ -74,7 +80,7 @@ mod tests {
// This test can be flaky, because the wayback machine does not always
// return a suggestion. Retry a few times if needed.
for _ in 0..3 {
-match get_wayback_link(&target_url).await {
+match get_wayback_link(&target_url, Duration::from_secs(20)).await {
Ok(Some(suggested_url)) => {
// Ensure the host is correct
let host = suggested_url
Expand Down Expand Up @@ -124,7 +130,7 @@ mod tests {
.try_into()
.unwrap();

-let response = get_wayback_link(url).await?;
+let response = get_wayback_link(url, Duration::from_secs(20)).await?;
assert_eq!(response, None);
Ok(())
}
Expand Down
5 changes: 4 additions & 1 deletion lychee-bin/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use lychee_lib::{ResponseBody, Status};
use crate::archive::{Archive, Suggestion};
use crate::formatters::get_response_formatter;
use crate::formatters::response::ResponseFormatter;
+use crate::parse::parse_duration_secs;
use crate::verbosity::Verbosity;
use crate::{cache::Cache, stats::ResponseStats, ExitCode};

Expand Down Expand Up @@ -95,6 +96,7 @@ where
&mut stats,
!params.cfg.no_progress,
max_concurrency,
+parse_duration_secs(params.cfg.timeout),
)
.await;
}
Expand All @@ -112,6 +114,7 @@ async fn suggest_archived_links(
stats: &mut ResponseStats,
show_progress: bool,
max_concurrency: usize,
+timeout: Duration,
) {
let failed_urls = &get_failed_urls(stats);
let bar = if show_progress {
Expand All @@ -125,7 +128,7 @@ async fn suggest_archived_links(
let suggestions = Mutex::new(&mut stats.suggestion_map);

futures::stream::iter(failed_urls)
-.map(|(input, url)| (input, url, archive.get_link(url)))
+.map(|(input, url)| (input, url, archive.get_link(url, timeout)))
.for_each_concurrent(max_concurrency, |(input, url, future)| async {
if let Ok(Some(suggestion)) = future.await {
suggestions
Expand Down

0 comments on commit 17f62ae

Please sign in to comment.