Skip to content

Commit

Permalink
Cookie Support (#1146)
Browse files Browse the repository at this point in the history
This is a very conservative and limited implementation of cookie support.

The goal is to ship an MVP, which covers 80% of the use-cases.
When you run lychee with --cookie-jar cookies.json, all cookies will be stored in cookies.json, one cookie per line.
This makes cookies easy to edit by hand if needed, although this is an advanced use-case and the file format is not guaranteed to be stable.

Fixes: #645, #715
Partially fixes: #1108
  • Loading branch information
mre authored Jul 13, 2023
1 parent 40ba187 commit 14e7487
Show file tree
Hide file tree
Showing 16 changed files with 297 additions and 21 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ Cargo.lock

# Config smoketest report file
.config.dummy.report.md

# Other
cookies.json
88 changes: 88 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ docker-run: ## Run Docker image
docker-push: ## Push image to Docker Hub
docker push $(IMAGE_NAME)

.PHONY: clean
clean: ## Clean up build artifacts
cargo clean

.PHONY: build
build: ## Build Rust code locally
cargo build
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ outdated information.
| [Use as library] | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![no] | ![yes] | ![no] |
| Quiet mode | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] |
| [Config file] | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![yes] | ![no] |
| Cookies | ![yes] | ![no] | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] |
| Recursion | ![no] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![no] |
| Amazing lychee logo | ![yes] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] |

Expand Down Expand Up @@ -407,6 +408,9 @@ Options:
--require-https
When HTTPS is available, treat HTTP links as errors
--cookie-jar <COOKIE_JAR>
Tell lychee to read cookies from the given file. Cookies will be stored in the cookie jar and sent with requests. New cookies will be stored in the cookie jar and existing cookies will be updated
-h, --help
Print help (see a summary with '-h')
Expand Down
1 change: 1 addition & 0 deletions lychee-bin/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ openssl-sys = { version = "0.9.90", optional = true }
pad = "0.1.6"
regex = "1.9.1"
reqwest = { version = "0.11.18", default-features = false, features = ["gzip", "json"] }
reqwest_cookie_store = "0.5.0"
# Make build work on Apple Silicon.
# See https://github.com/briansmith/ring/issues/1163
# This is necessary for the homebrew build
Expand Down
6 changes: 4 additions & 2 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ use anyhow::{Context, Result};
use http::StatusCode;
use lychee_lib::{Client, ClientBuilder};
use regex::RegexSet;
use reqwest_cookie_store::CookieStoreMutex;
use std::sync::Arc;
use std::{collections::HashSet, str::FromStr};

/// Creates a client according to the command-line config
pub(crate) fn create(cfg: &Config) -> Result<Client> {
pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -> Result<Client> {
let headers = parse_headers(&cfg.header)?;

let timeout = parse_duration_secs(cfg.timeout);
let retry_wait_time = parse_duration_secs(cfg.retry_wait_time);
let method: reqwest::Method = reqwest::Method::from_str(&cfg.method.to_uppercase())?;
Expand Down Expand Up @@ -56,6 +57,7 @@ pub(crate) fn create(cfg: &Config) -> Result<Client> {
.schemes(HashSet::from_iter(schemes))
.accepted(accepted)
.require_https(cfg.require_https)
.cookie_jar(cookie_jar.cloned())
.build()
.client()
.context("Failed to create request client")
Expand Down
2 changes: 1 addition & 1 deletion lychee-bin/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ where
let cache = params.cache;
let accept = params.cfg.accept;

let pb = if params.cfg.no_progress {
let pb = if params.cfg.no_progress || params.cfg.verbose.log_level() >= log::Level::Info {
None
} else {
Some(init_progress_bar("Extracting links"))
Expand Down
32 changes: 30 additions & 2 deletions lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ use log::{error, info, warn};
#[cfg(feature = "native-tls")]
use openssl_sys as _; // required for vendored-openssl feature

use openssl_sys as _;
use options::LYCHEE_CONFIG_FILE;
use ring as _; // required for apple silicon

use lychee_lib::{BasicAuthExtractor, Collector};
use lychee_lib::BasicAuthExtractor;
use lychee_lib::Collector;
use lychee_lib::CookieJar;

mod archive;
mod cache;
Expand Down Expand Up @@ -188,6 +191,14 @@ fn load_config() -> Result<LycheeOptions> {
Ok(opts)
}

/// Loads the cookie jar referenced by `cfg.cookie_jar`, if one is configured.
///
/// Returns `Ok(None)` when no cookie jar path is set. Otherwise the path is
/// handed to [`CookieJar::load`] and the loaded jar is returned as `Some`.
///
/// # Errors
///
/// Propagates any error reported by [`CookieJar::load`].
fn load_cookie_jar(cfg: &Config) -> Result<Option<CookieJar>> {
    // `transpose` turns `Option<Result<_>>` into `Result<Option<_>>`, so a
    // missing path short-circuits to `Ok(None)` without touching the loader.
    let loaded = cfg
        .cookie_jar
        .as_ref()
        .map(|jar_path| CookieJar::load(jar_path.clone()))
        .transpose()?;
    Ok(loaded)
}

#[must_use]
/// Load cache (if exists and is still valid)
/// This returns an `Option` as starting without a cache is a common scenario
Expand Down Expand Up @@ -290,13 +301,24 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {

let requests = collector.collect_links(inputs).await;

let client = client::create(&opts.config)?;
let cache = load_cache(&opts.config).unwrap_or_default();
let cache = Arc::new(cache);

let cookie_jar = load_cookie_jar(&opts.config).with_context(|| {
format!(
"Cannot load cookie jar from path `{}`",
opts.config
.cookie_jar
.as_ref()
.map_or_else(|| "<none>".to_string(), |p| p.display().to_string())
)
})?;

let response_formatter: Box<dyn ResponseFormatter> =
formatters::get_formatter(&opts.config.format);

let client = client::create(&opts.config, cookie_jar.as_deref())?;

let params = CommandParams {
client,
cache,
Expand Down Expand Up @@ -348,6 +370,12 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {
if opts.config.cache {
cache.store(LYCHEE_CACHE_FILE)?;
}

if let Some(cookie_jar) = cookie_jar.as_ref() {
info!("Saving cookie jar");
cookie_jar.save().context("Cannot save cookie jar")?;
}

exit_code
};

Expand Down
8 changes: 8 additions & 0 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,13 @@ pub(crate) struct Config {
#[arg(long)]
#[serde(default)]
pub(crate) require_https: bool,

/// Tell lychee to read cookies from the given file.
/// Cookies will be stored in the cookie jar and sent with requests.
/// New cookies will be stored in the cookie jar and existing cookies will be updated.
#[arg(long)]
#[serde(default)]
pub(crate) cookie_jar: Option<PathBuf>,
}

impl Config {
Expand Down Expand Up @@ -406,6 +413,7 @@ impl Config {
glob_ignore_case: false;
output: None;
require_https: false;
cookie_jar: None;
}

if self
Expand Down
52 changes: 45 additions & 7 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ mod cli {
use pretty_assertions::assert_eq;
use serde::Serialize;
use serde_json::Value;
use tempfile::NamedTempFile;
use uuid::Uuid;
use wiremock::{matchers::basic_auth, Mock, ResponseTemplate};

Expand Down Expand Up @@ -886,8 +887,12 @@ mod cli {
/// and even if they are invalid, we don't know if they will be valid in the
/// future.
///
/// Since we cannot test this with our mock server (because hyper panics on invalid status codes)
/// we use LinkedIn as a test target.
/// Since we cannot test this with our mock server (because hyper panics on
/// invalid status codes) we use LinkedIn as a test target.
///
/// Unfortunately, LinkedIn does not always return 999, so this is a flaky
/// test. We only check that the cache file doesn't contain any invalid
/// status codes.
#[tokio::test]
async fn test_skip_cache_unknown_status_code() -> Result<()> {
let base_path = fixtures_path().join("cache");
Expand All @@ -910,13 +915,20 @@ mod cli {
.arg("--")
.arg("-")
.assert()
.stderr(contains(format!("[999] {unknown_url} | Unknown status")));
// LinkedIn does not always return 999, so we cannot check for that
// .stderr(contains(format!("[999] {unknown_url} | Unknown status")))
;

// The cache file should be empty, because the only checked URL is
// unsupported and we don't want to cache that. It might be supported in
// future versions.
// If the status code was 999, the cache file should be empty
// because we do not want to cache unknown status codes
let buf = fs::read(&cache_file).unwrap();
assert!(buf.is_empty());
if !buf.is_empty() {
let data = String::from_utf8(buf)?;
// The cache file should not contain any invalid status codes
// In that case, we expect a single entry with status code 200
assert!(!data.contains("999"));
assert!(data.contains("200"));
}

// clear the cache file
fs::remove_file(&cache_file)?;
Expand Down Expand Up @@ -1309,4 +1321,30 @@ mod cli {

Ok(())
}

/// Runs lychee with `--cookie-jar` pointing at a temporary file and checks
/// that the file afterwards holds at least one cookie, all of them for
/// `google.com`.
#[tokio::test]
async fn test_cookie_jar() -> Result<()> {
    // Temporary file that acts as the cookie jar for this run
    let jar_file = NamedTempFile::new()?;
    let jar_path = jar_file.path();

    // Using Google as a test target because I couldn't
    // get the mock server to work with the cookie jar
    let mut cmd = main_command();
    cmd.arg("--cookie-jar")
        .arg(jar_path.to_str().unwrap())
        .arg("-");
    cmd.write_stdin("https://google.com");
    cmd.assert().success();

    // Read the jar back and parse it as a JSON cookie store
    let reader = std::io::BufReader::new(std::fs::File::open(jar_path)?);
    let store = reqwest_cookie_store::CookieStore::load_json(reader).unwrap();
    let cookies: Vec<_> = store.iter_any().collect();

    // At least one cookie must have been written, and none for another domain
    assert!(!cookies.is_empty());
    let only_google = cookies
        .iter()
        .all(|cookie| cookie.domain() == Some("google.com"));
    assert!(only_google);

    Ok(())
}
}
3 changes: 2 additions & 1 deletion lychee-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ pulldown-cmark = "0.9.3"
regex = "1.9.1"
# Use trust-dns to avoid lookup failures on high concurrency
# https://github.com/seanmonstar/reqwest/issues/296
reqwest = { version = "0.11.18", default-features = false, features = ["gzip", "trust-dns"] }
reqwest = { version = "0.11.18", features = ["gzip", "trust-dns", "cookies"] }
reqwest_cookie_store = "0.5.0"
# Make build work on Apple Silicon.
# See https://github.com/briansmith/ring/issues/1163
# This is necessary for the homebrew build
Expand Down
Loading

0 comments on commit 14e7487

Please sign in to comment.