From d3e9ebee2cb9dd7e1ed1ae3d6ed4bea6e05a139d Mon Sep 17 00:00:00 2001 From: Juniper Tyree <50025784+juntyr@users.noreply.github.com> Date: Mon, 15 Jan 2024 05:46:19 +0200 Subject: [PATCH] Fix more serde flatten bugs in the fuzzer (#523) * Work on more fuzzer bugs with serde * Avoid quadratic backtracking for unterminated strings in struct type check * Account for variant tag and content in length minimisation * Improve invalid raw ron detection in fuzz * Add fix for untagged flattened None * Fix another weird fuzz case * Fix rustfmt and remove unfixed tests * Rebase with explicit_struct_names extension * Remove debug print --- README.md | 2 +- fuzz/fuzz_targets/bench/lib.rs | 119 ++++++++++++++++++++++++--- src/parse.rs | 14 +++- tests/502_known_bugs.rs | 145 ++++++++++++++++++++++++++++++++- 4 files changed, 262 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index ec1ed283..f365495d 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,7 @@ While data structures with any of these attributes should generally roundtrip th - ron only supports string keys inside maps flattened into structs - internally (or adjacently) tagged or untagged enum variants or `#[serde(flatten)]`ed fields must not contain: - - struct names, e.g. by enabling the `PrettyConfig::struct_names` setting + - struct names, e.g. by enabling the `#[enable(explicit_struct_names)]` extension or the `PrettyConfig::struct_names` setting - newtypes - zero-length arrays / tuples / tuple structs / structs / tuple variants / struct variants - `Option`s with `#[enable(implicit_some)]` must not contain any of these or a unit, unit struct, or an untagged unit variant diff --git a/fuzz/fuzz_targets/bench/lib.rs b/fuzz/fuzz_targets/bench/lib.rs index 7e1f89c1..b61b592e 100644 --- a/fuzz/fuzz_targets/bench/lib.rs +++ b/fuzz/fuzz_targets/bench/lib.rs @@ -5063,21 +5063,34 @@ impl<'a> SerdeDataType<'a> { SerdeDataValue::UnitStruct } SerdeDataType::Newtype { name, inner } => { - let inner = inner.arbitrary_value(u, pretty)?; + let inner_value = inner.arbitrary_value(u, pretty)?; // ron::value::RawValue cannot safely be constructed from syntactically invalid ron if *name == RAW_VALUE_TOKEN { - if let SerdeDataValue::String(ron) = &inner { - if ron::value::RawValue::from_ron(ron).is_err() { - return Err(arbitrary::Error::IncorrectFormat); - } + // Hacky way to find out if a string is serialised: + // 1. serialise into RON + // 2. try to deserialise into a String + let Ok(inner_ron) = ron::to_string(&BorrowedTypedSerdeData { + ty: inner, + value: &inner_value, + }) else { + return Err(arbitrary::Error::IncorrectFormat); + }; + + let Ok(ron) = ron::from_str::(&inner_ron) else { + return Err(arbitrary::Error::IncorrectFormat); + }; + + // Check that the raw value content is valid + if ron::value::RawValue::from_ron(&ron).is_err() { + return Err(arbitrary::Error::IncorrectFormat); } } name_length += name.len(); SerdeDataValue::Newtype { - inner: Box::new(inner), + inner: Box::new(inner_value), } } SerdeDataType::TupleStruct { name, fields } => { @@ -5127,13 +5140,19 @@ impl<'a> SerdeDataType<'a> { if !ty.supported_flattened_map_inside_flatten_field( pretty, *flatten, + false, &mut has_flatten_map, &mut has_unknown_key_inside_flatten, ) { // Flattened fields with maps must fulfil certain criteria return Err(arbitrary::Error::IncorrectFormat); } - if *flatten && pretty.struct_names { + if *flatten + && (pretty.struct_names + || pretty + .extensions + .contains(Extensions::EXPLICIT_STRUCT_NAMES)) + { // BUG: struct names inside flattend structs do not roundtrip return Err(arbitrary::Error::IncorrectFormat); } @@ -5152,7 +5171,7 @@ impl<'a> SerdeDataType<'a> { representation, SerdeEnumRepresentation::Untagged | SerdeEnumRepresentation::InternallyTagged { tag: _ } - if pretty.struct_names + if pretty.struct_names || pretty.extensions.contains(Extensions::EXPLICIT_STRUCT_NAMES) ) { return Err(arbitrary::Error::IncorrectFormat); } @@ -5178,6 +5197,15 @@ impl<'a> SerdeDataType<'a> { name_length += variant.len(); + match representation { + SerdeEnumRepresentation::ExternallyTagged + | SerdeEnumRepresentation::Untagged => (), + SerdeEnumRepresentation::InternallyTagged { tag } => name_length += tag.len(), + SerdeEnumRepresentation::AdjacentlyTagged { tag, content } => { + name_length += tag.len() + content.len(); + } + }; + let value = match ty { SerdeDataVariantType::Unit => SerdeDataVariantValue::Unit, SerdeDataVariantType::TaggedOther => { @@ -5276,13 +5304,19 @@ impl<'a> SerdeDataType<'a> { if !ty.supported_flattened_map_inside_flatten_field( pretty, *flatten, + false, &mut has_flatten_map, &mut has_unknown_key_inside_flatten, ) { // Flattened fields with maps must fulfil certain criteria return Err(arbitrary::Error::IncorrectFormat); } - if *flatten && pretty.struct_names { + if *flatten + && (pretty.struct_names + || pretty + .extensions + .contains(Extensions::EXPLICIT_STRUCT_NAMES)) + { // BUG: struct names inside flattend structs do not roundtrip return Err(arbitrary::Error::IncorrectFormat); } @@ -5666,6 +5700,7 @@ impl<'a> SerdeDataType<'a> { &self, pretty: &PrettyConfig, is_flattened: bool, + is_untagged: bool, has_flattened_map: &mut bool, has_unknown_key: &mut bool, ) -> bool { @@ -5690,10 +5725,17 @@ impl<'a> SerdeDataType<'a> { SerdeDataType::String => true, SerdeDataType::ByteBuf => true, SerdeDataType::Option { inner } => { - if is_flattened || pretty.extensions.contains(Extensions::IMPLICIT_SOME) { + if is_flattened && is_untagged { + // BUG: (serde) + // - serialising a flattened None only produces an empty struct + // - deserialising content from an empty flatten struct produces an empty map + // - deserialising an option from a content empty map produces some + false + } else if is_flattened || pretty.extensions.contains(Extensions::IMPLICIT_SOME) { inner.supported_flattened_map_inside_flatten_field( pretty, is_flattened, + is_untagged, has_flattened_map, has_unknown_key, ) @@ -5724,6 +5766,7 @@ impl<'a> SerdeDataType<'a> { inner.supported_flattened_map_inside_flatten_field( pretty, is_flattened, + is_untagged, has_flattened_map, has_unknown_key, ) @@ -5734,10 +5777,25 @@ impl<'a> SerdeDataType<'a> { SerdeDataType::TupleStruct { name: _, fields: _ } => true, SerdeDataType::Struct { name: _, - tag: _, + tag, fields, } => { if is_flattened { + // TODO: is this really the correct requirement? + // case clusterfuzz-testcase-minimized-arbitrary-6364230921879552 + if tag.is_some() { + if *has_flattened_map { + // BUG: a flattened map will also see the unknown key (serde) + return false; + } + if *has_unknown_key && is_untagged { + // BUG: an untagged struct will use a map intermediary and see + // the unknown key (serde) + return false; + } + *has_unknown_key = true; + } + fields .1 .iter() @@ -5746,6 +5804,7 @@ impl<'a> SerdeDataType<'a> { field.supported_flattened_map_inside_flatten_field( pretty, *is_flattened, + is_untagged, has_flattened_map, has_unknown_key, ) @@ -5787,16 +5846,33 @@ impl<'a> SerdeDataType<'a> { inner.supported_flattened_map_inside_flatten_field( pretty, is_flattened, + true, has_flattened_map, has_unknown_key, ) } else if is_flattened { + if matches!(representation, SerdeEnumRepresentation::ExternallyTagged) && *has_unknown_key { + // BUG: flattened enums are deserialised using the content deserialiser, + // which expects to see a map with just one field (serde) + return false; + } + if matches!(representation, SerdeEnumRepresentation::InternallyTagged { tag: _ }) { // BUG: an flattened internally tagged newtype alongside other flattened data // must not contain a unit, unit struct, or untagged unit variant if !inner.supported_inside_internally_tagged_newtype(true) { return false; } + + if !inner.supported_flattened_map_inside_flatten_field( + pretty, + is_flattened, + false, + has_flattened_map, + has_unknown_key, + ) { + return false; + } } if *has_flattened_map { @@ -5820,14 +5896,33 @@ impl<'a> SerdeDataType<'a> { true } SerdeDataVariantType::Struct { fields } => { - if (is_flattened && !matches!(representation, SerdeEnumRepresentation::Untagged)) || matches!(representation, SerdeEnumRepresentation::Untagged if fields.2.iter().any(|x| *x)) + if is_flattened { + if *has_flattened_map { + // BUG: a flattened map will also see the unknown key (serde) + return false; + } + *has_unknown_key = true; + } + + if matches!(representation, SerdeEnumRepresentation::Untagged if is_flattened || fields.2.iter().any(|x| *x)) { if *has_flattened_map { // BUG: a flattened map will also see the unknown key (serde) return false; } + if *has_unknown_key { + // BUG: a flattened untagged enum struct will also see the unknown key (serde) + return false; + } *has_unknown_key = true; } + + if is_flattened && matches!(representation, SerdeEnumRepresentation::ExternallyTagged) && *has_unknown_key { + // BUG: flattened enums are deserialised using the content deserialiser, + // which expects to see a map with just one field (serde) + return false; + } + true } }), diff --git a/src/parse.rs b/src/parse.rs index 0144d0ad..08fd7fe8 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -612,12 +612,18 @@ impl<'a> Parser<'a> { parser.set_cursor(cursor_backup); } let cursor_backup = parser.cursor; - if parser.string().is_err() { - parser.set_cursor(cursor_backup); + match parser.string() { + Ok(_) => (), + // prevent quadratic complexity backtracking for unterminated string + Err(err @ (Error::ExpectedStringEnd | Error::Eof)) => return Err(err), + Err(_) => parser.set_cursor(cursor_backup), } let cursor_backup = parser.cursor; - if parser.byte_string().is_err() { - parser.set_cursor(cursor_backup); + match parser.byte_string() { + Ok(_) => (), + // prevent quadratic complexity backtracking for unterminated byte string + Err(err @ (Error::ExpectedStringEnd | Error::Eof)) => return Err(err), + Err(_) => parser.set_cursor(cursor_backup), } let c = parser.next_char()?; diff --git a/tests/502_known_bugs.rs b/tests/502_known_bugs.rs index 6c300824..11f8abf5 100644 --- a/tests/502_known_bugs.rs +++ b/tests/502_known_bugs.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use ron::{error::Position, error::SpannedError, extensions::Extensions, ser::PrettyConfig, Error}; use serde::{Deserialize, Serialize}; @@ -2885,6 +2885,149 @@ fn untagged_unit_variant_inside_internally_tagged_newtype_variant_inside_multi_f ); } +#[test] +fn flattened_externally_tagged_newtype_variant_beside_flattened_intenally_tagged_enum() { + #[derive(PartialEq, Debug, Serialize, Deserialize)] + enum ExternallyTagged { + Newtype(()), + Other(()), + } + + #[derive(PartialEq, Debug, Serialize, Deserialize)] + #[serde(tag = "tag")] + enum InternallyTagged { + Newtype(ExternallyTagged), + } + + #[derive(PartialEq, Debug, Serialize, Deserialize)] + struct Flattened { + #[serde(flatten)] + a: InternallyTagged, + #[serde(flatten)] + b: ExternallyTagged, + } + + assert_eq!( + check_roundtrip( + &Flattened { + a: InternallyTagged::Newtype(ExternallyTagged::Other(())), + b: ExternallyTagged::Newtype(()), + }, + PrettyConfig::default() + ), + Err(Err(SpannedError { + code: Error::InvalidValueForType { + expected: String::from("map with a single key"), + found: String::from("a map"), + }, + position: Position { line: 5, col: 1 } + })) + ); +} + +#[test] +fn flattened_externally_tagged_struct_variant_beside_flattened_intenally_tagged_enum() { + #[derive(PartialEq, Debug, Serialize, Deserialize)] + enum ExternallyTagged { + Struct { a: i32 }, + Other(()), + } + + #[derive(PartialEq, Debug, Serialize, Deserialize)] + #[serde(tag = "tag")] + enum InternallyTagged { + Newtype(ExternallyTagged), + } + + #[derive(PartialEq, Debug, Serialize, Deserialize)] + struct Flattened { + #[serde(flatten)] + a: InternallyTagged, + #[serde(flatten)] + b: ExternallyTagged, + } + + assert_eq!( + check_roundtrip( + &Flattened { + a: InternallyTagged::Newtype(ExternallyTagged::Other(())), + b: ExternallyTagged::Struct { a: 42 }, + }, + PrettyConfig::default() + ), + Err(Err(SpannedError { + code: Error::InvalidValueForType { + expected: String::from("map with a single key"), + found: String::from("a map"), + }, + position: Position { line: 7, col: 1 } + })) + ); +} + +#[test] +fn flattened_map_inside_option_beside_flattened_struct_variant() { + #[derive(PartialEq, Debug, Serialize, Deserialize)] + #[serde(untagged)] + enum Untagged { + Struct { a: i32 }, + } + + #[derive(PartialEq, Debug, Serialize, Deserialize)] + struct Flattened { + #[serde(flatten)] + a: Untagged, + #[serde(flatten)] + b: Option>, + } + + assert_eq!( + check_roundtrip( + &Flattened { + a: Untagged::Struct { + a: 42, + }, + b: Some([(String::from("b"), 24)].into_iter().collect()), + }, + PrettyConfig::default() + ), + Err(Ok(Error::Message(String::from("ROUNDTRIP error: Flattened { a: Struct { a: 42 }, b: Some({\"b\": 24}) } != Flattened { a: Struct { a: 42 }, b: Some({\"a\": 42, \"b\": 24}) }")))) + ); +} + +#[test] +fn flattened_untagged_struct_beside_flattened_untagged_struct() { + #[derive(PartialEq, Debug, Serialize, Deserialize)] + #[serde(untagged)] + enum Untagged { + Struct { a: i32 }, + Other { b: i32 }, + } + + #[derive(PartialEq, Debug, Serialize, Deserialize)] + struct Flattened { + #[serde(flatten)] + a: Untagged, + #[serde(flatten)] + b: Untagged, + } + + assert_eq!( + check_roundtrip( + &Flattened { + a: Untagged::Struct { + a: 42, + }, + b: Untagged::Other { + b: 24, + }, + }, + PrettyConfig::default() + ), + Err(Ok(Error::Message(String::from("ROUNDTRIP error: Flattened { a: Struct { a: 42 }, b: Other { b: 24 } } != Flattened { a: Struct { a: 42 }, b: Struct { a: 42 } }")))) + ); +} + fn check_roundtrip( val: &T, config: PrettyConfig,