diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml deleted file mode 100644 index 3c13d1b..0000000 --- a/.github/workflows/rust.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Rust - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Build - run: cargo build --verbose - - name: Run tests - run: cargo test --verbose diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..945d8af --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,46 @@ +name: Test + +on: + push: + branches: [ master ] + + pull_request: + branches: [ master ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + + - name: Generate Dependencies Hash + id: cargo_toml_hash + uses: KEINOS/gh-action-hash-for-cache@e0515fd0280f1ef616e13cef3b2b9566938da2c4 + with: + path: | + ./Cargo.toml + + - name: Retrieve Cargo's Index - Try Cache + id: cargo_index_cache + uses: actions/cache/restore@v3 + with: + path: ~/.cargo + key: ${{ runner.os }}-cargo-index-${{ steps.cargo_toml_hash.outputs.hash }} + + - name: Build + run: cargo build --verbose + + - name: Retrieve Cargo's Index - Save to Cache + if: steps.cargo_index_cache.outputs.cache-hit != 'true' + uses: actions/cache/save@v3 + with: + path: ~/.cargo + key: ${{ runner.os }}-cargo-index-${{ steps.cargo_toml_hash.outputs.hash }} + + + - name: Run tests + run: cargo test --verbose diff --git a/Cargo.toml b/Cargo.toml index 8d960af..7964d31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ serde = "1.0" serde_json = "1.0" serde_derive = "1.0" minidom = "0.12" +regex = "1.8.3" [features] json_types = [] # Enable to enforce fixed JSON data types for certain XML nodes +regex_path = ["json_types"] # Enable Regex matching for JSON types diff --git 
a/src/lib.rs b/src/lib.rs index 12ad33c..63c0723 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,8 @@ +#![allow(clippy::items_after_test_module)] +#![allow(clippy::single_match)] +#![allow(clippy::single_char_pattern)] +#![allow(clippy::needless_borrow)] +#![allow(clippy::ptr_arg)] //! # quickxml_to_serde //! Fast and flexible conversion from XML to JSON using [quick-xml](https://github.com/tafia/quick-xml) //! and [serde](https://github.com/serde-rs/json). Inspired by [node2object](https://github.com/vorot93/node2object). @@ -53,12 +58,18 @@ extern crate minidom; extern crate serde_json; +#[cfg(feature = "regex_path")] +extern crate regex; + use minidom::{Element, Error}; use serde_json::{Map, Number, Value}; #[cfg(feature = "json_types")] use std::collections::HashMap; use std::str::FromStr; +#[cfg(feature = "regex_path")] +use regex::Regex; + #[cfg(test)] mod tests; @@ -89,6 +100,41 @@ pub enum JsonArray { Infer(JsonType), } +/// Used as a parameter for `Config.add_json_type_override`. Defines how the XML path should be matched +/// in order to apply the JSON type overriding rules. This enumerator exists to allow the same function +/// to be used for multiple different types of path matching rules. +#[derive(Debug)] +pub enum PathMatcher { + /// An absolute path starting with a leading slash (`/`). E.g. `/a/b/c/@d`. + /// It's implicitly converted from `&str` and automatically includes the leading slash. + Absolute(String), + /// A regex that will be checked against the XML path. E.g. `(\w/)*c$`. + /// It's implicitly converted from `regex::Regex`. + #[cfg(feature = "regex_path")] + Regex(Regex), +} + +// For retro-compatibility and for syntax's sake, a string may be coerced into an absolute path. +impl From<&str> for PathMatcher { + fn from(value: &str) -> Self { + let path_with_leading_slash = if value.starts_with("/") { + value.into() + } else { + ["/", value].concat() + }; + + PathMatcher::Absolute(path_with_leading_slash) + } +} + +// ... 
While a Regex may be coerced into a regex path. +#[cfg(feature = "regex_path")] +impl From<Regex> for PathMatcher { + fn from(value: Regex) -> Self { + PathMatcher::Regex(value) + } +} + /// Defines which data type to apply in JSON format for consistency of output. /// E.g., the range of XML values for the same node type may be `1234`, `001234`, `AB1234`. /// It is impossible to guess with 100% consistency which data type to apply without seeing @@ -130,7 +176,7 @@ pub struct Config { pub xml_text_node_prop_name: String, /// Defines how empty elements like `` should be handled. pub empty_element_handling: NullValue, - /// A list of XML paths with their JsonType overrides. They take precedence over the document-wide `json_type` + /// A map of XML paths with their JsonArray overrides. They take precedence over the document-wide `json_type` /// property. The path syntax is based on xPath: literal element names and attribute names prefixed with `@`. /// The path must start with a leading `/`. It is a bit of an inconvenience to remember about it, but it saves /// an extra `if`-check in the code to improve the performance. @@ -140,6 +186,10 @@ pub struct Config { /// - path for `b` text node (007): `/a/b` #[cfg(feature = "json_types")] pub json_type_overrides: HashMap<String, JsonArray>, + /// A list of pairs of regex and JsonArray overrides. They take precedence over both the document-wide `json_type` + /// property and the `json_type_overrides` property. The path syntax is based on xPath just like `json_type_overrides`.
+ #[cfg(feature = "regex_path")] + pub json_regex_type_overrides: Vec<(Regex, JsonArray)>, } impl Config { @@ -154,6 +204,8 @@ impl Config { empty_element_handling: NullValue::EmptyObject, #[cfg(feature = "json_types")] json_type_overrides: HashMap::new(), + #[cfg(feature = "regex_path")] + json_regex_type_overrides: Vec::new(), } } @@ -171,6 +223,8 @@ impl Config { empty_element_handling, #[cfg(feature = "json_types")] json_type_overrides: HashMap::new(), + #[cfg(feature = "regex_path")] + json_regex_type_overrides: Vec::new(), } } @@ -179,16 +233,27 @@ impl Config { /// - **XML**: `007` /// - path for `c`: `/a/b/@c` /// - path for `b` text node (007): `/a/b` - /// This function will add the leading `/` if it's missing. + /// - regex path for any `element` node: `(\w/)*element$` [requires `regex_path` feature] #[cfg(feature = "json_types")] - pub fn add_json_type_override(self, path: &str, json_type: JsonArray) -> Self { + pub fn add_json_type_override<P>(self, path: P, json_type: JsonArray) -> Self + where + P: Into<PathMatcher> + { let mut conf = self; - let path = if path.starts_with("/") { - path.to_owned() - } else { - ["/", path].concat() - }; - conf.json_type_overrides.insert(path, json_type); + + match path.into() { + PathMatcher::Absolute(path) => { + conf.json_type_overrides.insert(path, json_type); + } + #[cfg(feature = "regex_path")] + PathMatcher::Regex(regex) => { + conf.json_regex_type_overrides.push(( + regex, + json_type + )); + } + } + conf } } @@ -386,20 +451,47 @@ pub fn xml_string_to_json(xml: String, config: &Config) -> Result<Value, Error> /// in the list of paths with custom config. #[cfg(feature = "json_types")] #[inline] -fn get_json_type(config: &Config, path: &String) -> (bool, JsonType) { +fn get_json_type_with_absolute_path<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) { match config - .json_type_overrides - .get(path) - .unwrap_or(&JsonArray::Infer(JsonType::Infer)) + .json_type_overrides + .get(path) + .unwrap_or(&JsonArray::Infer(JsonType::Infer)) { - JsonArray::Infer(v) => (false, v.clone()), - JsonArray::Always(v) => (true, v.clone()), + JsonArray::Infer(v) => (false, v), + JsonArray::Always(v) => (true, v), + } +} + +/// Simply returns `get_json_type_with_absolute_path` if `regex_path` feature is disabled. +#[cfg(feature = "json_types")] +#[cfg(not(feature = "regex_path"))] +#[inline] +fn get_json_type<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) { + get_json_type_with_absolute_path(config, path) +} + +/// Returns a tuple for Array and Value enforcements for the current node. Searches both absolute paths +/// and regex paths, giving precedence to regex paths. Returns `(false, JsonArray::Infer(JsonType::Infer)` +/// if the current path is not found in the list of paths with custom config.
+#[cfg(feature = "json_types")] +#[cfg(feature = "regex_path")] +#[inline] +fn get_json_type<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) { + for (regex, json_array) in &config.json_regex_type_overrides { + if regex.is_match(path) { + return match json_array { + JsonArray::Infer(v) => (false, v), + JsonArray::Always(v) => (true, v), + }; + } } + + get_json_type_with_absolute_path(config, path) } /// Always returns `(false, JsonArray::Infer(JsonType::Infer)` if `json_types` feature is not enabled. #[cfg(not(feature = "json_types"))] #[inline] -fn get_json_type(_config: &Config, _path: &String) -> (bool, JsonType) { - (false, JsonType::Infer) +fn get_json_type<'conf>(_config: &'conf Config, _path: &String) -> (bool, &'conf JsonType) { + (false, &JsonType::Infer) } diff --git a/src/tests.rs b/src/tests.rs index 3476455..e48e7af 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -391,3 +391,74 @@ fn test_xml_str_to_json() { assert_eq!(expected, result.unwrap()); } + +#[cfg(feature = "regex_path")] +#[test] +fn test_regex_json_type_overrides() { + use regex::Regex; + + // test a non-array with array enforcement (as object). 
+ let xml = r#"<a attr1="att1"><b c="att">1</b></a>"#; + let expected = json!({ + "a": { + "@attr1":"att1", + "b": [{ "@c":"att", "#text":1 }] + } + }); + + let config = Config::new_with_defaults() + .add_json_type_override( + Regex::new(r"\w/b").unwrap(), + JsonArray::Always(JsonType::Infer + ) + ); + + let result = xml_string_to_json(String::from(xml), &config); + assert_eq!(expected, result.unwrap()); + + // test a multiple elements of the same tag nested in different elements + let xml = r#" + <a attr1="att1"> + <element name="el1" /> + <element name="el2" /> + <b attr2="att2"> + <element name="el3" /> + <c attr3="att3"> + <element name="el4" /> + </c> + </b> + </a> + "#; + + let expected = json!({ + "a": { + "@attr1": "att1", + "element": [ + { "@name": "el1" }, + { "@name": "el2" } + ], + "b": { + "@attr2": "att2", + "element": [ + { "@name": "el3" } + ], + "c": { + "@attr3": "att3", + "element": [ + { "@name": "el4" } + ] + } + }, + } + }); + + let config = Config::new_with_defaults() + .add_json_type_override( + Regex::new(r"element").unwrap(), + JsonArray::Always(JsonType::Infer) + ); + + let result = xml_string_to_json(String::from(xml), &config); + assert_eq!(expected, result.unwrap()); + +} \ No newline at end of file