Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cdata sections #59

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions c_src/exml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ namespace {
ERL_NIF_TERM atom_xmlstreamstart;
ERL_NIF_TERM atom_xmlstreamend;
ERL_NIF_TERM atom_pretty;
ERL_NIF_TERM atom_node_data;
ERL_NIF_TERM atom_node_cdata;
ERL_NIF_TERM atom_true;
constexpr const unsigned char EMPTY[1] = {0};

Expand Down Expand Up @@ -290,15 +292,15 @@ ERL_NIF_TERM make_xmlel(ParseCtx &ctx,
}

bool build_children(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM children,
rapidxml::xml_node<unsigned char> &node);
rapidxml::xml_node<unsigned char> &node, rapidxml::node_type cdata_type);

bool build_cdata(ErlNifEnv *env, xml_document &doc, const ERL_NIF_TERM elem[],
rapidxml::xml_node<unsigned char> &node) {
rapidxml::xml_node<unsigned char> &node, rapidxml::node_type cdata_type) {
ErlNifBinary bin;
if (!enif_inspect_iolist_as_binary(env, elem[1], &bin))
return false;

auto child = doc.impl.allocate_node(rapidxml::node_data);
auto child = doc.impl.allocate_node(cdata_type);
child->value(bin.size > 0 ? bin.data : EMPTY, bin.size);
node.append_node(child);
return true;
Expand Down Expand Up @@ -333,7 +335,7 @@ bool build_attrs(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM attrs,
}

bool build_el(ErlNifEnv *env, xml_document &doc, const ERL_NIF_TERM elem[],
rapidxml::xml_node<unsigned char> &node) {
rapidxml::xml_node<unsigned char> &node, rapidxml::node_type cdata_type) {
ErlNifBinary name;
if (!enif_inspect_iolist_as_binary(env, elem[1], &name))
return false;
Expand All @@ -344,24 +346,24 @@ bool build_el(ErlNifEnv *env, xml_document &doc, const ERL_NIF_TERM elem[],

if (!build_attrs(env, doc, elem[2], *child))
return false;
if (!build_children(env, doc, elem[3], *child))
if (!build_children(env, doc, elem[3], *child, cdata_type))
return false;

return true;
}

bool build_child(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM child,
rapidxml::xml_node<unsigned char> &node) {
rapidxml::xml_node<unsigned char> &node, rapidxml::node_type cdata_type) {
int arity;
const ERL_NIF_TERM *tuple;
if (!enif_get_tuple(env, child, &arity, &tuple))
return false;

if (arity == 2 && enif_compare(atom_xmlcdata, tuple[0]) == 0) {
if (!build_cdata(env, doc, tuple, node))
if (!build_cdata(env, doc, tuple, node, cdata_type))
return false;
} else if (arity == 4 && enif_compare(atom_xmlel, tuple[0]) == 0) {
if (!build_el(env, doc, tuple, node))
if (!build_el(env, doc, tuple, node, cdata_type))
return false;
} else {
return false;
Expand All @@ -371,14 +373,14 @@ bool build_child(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM child,
}

bool build_children(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM children,
rapidxml::xml_node<unsigned char> &node) {
rapidxml::xml_node<unsigned char> &node, rapidxml::node_type cdata_type) {

if (!enif_is_list(env, children))
return false;

for (ERL_NIF_TERM head;
enif_get_list_cell(env, children, &head, &children);) {
if (!build_child(env, doc, head, node))
if (!build_child(env, doc, head, node, cdata_type))
return false;
}

Expand Down Expand Up @@ -442,6 +444,8 @@ static int load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) {
atom_xmlstreamstart = enif_make_atom(env, "xmlstreamstart");
atom_xmlstreamend = enif_make_atom(env, "xmlstreamend");
atom_pretty = enif_make_atom(env, "pretty");
atom_node_data = enif_make_atom(env, "node_data");
atom_node_cdata = enif_make_atom(env, "node_cdata");
atom_true = enif_make_atom(env, "true");

get_static_doc().impl.set_allocator(enif_alloc, enif_free);
Expand Down Expand Up @@ -582,7 +586,11 @@ static ERL_NIF_TERM escape_cdata(ErlNifEnv *env, int argc,
if (!enif_inspect_iolist_as_binary(env, argv[0], &bin))
return enif_make_badarg(env);

rapidxml::xml_node<unsigned char> node(rapidxml::node_data);
auto cdata_type = rapidxml::node_data;
if (enif_compare(atom_node_cdata, argv[1]) == 0)
cdata_type = rapidxml::node_cdata;

rapidxml::xml_node<unsigned char> node(cdata_type);
node.value(bin.data, bin.size);
return node_to_binary(env, node, rapidxml::print_no_indenting);
}
Expand All @@ -601,8 +609,12 @@ static ERL_NIF_TERM to_binary(ErlNifEnv *env, int argc,
if (enif_compare(atom_pretty, argv[1]) == 0)
flags = 0;

auto cdata_type = rapidxml::node_data;
if (enif_compare(atom_node_cdata, argv[2]) == 0)
cdata_type = rapidxml::node_cdata;

xml_document &doc = get_static_doc();
if (!build_el(env, doc, xmlel, doc.impl))
if (!build_el(env, doc, xmlel, doc.impl, cdata_type))
return enif_make_badarg(env);

return node_to_binary(env, doc.impl, flags);
Expand All @@ -621,8 +633,8 @@ static ERL_NIF_TERM reset_parser(ErlNifEnv *env, int argc,

static ErlNifFunc nif_funcs[] = {
{"create", 2, create}, {"parse", 1, parse},
{"parse_next", 2, parse_next}, {"escape_cdata", 1, escape_cdata},
{"to_binary", 2, to_binary}, {"reset_parser", 1, reset_parser}};
{"parse_next", 2, parse_next}, {"escape_cdata", 2, escape_cdata},
{"to_binary", 3, to_binary}, {"reset_parser", 1, reset_parser}};
}

ERL_NIF_INIT(exml_nif, nif_funcs, &load, nullptr, nullptr, &unload)
64 changes: 41 additions & 23 deletions src/exml.erl
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,32 @@
-export([to_list/1,
to_binary/1,
to_iolist/1,
to_iolist/3,
xml_size/1,
xml_sort/1,
to_pretty_iolist/1]).

-export_type([attr/0,
cdata/0,
element/0,
item/0]).
item/0,
prettify/0,
cdata_escape/0]).

-type attr() :: {binary(), binary()}.
-type cdata() :: #xmlcdata{}.
-type element() :: #xmlel{}.
-type item() :: element() | attr() | cdata() | exml_stream:start() | exml_stream:stop().
-type prettify() :: pretty | not_pretty.
-type cdata_escape() :: node_data | node_cdata.

-spec xml_size(item() | [item()]) -> non_neg_integer().
xml_size([]) ->
0;
xml_size([Elem | Rest]) ->
xml_size(Elem) + xml_size(Rest);
xml_size(#xmlcdata{ content = Content }) ->
iolist_size(exml_nif:escape_cdata(Content));
iolist_size(exml_nif:escape_cdata(Content, node_data));
xml_size(#xmlel{ name = Name, attrs = Attrs, children = [] }) ->
3 % Self-closing: </>
+ byte_size(Name) + xml_size(Attrs);
Expand Down Expand Up @@ -78,55 +83,68 @@ xml_sort(#xmlstreamend{} = StreamEnd) ->
xml_sort(Elements) when is_list(Elements) ->
lists:sort([ xml_sort(E) || E <- Elements ]).

%% @equiv erlang:binary_to_list(to_binary(Element))
-spec to_list(element() | [exml_stream:element()]) -> string().
to_list(Element) ->
    %% Serialise to a binary first, then expand that binary into a list of bytes.
    Serialised = to_binary(Element),
    binary_to_list(Serialised).

%% @equiv erlang:iolist_to_binary(to_iolist(Element, not_pretty, node_data))
-spec to_binary(element() | [exml_stream:element()]) -> binary().
to_binary(Element) ->
iolist_to_binary(to_iolist(Element, not_pretty)).
iolist_to_binary(to_iolist(Element, not_pretty, node_data)).

-spec to_iolist(element() | [exml_stream:element()]) -> binary().
%% @equiv to_iolist(Element, not_pretty, node_data)
-spec to_iolist(element() | [exml_stream:element()]) -> iodata().
to_iolist(Element) ->
iolist_to_binary(to_iolist(Element, not_pretty)).
to_iolist(Element, not_pretty, node_data).

-spec to_pretty_iolist(element() | [exml_stream:element()]) -> binary().
%% @equiv to_iolist(Element, pretty, node_data)
-spec to_pretty_iolist(element() | [exml_stream:element()]) -> iodata().
to_pretty_iolist(Element) ->
iolist_to_binary(to_iolist(Element, pretty)).
to_iolist(Element, pretty, node_data).

%% @doc Parse XML given as a binary or as a list of binaries.
%%
%% The call is forwarded to `exml_nif:parse/1' and its result is returned unchanged:
%% `{ok, Element}' on success or `{error, Reason}' otherwise, as stated by the spec.
-spec parse(binary() | [binary()]) -> {ok, exml:element()} | {error, any()}.
parse(XML) ->
exml_nif:parse(XML).

-spec to_iolist(element() | [exml_stream:element()], pretty | term()) -> iolist().
to_iolist(#xmlel{} = Element, Pretty) ->
to_binary_nif(Element, Pretty);
to_iolist([Element], Pretty) ->
to_iolist(Element, Pretty);
to_iolist([Head | _] = Elements, Pretty) ->
%% @doc Turn an exml element, or a list of exml elements, into iodata for IO interactions.
%%
%% The `Pretty' argument selects the layout of the generated XML: `pretty' adds new lines and
%% indentation, which is easier to read while debugging, whereas `not_pretty' produces a
%% minified version, which is better suited for IO.
%%
%% The `CDataEscape' argument selects how character data is escaped in the XML payload:
%% `node_data' escapes it character by character as regular data, whereas `node_cdata'
%% wraps it in a `<![CDATA[]]>' section.
-spec to_iolist(exml_stream:element() | [exml_stream:element()], prettify(), cdata_escape()) ->
iodata().
to_iolist(#xmlel{} = Element, Pretty, CDataEscape) ->
to_binary_nif(Element, Pretty, CDataEscape);
to_iolist([Element], Pretty, CDataEscape) ->
to_iolist(Element, Pretty, CDataEscape);
to_iolist([Head | _] = Elements, Pretty, CDataEscape) ->
[Last | RevChildren] = lists:reverse(tl(Elements)),
case {Head, Last} of
{#xmlstreamstart{name = Name, attrs = Attrs},
#xmlstreamend{name = Name}} ->
Element = #xmlel{name = Name, attrs = Attrs,
children = lists:reverse(RevChildren)},
to_binary_nif(Element, Pretty);
to_binary_nif(Element, Pretty, CDataEscape);
_ ->
[to_iolist(El, Pretty) || El <- Elements]
[to_iolist(El, Pretty, CDataEscape) || El <- Elements]
end;
to_iolist(#xmlstreamstart{name = Name, attrs = Attrs}, _Pretty) ->
Result = to_binary_nif(#xmlel{name = Name, attrs = Attrs}, not_pretty),
to_iolist(#xmlstreamstart{name = Name, attrs = Attrs}, _Pretty, _CDataEscape) ->
Result = to_binary_nif(#xmlel{name = Name, attrs = Attrs}, not_pretty, node_data),
FrontSize = byte_size(Result) - 2,
<<Front:FrontSize/binary, "/>">> = Result,
[Front, $>];
to_iolist(#xmlstreamend{name = Name}, _Pretty) ->
to_iolist(#xmlstreamend{name = Name}, _Pretty, _CDataEscape) ->
[<<"</">>, Name, <<">">>];
to_iolist(#xmlcdata{content = Content}, _Pretty) ->
exml_nif:escape_cdata(Content).
to_iolist(#xmlcdata{content = Content}, _Pretty, CDataEscape) ->
exml_nif:escape_cdata(Content, CDataEscape).

-spec to_binary_nif(element(), pretty | term()) -> binary().
to_binary_nif(#xmlel{} = Element, Pretty) ->
case catch exml_nif:to_binary(Element, Pretty) of
-spec to_binary_nif(element(), prettify(), cdata_escape()) -> binary().
to_binary_nif(#xmlel{} = Element, Pretty, CDataEscape) ->
case catch exml_nif:to_binary(Element, Pretty, CDataEscape) of
{'EXIT', Reason} -> erlang:error({badxml, Element, Reason});
Result when is_binary(Result) -> Result
end.
15 changes: 6 additions & 9 deletions src/exml_nif.erl
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,11 @@

-module(exml_nif).

-include("exml.hrl").
-include("exml_stream.hrl").

-type parser() :: term().
-type stream_element() :: exml:element() | exml_stream:start() | exml_stream:stop().

-export([create/2, parse/1, parse_next/2, escape_cdata/1,
to_binary/2, reset_parser/1]).
-export([create/2, parse/1, parse_next/2, escape_cdata/2,
to_binary/3, reset_parser/1]).
-export_type([parser/0, stream_element/0]).

-on_load(load/0).
Expand All @@ -39,12 +36,12 @@ load() ->
create(_, _) ->
erlang:nif_error(not_loaded).

-spec escape_cdata(Bin :: iodata()) -> binary().
escape_cdata(_Bin) ->
-spec escape_cdata(Bin :: iodata(), exml:cdata_escape()) -> binary().
escape_cdata(_Bin, _Opt) ->
erlang:nif_error(not_loaded).

-spec to_binary(Elem :: exml:element(), pretty | not_pretty) -> binary().
to_binary(_Elem, _Pretty) ->
-spec to_binary(Elem :: exml:element(), exml:prettify(), exml:cdata_escape()) -> binary().
to_binary(_Elem, _Pretty, _CData) ->
erlang:nif_error(not_loaded).

-spec parse(Bin :: binary() | [binary()]) -> {ok, exml:element()} | {error, Reason :: any()}.
Expand Down
5 changes: 5 additions & 0 deletions test/exml_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ size_of_escaped_characters_test() ->
Raw = <<"<a>&amp;</a>">>,
?assertEqual(iolist_size(Raw), exml:xml_size(parse(Raw))).

%% Serialising a parsed document with `node_cdata' must reproduce the original
%% CDATA section byte for byte.
to_binary_with_cdata_test() ->
    Raw = <<"<a><![CDATA[ Within this Character Data block I can ",
            "use double dashes as much as I want (along with <, &, ', and \")]]></a>">>,
    %% exml:to_iolist/3 is specified to return iodata(), so flatten it before
    %% comparing; the assertion then does not depend on how the iodata is nested.
    Serialised = iolist_to_binary(exml:to_iolist(parse(Raw), not_pretty, node_cdata)),
    ?assertEqual(Raw, Serialised).

size_of_exml_with_cdata_test() ->
Raw = <<"<a><![CDATA[ Within this Character Data block I can
use double dashes as much as I want (along with <, &, ', and \")]]></a>">>,
Expand Down