From 0f655b5c2574482c398094bf1d61fc6d906c6091 Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Fri, 10 Apr 2020 20:30:28 +0100 Subject: [PATCH] Avoid recursion in the document loader. (#127) The document loading API (yaml_parser_load) was susseptable to a stack overflow issue when given input data which opened many mappings and/or sequences without closing them. This was due to the use of recurion in the implementation. With this change, we avoid recursion, and maintain our own loader context stack on the heap. The loader context contains a stack of document node indexes. Each time a sequence or mapping start event is encountered, the node index corrasponding to the event is pushed to the stack. Each time a sequence or mapping end event is encountered, the corrasponding node's index is popped from the stack. The yaml_parser_load_nodes() function sits on the event stream, issuing events to the appropriate handlers by type. When an event handler function constructs a node, it needs to connect the new node to its parent (unless it's the root node). This is where the loader context stack is used to find the parent node. The way that the new node is added to the tree depends on whether the parent node is a mapping (with a yaml_node_pair_t to fill), or a sequence (with a yaml_node_item_t). Fixes: https://github.com/yaml/libyaml/issues/107 --- src/loader.c | 306 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 203 insertions(+), 103 deletions(-) diff --git a/src/loader.c b/src/loader.c index db8501ac..dea8ac42 100644 --- a/src/loader.c +++ b/src/loader.c @@ -37,27 +37,47 @@ yaml_parser_register_anchor(yaml_parser_t *parser, static void yaml_parser_delete_aliases(yaml_parser_t *parser); +/* + * Document loading context. + */ +struct loader_ctx { + int *start; + int *end; + int *top; +}; + /* * Composer functions. */ +static int +yaml_parser_load_nodes(yaml_parser_t *parser, struct loader_ctx *ctx); + +static int +yaml_parser_load_document(yaml_parser_t *parser, yaml_event_t *event); static int -yaml_parser_load_document(yaml_parser_t *parser, yaml_event_t *first_event); +yaml_parser_load_alias(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx); static int -yaml_parser_load_node(yaml_parser_t *parser, yaml_event_t *first_event); +yaml_parser_load_scalar(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx); static int -yaml_parser_load_alias(yaml_parser_t *parser, yaml_event_t *first_event); +yaml_parser_load_sequence(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx); static int -yaml_parser_load_scalar(yaml_parser_t *parser, yaml_event_t *first_event); +yaml_parser_load_mapping(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx); static int -yaml_parser_load_sequence(yaml_parser_t *parser, yaml_event_t *first_event); +yaml_parser_load_sequence_end(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx); static int -yaml_parser_load_mapping(yaml_parser_t *parser, yaml_event_t *first_event); +yaml_parser_load_mapping_end(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx); /* * Load the next document of the stream. @@ -162,59 +182,78 @@ yaml_parser_delete_aliases(yaml_parser_t *parser) */ static int -yaml_parser_load_document(yaml_parser_t *parser, yaml_event_t *first_event) +yaml_parser_load_document(yaml_parser_t *parser, yaml_event_t *event) { - yaml_event_t event; + struct loader_ctx ctx = { NULL, NULL, NULL }; - assert(first_event->type == YAML_DOCUMENT_START_EVENT); + assert(event->type == YAML_DOCUMENT_START_EVENT); /* DOCUMENT-START is expected. */ parser->document->version_directive - = first_event->data.document_start.version_directive; + = event->data.document_start.version_directive; parser->document->tag_directives.start - = first_event->data.document_start.tag_directives.start; + = event->data.document_start.tag_directives.start; parser->document->tag_directives.end - = first_event->data.document_start.tag_directives.end; + = event->data.document_start.tag_directives.end; parser->document->start_implicit - = first_event->data.document_start.implicit; - parser->document->start_mark = first_event->start_mark; - - if (!yaml_parser_parse(parser, &event)) return 0; - - if (!yaml_parser_load_node(parser, &event)) return 0; - - if (!yaml_parser_parse(parser, &event)) return 0; - assert(event.type == YAML_DOCUMENT_END_EVENT); - /* DOCUMENT-END is expected. */ + = event->data.document_start.implicit; + parser->document->start_mark = event->start_mark; - parser->document->end_implicit = event.data.document_end.implicit; - parser->document->end_mark = event.end_mark; + if (!STACK_INIT(parser, ctx, int*)) return 0; + if (!yaml_parser_load_nodes(parser, &ctx)) { + STACK_DEL(parser, ctx); + return 0; + } + STACK_DEL(parser, ctx); return 1; } /* - * Compose a node. + * Compose a node tree. */ static int -yaml_parser_load_node(yaml_parser_t *parser, yaml_event_t *first_event) +yaml_parser_load_nodes(yaml_parser_t *parser, struct loader_ctx *ctx) { - switch (first_event->type) { - case YAML_ALIAS_EVENT: - return yaml_parser_load_alias(parser, first_event); - case YAML_SCALAR_EVENT: - return yaml_parser_load_scalar(parser, first_event); - case YAML_SEQUENCE_START_EVENT: - return yaml_parser_load_sequence(parser, first_event); - case YAML_MAPPING_START_EVENT: - return yaml_parser_load_mapping(parser, first_event); - default: - assert(0); /* Could not happen. */ - return 0; - } + yaml_event_t event; - return 0; + do { + if (!yaml_parser_parse(parser, &event)) return 0; + + switch (event.type) { + case YAML_ALIAS_EVENT: + if (!yaml_parser_load_alias(parser, &event, ctx)) return 0; + break; + case YAML_SCALAR_EVENT: + if (!yaml_parser_load_scalar(parser, &event, ctx)) return 0; + break; + case YAML_SEQUENCE_START_EVENT: + if (!yaml_parser_load_sequence(parser, &event, ctx)) return 0; + break; + case YAML_SEQUENCE_END_EVENT: + if (!yaml_parser_load_sequence_end(parser, &event, ctx)) + return 0; + break; + case YAML_MAPPING_START_EVENT: + if (!yaml_parser_load_mapping(parser, &event, ctx)) return 0; + break; + case YAML_MAPPING_END_EVENT: + if (!yaml_parser_load_mapping_end(parser, &event, ctx)) + return 0; + break; + default: + assert(0); /* Could not happen. */ + return 0; + case YAML_DOCUMENT_END_EVENT: + break; + } + } while (event.type != YAML_DOCUMENT_END_EVENT); + + parser->document->end_implicit = event.data.document_end.implicit; + parser->document->end_mark = event.end_mark; + + return 1; } /* @@ -252,27 +291,80 @@ yaml_parser_register_anchor(yaml_parser_t *parser, return 1; } +/* + * Compose node into its parent in the stree. + */ + +static int +yaml_parser_load_node_add(yaml_parser_t *parser, struct loader_ctx *ctx, + int index) +{ + struct yaml_node_s *parent; + int parent_index; + + if (STACK_EMPTY(parser, *ctx)) { + /* This is the root node, there's no tree to add it to. */ + return 1; + } + + parent_index = *((*ctx).top - 1); + parent = &parser->document->nodes.start[parent_index-1]; + + switch (parent->type) { + case YAML_SEQUENCE_NODE: + if (!STACK_LIMIT(parser, parent->data.sequence.items, INT_MAX-1)) + return 0; + if (!PUSH(parser, parent->data.sequence.items, index)) + return 0; + break; + case YAML_MAPPING_NODE: { + yaml_node_pair_t pair; + if (!STACK_EMPTY(parser, parent->data.mapping.pairs)) { + yaml_node_pair_t *p = parent->data.mapping.pairs.top - 1; + if (p->key != 0 && p->value == 0) { + p->value = index; + break; + } + } + + pair.key = index; + pair.value = 0; + if (!STACK_LIMIT(parser, parent->data.mapping.pairs, INT_MAX-1)) + return 0; + if (!PUSH(parser, parent->data.mapping.pairs, pair)) + return 0; + + break; + } + default: + assert(0); /* Could not happen. */ + return 0; + } + return 1; +} + /* * Compose a node corresponding to an alias. */ static int -yaml_parser_load_alias(yaml_parser_t *parser, yaml_event_t *first_event) +yaml_parser_load_alias(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx) { - yaml_char_t *anchor = first_event->data.alias.anchor; + yaml_char_t *anchor = event->data.alias.anchor; yaml_alias_data_t *alias_data; for (alias_data = parser->aliases.start; alias_data != parser->aliases.top; alias_data ++) { if (strcmp((char *)alias_data->anchor, (char *)anchor) == 0) { yaml_free(anchor); - return alias_data->index; + return yaml_parser_load_node_add(parser, ctx, alias_data->index); } } yaml_free(anchor); return yaml_parser_set_composer_error(parser, "found undefined alias", - first_event->start_mark); + event->start_mark); } /* @@ -280,11 +372,12 @@ yaml_parser_load_alias(yaml_parser_t *parser, yaml_event_t *first_event) */ static int -yaml_parser_load_scalar(yaml_parser_t *parser, yaml_event_t *first_event) +yaml_parser_load_scalar(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx) { yaml_node_t node; int index; - yaml_char_t *tag = first_event->data.scalar.tag; + yaml_char_t *tag = event->data.scalar.tag; if (!STACK_LIMIT(parser, parser->document->nodes, INT_MAX-1)) goto error; @@ -294,23 +387,23 @@ yaml_parser_load_scalar(yaml_parser_t *parser, yaml_event_t *first_event) if (!tag) goto error; } - SCALAR_NODE_INIT(node, tag, first_event->data.scalar.value, - first_event->data.scalar.length, first_event->data.scalar.style, - first_event->start_mark, first_event->end_mark); + SCALAR_NODE_INIT(node, tag, event->data.scalar.value, + event->data.scalar.length, event->data.scalar.style, + event->start_mark, event->end_mark); if (!PUSH(parser, parser->document->nodes, node)) goto error; index = parser->document->nodes.top - parser->document->nodes.start; if (!yaml_parser_register_anchor(parser, index, - first_event->data.scalar.anchor)) return 0; + event->data.scalar.anchor)) return 0; - return index; + return yaml_parser_load_node_add(parser, ctx, index); error: yaml_free(tag); - yaml_free(first_event->data.scalar.anchor); - yaml_free(first_event->data.scalar.value); + yaml_free(event->data.scalar.anchor); + yaml_free(event->data.scalar.value); return 0; } @@ -319,17 +412,17 @@ yaml_parser_load_scalar(yaml_parser_t *parser, yaml_event_t *first_event) */ static int -yaml_parser_load_sequence(yaml_parser_t *parser, yaml_event_t *first_event) +yaml_parser_load_sequence(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx) { - yaml_event_t event; yaml_node_t node; struct { yaml_node_item_t *start; yaml_node_item_t *end; yaml_node_item_t *top; } items = { NULL, NULL, NULL }; - int index, item_index; - yaml_char_t *tag = first_event->data.sequence_start.tag; + int index; + yaml_char_t *tag = event->data.sequence_start.tag; if (!STACK_LIMIT(parser, parser->document->nodes, INT_MAX-1)) goto error; @@ -342,48 +435,54 @@ yaml_parser_load_sequence(yaml_parser_t *parser, yaml_event_t *first_event) if (!STACK_INIT(parser, items, yaml_node_item_t*)) goto error; SEQUENCE_NODE_INIT(node, tag, items.start, items.end, - first_event->data.sequence_start.style, - first_event->start_mark, first_event->end_mark); + event->data.sequence_start.style, + event->start_mark, event->end_mark); if (!PUSH(parser, parser->document->nodes, node)) goto error; index = parser->document->nodes.top - parser->document->nodes.start; if (!yaml_parser_register_anchor(parser, index, - first_event->data.sequence_start.anchor)) return 0; - - if (!yaml_parser_parse(parser, &event)) return 0; - - while (event.type != YAML_SEQUENCE_END_EVENT) { - if (!STACK_LIMIT(parser, - parser->document->nodes.start[index-1].data.sequence.items, - INT_MAX-1)) return 0; - item_index = yaml_parser_load_node(parser, &event); - if (!item_index) return 0; - if (!PUSH(parser, - parser->document->nodes.start[index-1].data.sequence.items, - item_index)) return 0; - if (!yaml_parser_parse(parser, &event)) return 0; - } + event->data.sequence_start.anchor)) return 0; + + if (!yaml_parser_load_node_add(parser, ctx, index)) return 0; - parser->document->nodes.start[index-1].end_mark = event.end_mark; + if (!STACK_LIMIT(parser, *ctx, INT_MAX-1)) return 0; + if (!PUSH(parser, *ctx, index)) return 0; - return index; + return 1; error: yaml_free(tag); - yaml_free(first_event->data.sequence_start.anchor); + yaml_free(event->data.sequence_start.anchor); return 0; } +static int +yaml_parser_load_sequence_end(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx) +{ + int index; + + assert(((*ctx).top - (*ctx).start) > 0); + + index = *((*ctx).top - 1); + assert(parser->document->nodes.start[index-1].type == YAML_SEQUENCE_NODE); + parser->document->nodes.start[index-1].end_mark = event->end_mark; + + (void)POP(parser, *ctx); + + return 1; +} + /* * Compose a mapping node. */ static int -yaml_parser_load_mapping(yaml_parser_t *parser, yaml_event_t *first_event) +yaml_parser_load_mapping(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx) { - yaml_event_t event; yaml_node_t node; struct { yaml_node_pair_t *start; @@ -391,8 +490,7 @@ yaml_parser_load_mapping(yaml_parser_t *parser, yaml_event_t *first_event) yaml_node_pair_t *top; } pairs = { NULL, NULL, NULL }; int index; - yaml_node_pair_t pair; - yaml_char_t *tag = first_event->data.mapping_start.tag; + yaml_char_t *tag = event->data.mapping_start.tag; if (!STACK_LIMIT(parser, parser->document->nodes, INT_MAX-1)) goto error; @@ -405,40 +503,42 @@ yaml_parser_load_mapping(yaml_parser_t *parser, yaml_event_t *first_event) if (!STACK_INIT(parser, pairs, yaml_node_pair_t*)) goto error; MAPPING_NODE_INIT(node, tag, pairs.start, pairs.end, - first_event->data.mapping_start.style, - first_event->start_mark, first_event->end_mark); + event->data.mapping_start.style, + event->start_mark, event->end_mark); if (!PUSH(parser, parser->document->nodes, node)) goto error; index = parser->document->nodes.top - parser->document->nodes.start; if (!yaml_parser_register_anchor(parser, index, - first_event->data.mapping_start.anchor)) return 0; + event->data.mapping_start.anchor)) return 0; - if (!yaml_parser_parse(parser, &event)) return 0; - - while (event.type != YAML_MAPPING_END_EVENT) { - if (!STACK_LIMIT(parser, - parser->document->nodes.start[index-1].data.mapping.pairs, - INT_MAX-1)) return 0; - pair.key = yaml_parser_load_node(parser, &event); - if (!pair.key) return 0; - if (!yaml_parser_parse(parser, &event)) return 0; - pair.value = yaml_parser_load_node(parser, &event); - if (!pair.value) return 0; - if (!PUSH(parser, - parser->document->nodes.start[index-1].data.mapping.pairs, - pair)) return 0; - if (!yaml_parser_parse(parser, &event)) return 0; - } + if (!yaml_parser_load_node_add(parser, ctx, index)) return 0; - parser->document->nodes.start[index-1].end_mark = event.end_mark; + if (!STACK_LIMIT(parser, *ctx, INT_MAX-1)) return 0; + if (!PUSH(parser, *ctx, index)) return 0; - return index; + return 1; error: yaml_free(tag); - yaml_free(first_event->data.mapping_start.anchor); + yaml_free(event->data.mapping_start.anchor); return 0; } +static int +yaml_parser_load_mapping_end(yaml_parser_t *parser, yaml_event_t *event, + struct loader_ctx *ctx) +{ + int index; + + assert(((*ctx).top - (*ctx).start) > 0); + + index = *((*ctx).top - 1); + assert(parser->document->nodes.start[index-1].type == YAML_MAPPING_NODE); + parser->document->nodes.start[index-1].end_mark = event->end_mark; + + (void)POP(parser, *ctx); + + return 1; +} \ No newline at end of file