Skip to content

Commit

Permalink
utils: Import fluent-bit's string split functionalities
Browse files Browse the repository at this point in the history
Signed-off-by: Hiroshi Hatake <[email protected]>
  • Loading branch information
cosmo0920 committed Jul 31, 2024
1 parent 6ea1f3c commit 781037e
Show file tree
Hide file tree
Showing 4 changed files with 305 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/cfl/cfl.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include <cfl/cfl_time.h>
#include <cfl/cfl_variant.h>
#include <cfl/cfl_object.h>
#include <cfl/cfl_utils.h>

int cfl_init();
char *cfl_version();
Expand Down
18 changes: 18 additions & 0 deletions include/cfl/cfl_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef CFL_UTILS_H
#define CFL_UTILS_H

#include <cfl/cfl_sds.h>

/*
 * One token produced by cfl_utils_split() / cfl_utils_split_quoted().
 * Entries are linked together through '_head' into the list returned by
 * those functions.
 */
struct cfl_split_entry {
/* Heap-allocated, NUL-terminated copy of the token text. */
char *value;
/* Number of bytes in 'value', not counting the terminating NUL. */
int len;
/*
 * Offset into the split line recorded right after this token was consumed.
 * It is left at 0 for the trailing "remainder" entry that is appended when
 * max_split is reached.
 */
off_t last_pos;
/* List node linking this entry into the result list. */
struct cfl_list _head;
};

/*
 * Split 'line' on 'separator', treating a token that starts with ' or " as a
 * quoted string (the quotes are stripped and \" \' \\ escapes are resolved).
 * When 'max_split' is greater than zero, at most that many tokens are
 * extracted and any remaining text is appended as one final entry.
 * Returns a newly allocated list of struct cfl_split_entry, or NULL when
 * 'line' is NULL, on allocation failure, or on an unterminated quoted token.
 * Release the result with cfl_utils_split_free().
 */
struct cfl_list *cfl_utils_split_quoted(const char *line, int separator, int max_split);
/*
 * Same as cfl_utils_split_quoted() but quote characters get no special
 * treatment: tokens are delimited by 'separator' only.
 */
struct cfl_list *cfl_utils_split(const char *line, int separator, int max_split);
/* Unlink a single entry from its list and free the entry and its value. */
void cfl_utils_split_free_entry(struct cfl_split_entry *entry);
/* Free every entry of a list returned by the split functions, then the list itself. */
void cfl_utils_split_free(struct cfl_list *list);

#endif
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ set(src
cfl_array.c
cfl_variant.c
cfl_checksum.c
cfl_utils.c
)

# Static Library
Expand Down
285 changes: 285 additions & 0 deletions src/cfl_utils.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* CFL
* ===
* Copyright (C) 2022 The CFL Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cfl/cfl.h>

/*
 * Locate the first occurrence of byte 'c' within the first 'len' bytes of
 * 'string'. A negative 'len' means "use strlen(string)".
 * Returns the zero-based offset of the match, or -1 when there is none.
 * Based on monkey/monkey's mk_string_char_search.
 */
static int cfl_string_char_search(const char *string, int c, int len)
{
    const char *hit;

    if (len < 0) {
        len = strlen(string);
    }

    hit = memchr(string, c, len);
    if (hit == NULL) {
        return -1;
    }

    return (hit - string);
}

/*
 * Duplicate the byte range [pos_init, pos_end) of 'string' into a freshly
 * allocated, NUL-terminated buffer that the caller must free().
 * Returns NULL when pos_init > pos_end or when the allocation fails.
 * Based on monkey/monkey's mk_string_copy_substr.
 */
static char *cfl_string_copy_substr(const char *string, int pos_init, int pos_end)
{
    unsigned int span;
    unsigned int alloc_size;
    char *copy = NULL;

    if (pos_end < pos_init) {
        return NULL;
    }

    /* One extra byte for the terminator; very small results get 4 bytes. */
    alloc_size = (unsigned int) (pos_end - pos_init) + 1;
    if (alloc_size <= 2) {
        alloc_size = 4;
    }

    copy = calloc(1, alloc_size);
    if (copy == NULL) {
        return NULL;
    }

    span = pos_end - pos_init;
    memcpy(copy, string + pos_init, span);
    copy[span] = '\0';

    return copy;
}

/*
 * quoted_string_len measures a quoted string whose opening quote is str[0].
 * The result is the number of characters between the quotes, where an escaped
 * quote (\" or \') matching the opening quote and an escaped backslash (\\)
 * each count as a single character. Returns -1 when the input ends before the
 * matching end-quote is found.
 */
static int quoted_string_len(const char *str)
{
    const char delim = str[0];      /* Opening quote character. */
    const char *cur = str + 1;      /* First character after the quote. */
    int count = 0;
    int closed = (delim == '\0');   /* Nothing to scan on an empty input. */

    while (!closed) {
        const char ch = *cur++;

        if (ch == '\0') {
            /* Error: string ends before end-quote was seen. */
            return -1;
        }

        if (ch == '\\') {
            /* An escaped quote or backslash is consumed as one character. */
            if (*cur == delim || *cur == '\\') {
                cur++;
            }
        }
        else if ((ch == '\'' || ch == '"') && ch == delim) {
            /* Matching end-quote: this is the last iteration. */
            closed = 1;
        }

        count++;
    }

    /* The end-quote itself was counted above; leave it out. */
    return count - 1;
}

/*
 * next_token extracts the next token of 'str', where tokens are delimited by
 * 'separator'. Leading separators are skipped first.
 *
 * 'out' receives a newly allocated, NUL-terminated copy of the token.
 * 'out_len' receives the length of that copy.
 * 'parse_quotes', when not CFL_FALSE, makes a token that begins with ' or "
 * be read up to its matching end-quote, with \" \' and \\ unescaped.
 *
 * Returns the offset in 'str' where the token ended (for a quoted token this
 * is the position of the end-quote), or -1 on allocation failure or when a
 * quoted token is not terminated.
 */
static int next_token(const char *str, int separator, char **out, int *out_len, int parse_quotes)
{
    const char *cursor = str;
    char *copy;
    int is_quoted;
    int sep_pos;
    int quote_char;
    int length;
    int idx;

    /* Skip leading separators. */
    while (*cursor == separator) {
        cursor++;
    }

    /* Quote handling applies only when requested and the token opens with a quote. */
    is_quoted = (parse_quotes != CFL_FALSE) &&
                (*cursor == '"' || *cursor == '\'');

    if (!is_quoted) {
        /* Plain token: runs up to the next separator or the end of string. */
        length = (int) strlen(cursor);
        sep_pos = cfl_string_char_search(cursor, separator, length);
        if (sep_pos > 0) {
            length = sep_pos;
        }

        *out_len = length;
        *out = cfl_string_copy_substr(cursor, 0, length);
        if (*out == NULL) {
            return -1;
        }

        return (int) (cursor - str) + length;
    }

    /* Quoted token: determine the unescaped length up front. */
    length = quoted_string_len(cursor);
    if (length < 0) {
        return -1;
    }

    /* Remember which quote opened the token and step over it. */
    quote_char = *cursor++;

    copy = calloc(1, length + 1);
    if (copy == NULL) {
        return -1;
    }

    for (idx = 0; idx < length; idx++) {
        /* Drop the backslash of \" / \' (matching quote) and \\ sequences. */
        if (cursor[0] == '\\' &&
            (cursor[1] == quote_char || cursor[1] == '\\')) {
            cursor++;
        }
        copy[idx] = *cursor++;
    }
    copy[idx] = '\0';

    *out = copy;
    *out_len = length;

    return (int) (cursor - str);
}


/*
 * split breaks 'line' into tokens delimited by 'separator' and returns them
 * as a newly allocated list of struct cfl_split_entry.
 *
 * 'max_split', when greater than zero, caps the number of extracted tokens;
 * once the cap is reached, any remaining text is appended as one final entry.
 * 'quoted' is forwarded to next_token() to enable quoted-token parsing.
 *
 * Returns NULL when 'line' is NULL, when next_token() fails, or when any
 * allocation fails; in the failure cases everything built so far is released.
 * The caller frees a successful result with cfl_utils_split_free().
 */
static struct cfl_list *split(const char *line, int separator, int max_split, int quoted)
{
    int i = 0;
    int count = 0;
    int val_len;
    int len;
    int end;
    char *val;
    struct cfl_list *list;
    struct cfl_split_entry *entry;

    if (!line) {
        return NULL;
    }

    list = calloc(1, sizeof(struct cfl_list));
    if (!list) {
        cfl_errno();
        return NULL;
    }
    cfl_list_init(list);

    len = strlen(line);
    while (i < len) {
        end = next_token(line + i, separator, &val, &val_len, quoted);
        if (end == -1) {
            cfl_report_runtime_error();
            cfl_utils_split_free(list);
            return NULL;
        }

        /* Update last position */
        i += end;

        /* Create new entry; on failure 'val' is not yet owned by the list. */
        entry = calloc(1, sizeof(struct cfl_split_entry));
        if (!entry) {
            cfl_errno();
            free(val);
            cfl_utils_split_free(list);
            return NULL;
        }
        entry->value = val;
        entry->len = val_len;
        entry->last_pos = i;
        cfl_list_add(&entry->_head, list);
        count++;

        /* Update index for next loop: step over the separator / end-quote. */
        i++;

        /*
         * If the counter exceeded the maximum specified and there
         * are still remaining bytes, append those bytes in a new
         * and last entry.
         */
        if (count >= max_split && max_split > 0 && i < len) {
            entry = calloc(1, sizeof(struct cfl_split_entry));
            if (!entry) {
                cfl_errno();
                cfl_utils_split_free(list);
                return NULL;
            }

            entry->value = cfl_string_copy_substr(line, i, len);
            if (!entry->value) {
                /*
                 * Never link an entry whose value could not be allocated:
                 * callers would see a non-zero len paired with a NULL value.
                 * The entry is not in the list yet, so free it directly.
                 */
                cfl_errno();
                free(entry);
                cfl_utils_split_free(list);
                return NULL;
            }
            entry->len = len - i;
            cfl_list_add(&entry->_head, list);
            break;
        }
    }

    return list;
}

/*
 * Split 'line' on 'separator' with quoted-token parsing enabled.
 * See split() for the meaning of 'max_split' and the return value.
 */
struct cfl_list *cfl_utils_split_quoted(const char *line, int separator, int max_split)
{
    struct cfl_list *tokens;

    tokens = split(line, separator, max_split, CFL_TRUE);

    return tokens;
}

/*
 * Split 'line' on 'separator' with quoted-token parsing disabled.
 * See split() for the meaning of 'max_split' and the return value.
 */
struct cfl_list *cfl_utils_split(const char *line, int separator, int max_split)
{
    struct cfl_list *tokens;

    tokens = split(line, separator, max_split, CFL_FALSE);

    return tokens;
}


/*
 * Release one split entry: its token string, its list linkage and the entry
 * itself. 'entry' must be linked into a list and must not be used afterwards.
 */
void cfl_utils_split_free_entry(struct cfl_split_entry *entry)
{
    /* The token buffer is independent of the list links, so free it first. */
    free(entry->value);

    cfl_list_del(&entry->_head);
    free(entry);
}

/*
 * Destroy a list returned by cfl_utils_split() / cfl_utils_split_quoted():
 * every entry is released and then the list head itself is freed.
 */
void cfl_utils_split_free(struct cfl_list *list)
{
    struct cfl_list *node;
    struct cfl_list *next;
    struct cfl_split_entry *item;

    /* The "safe" iterator is used because each entry is unlinked while iterating. */
    cfl_list_foreach_safe(node, next, list) {
        item = cfl_list_entry(node, struct cfl_split_entry, _head);
        cfl_utils_split_free_entry(item);
    }

    free(list);
}

0 comments on commit 781037e

Please sign in to comment.