Skip to content

Commit

Permalink
support escaped quotes in strings
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianPommerening committed Nov 16, 2023
1 parent 8fcac61 commit e3de553
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 27 deletions.
18 changes: 13 additions & 5 deletions src/search/parser/abstract_syntax_tree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -408,13 +408,21 @@ LiteralNode::LiteralNode(Token value)
}

DecoratedASTNodePtr LiteralNode::decorate(DecorateContext &context) const {
utils::TraceBlock block(context, "Checking Literal: " + value.content);
utils::TraceBlock block(context, "Checking Literal: " + value.repr());
if (context.has_variable(value.content)) {
if (value.type != TokenType::IDENTIFIER) {
if (value.type == TokenType::IDENTIFIER) {
string variable_name = value.content;
return utils::make_unique_ptr<VariableNode>(variable_name);
} else if (value.type != TokenType::STRING) {
/*
Variable names may be identical to a string literal but not
identical to any other token, e.g., a boolean:
"let(true, blind(), astar(true))"
This kind of mistake is handled earlier, so ending up here is a
programming mistake, not an input error.
*/
ABORT("A non-identifier token was defined as variable.");
}
string variable_name = value.content;
return utils::make_unique_ptr<VariableNode>(variable_name);
}

switch (value.type) {
Expand All @@ -436,7 +444,7 @@ DecoratedASTNodePtr LiteralNode::decorate(DecorateContext &context) const {

void LiteralNode::dump(string indent) const {
cout << indent << token_type_name(value.type) << ": "
<< value.content << endl;
<< value.repr() << endl;
}

const plugins::Type &LiteralNode::get_type(DecorateContext &context) const {
Expand Down
2 changes: 1 addition & 1 deletion src/search/parser/decorated_abstract_syntax_tree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ plugins::Any StringLiteralNode::construct(ConstructContext &context) const {
}

void StringLiteralNode::dump(string indent) const {
cout << indent << "STRING: " << value << endl;
cout << indent << "STRING: \"" << utils::escape(value) << "\"" << endl;
}

IntLiteralNode::IntLiteralNode(const string &value)
Expand Down
40 changes: 21 additions & 19 deletions src/search/parser/lexical_analyzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ static vector<pair<TokenType, regex>> construct_token_type_expressions() {
{TokenType::INTEGER,
R"([+-]?(infinity|\d+([kmg]\b)?))"},
{TokenType::BOOLEAN, R"(true|false)"},
// TODO: support quoted strings.
{TokenType::STRING, R"("([^"]*)\")"},
{TokenType::STRING, R"(\"((\\\\|\\"|\\n|[^"\\])*)\")"},
{TokenType::LET, R"(let)"},
{TokenType::IDENTIFIER, R"([a-zA-Z_]\w*)"}
};
Expand All @@ -44,6 +43,23 @@ static vector<pair<TokenType, regex>> construct_token_type_expressions() {
// Token types paired with the regular expressions used to recognize them,
// built once by construct_token_type_expressions().
static const vector<pair<TokenType, regex>> token_type_expressions =
construct_token_type_expressions();

/*
  Render text line by line and point at the character that pos refers to.

  The line containing pos is prefixed with "> " and followed by an extra
  line that carries a "^" beneath the character in question. All other
  lines are prefixed with " ". The result is passed through utils::rstrip
  before it is returned (presumably to drop the trailing newline).

  pos must be an iterator into text.
*/
static string highlight_position(const string &text, string::const_iterator pos) {
    ostringstream out;
    // Offset of pos relative to the start of the line currently processed.
    // It turns negative once the line containing pos has been passed.
    int offset = pos - text.begin();
    for (const string &line : utils::split(text, "\n")) {
        const int length = line.size();
        const bool is_hit = offset >= 0 && offset < length;
        if (is_hit) {
            out << "> " << line << endl;
            // The two extra blanks compensate for the "> " prefix.
            out << string(offset + 2, ' ') << "^" << endl;
        } else {
            out << " " << line << endl;
        }
        // Skip this line and the newline character that terminated it.
        offset -= length + 1;
    }
    string result = out.str();
    utils::rstrip(result);
    return result;
}

TokenStream split_tokens(const string &text) {
utils::Context context;
Expand All @@ -63,7 +79,7 @@ TokenStream split_tokens(const string &text) {
if (regex_search(start, end, match, expression, regex_constants::match_continuous)) {
string value;
if (token_type == TokenType::STRING) {
value = match[2];
value = utils::unescape(match[2]);
} else {
value = utils::tolower(match[1]);
}
Expand All @@ -74,22 +90,8 @@ TokenStream split_tokens(const string &text) {
}
}
if (!has_match) {
ostringstream error;
error << "Unable to recognize next token:" << endl;
int distance_to_error = start - text.begin();
for (const string &line : utils::split(text, "\n")) {
int line_length = line.size();
bool error_in_line =
distance_to_error < line_length && distance_to_error >= 0;
error << (error_in_line ? "> " : " ") << line << endl;
if (error_in_line)
error << string(distance_to_error + 2, ' ') << "^" << endl;

distance_to_error -= line.size() + 1;
}
string message = error.str();
utils::rstrip(message);
context.error(message);
context.error("Unable to recognize next token:\n" +
highlight_position(text, start));
}
}
return TokenStream(move(tokens));
Expand Down
12 changes: 10 additions & 2 deletions src/search/parser/token_stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ Token::Token(const string &content, TokenType type)
: content(content), type(type) {
}

/*
  Return a printable representation of the token: the content of a STRING
  token is escaped with utils::escape and wrapped in double quotes, the
  content of any other token is returned unchanged.
*/
string Token::repr() const {
    if (type != TokenType::STRING)
        return content;
    return "\"" + utils::escape(content) + "\"";
}

// Take over the given tokens by moving them into the stream and initialize
// the position to 0.
TokenStream::TokenStream(vector<Token> &&tokens)
: tokens(move(tokens)), pos(0) {
}
Expand Down Expand Up @@ -70,7 +78,7 @@ string TokenStream::str(int from, int to) const {
int max_position = min(static_cast<int>(tokens.size()), to);
ostringstream message;
while (curr_position < max_position) {
message << tokens[curr_position].content;
message << tokens[curr_position].repr();
curr_position++;
}
return message.str();
Expand Down Expand Up @@ -113,7 +121,7 @@ ostream &operator<<(ostream &out, TokenType token_type) {
}

ostream &operator<<(ostream &out, const Token &token) {
out << "<Type: '" << token.type << "', Value: '" << token.content << "'>";
out << "<Type: '" << token.type << "', Value: '" << token.repr() << "'>";
return out;
}
}
1 change: 1 addition & 0 deletions src/search/parser/token_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct Token {
TokenType type;

Token(const std::string &content, TokenType type);
std::string repr() const;
};

class TokenStream {
Expand Down
38 changes: 38 additions & 0 deletions src/search/utils/strings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,44 @@
using namespace std;

namespace utils {
/*
  Return a copy of s in which every backslash, double quote and newline
  character is replaced by a two-character sequence: a backslash followed
  by a backslash, a double quote or the letter n, respectively. All other
  characters are copied unchanged.
*/
string escape(const string &s) {
    string escaped;
    // Lower bound only: every escaped character adds one more char.
    escaped.reserve(s.length());
    for (const char c : s) {
        switch (c) {
        case '\\':
            escaped += "\\\\";
            break;
        case '"':
            escaped += "\\\"";
            break;
        case '\n':
            escaped += "\\n";
            break;
        default:
            escaped += c;
            break;
        }
    }
    return escaped;
}

/*
  Return a copy of s with backslash escapes resolved: a backslash followed
  by the letter n becomes a newline, a backslash followed by any other
  character becomes that character. A backslash at the very end of s has
  nothing to escape and is dropped.
*/
string unescape(const string &s) {
    string plain;
    plain.reserve(s.length());
    // True iff the previous character was an unconsumed backslash.
    bool after_backslash = false;
    for (const char c : s) {
        if (!after_backslash) {
            if (c == '\\')
                after_backslash = true;
            else
                plain += c;
            continue;
        }
        after_backslash = false;
        plain += (c == 'n') ? '\n' : c;
    }
    return plain;
}

void lstrip(string &s) {
s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) {
return !isspace(ch);
Expand Down
2 changes: 2 additions & 0 deletions src/search/utils/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <vector>

namespace utils {
/*
  Return a copy of s in which every backslash, double quote and newline is
  replaced by a two-character backslash sequence; all other characters are
  kept as they are.
*/
extern std::string escape(const std::string &s);
/*
  Return a copy of s with backslash sequences resolved: a backslash followed
  by 'n' becomes a newline, a backslash followed by any other character
  becomes that character.
*/
extern std::string unescape(const std::string &s);
extern void lstrip(std::string &s);
extern void rstrip(std::string &s);
extern void strip(std::string &s);
Expand Down

0 comments on commit e3de553

Please sign in to comment.