aibasel · FlorianPommerening · Nov 21, 2023 · Aug 18, 2023 · Nov 16, 2023 · Nov 20, 2023
diff --git a/src/search/parser/abstract_syntax_tree.cc b/src/search/parser/abstract_syntax_tree.cc
@@ -408,18 +408,28 @@ LiteralNode::LiteralNode(Token value)
 }
 
 DecoratedASTNodePtr LiteralNode::decorate(DecorateContext &context) const {
-    utils::TraceBlock block(context, "Checking Literal: " + value.content);
+    utils::TraceBlock block(context, "Checking Literal: " + value.repr());
     if (context.has_variable(value.content)) {
-        if (value.type != TokenType::IDENTIFIER) {
+        if (value.type == TokenType::IDENTIFIER) {
+            string variable_name = value.content;
+            return utils::make_unique_ptr<VariableNode>(variable_name);
+        } else if (value.type != TokenType::STRING) {
+            /*
+              Variable names may be identical to a string literal but not
+              identical to any other token, e.g., a boolean:
+                  "let(true, blind(), astar(true))"
+              This kind of mistake is handled earlier, so ending up here is a
+              programming mistake, not an input error.
+            */
             ABORT("A non-identifier token was defined as variable.");
         }
-        string variable_name = value.content;
-        return utils::make_unique_ptr<VariableNode>(variable_name);
     }
 
     switch (value.type) {
     case TokenType::BOOLEAN:
         return utils::make_unique_ptr<BoolLiteralNode>(value.content);
+    case TokenType::STRING:
+        return utils::make_unique_ptr<StringLiteralNode>(value.content);
     case TokenType::INTEGER:
         return utils::make_unique_ptr<IntLiteralNode>(value.content);
     case TokenType::FLOAT:
@@ -434,13 +444,15 @@ DecoratedASTNodePtr LiteralNode::decorate(DecorateContext &context) const {
 
 void LiteralNode::dump(string indent) const {
     cout << indent << token_type_name(value.type) << ": "
-         << value.content << endl;
+         << value.repr() << endl;
 }
 
 const plugins::Type &LiteralNode::get_type(DecorateContext &context) const {
     switch (value.type) {
     case TokenType::BOOLEAN:
         return plugins::TypeRegistry::instance()->get_type<bool>();
+    case TokenType::STRING:
+        return plugins::TypeRegistry::instance()->get_type<string>();
     case TokenType::INTEGER:
         return plugins::TypeRegistry::instance()->get_type<int>();
     case TokenType::FLOAT:

diff --git a/src/search/parser/decorated_abstract_syntax_tree.cc b/src/search/parser/decorated_abstract_syntax_tree.cc
@@ -218,6 +218,19 @@ void BoolLiteralNode::dump(string indent) const {
     cout << indent << "BOOL: " << value << endl;
 }
 
+StringLiteralNode::StringLiteralNode(const string &value)
+    : value(value) {
+}
+
+plugins::Any StringLiteralNode::construct(ConstructContext &context) const {
+    utils::TraceBlock block(context, "Constructing string value from '" + value + "'");
+    return value;
+}
+
+void StringLiteralNode::dump(string indent) const {
+    cout << indent << "STRING: \"" << utils::escape(value) << "\"" << endl;
+}
+
 IntLiteralNode::IntLiteralNode(const string &value)
     : value(value) {
 }
@@ -473,6 +486,18 @@ shared_ptr<DecoratedASTNode> BoolLiteralNode::clone_shared() const {
     return make_shared<BoolLiteralNode>(*this);
 }
 
+StringLiteralNode::StringLiteralNode(const StringLiteralNode &other)
+    : value(other.value) {
+}
+
+unique_ptr<DecoratedASTNode> StringLiteralNode::clone() const {
+    return utils::make_unique_ptr<StringLiteralNode>(*this);
+}
+
+shared_ptr<DecoratedASTNode> StringLiteralNode::clone_shared() const {
+    return make_shared<StringLiteralNode>(*this);
+}
+
 IntLiteralNode::IntLiteralNode(const IntLiteralNode &other)
     : value(other.value) {
 }

diff --git a/src/search/parser/decorated_abstract_syntax_tree.h b/src/search/parser/decorated_abstract_syntax_tree.h
@@ -157,6 +157,20 @@ class BoolLiteralNode : public DecoratedASTNode {
     BoolLiteralNode(const BoolLiteralNode &other);
 };
 
+// Decorated AST node for a string literal; construct() returns the string.
+class StringLiteralNode : public DecoratedASTNode {
+    std::string value;
+public:
+    StringLiteralNode(const std::string &value);
+    plugins::Any construct(ConstructContext &context) const override;
+    void dump(std::string indent) const override;
+
+    // TODO: once we get rid of lazy construction, this should no longer be necessary.
+    virtual std::unique_ptr<DecoratedASTNode> clone() const override;
+    virtual std::shared_ptr<DecoratedASTNode> clone_shared() const override;
+    StringLiteralNode(const StringLiteralNode &other);
+};
+
 class IntLiteralNode : public DecoratedASTNode {
     std::string value;
 public:

diff --git a/src/search/parser/lexical_analyzer.cc b/src/search/parser/lexical_analyzer.cc
@@ -29,6 +29,7 @@ static vector<pair<TokenType, regex>> construct_token_type_expressions() {
         {TokenType::INTEGER,
          R"([+-]?(infinity|\d+([kmg]\b)?))"},
         {TokenType::BOOLEAN, R"(true|false)"},
+        {TokenType::STRING, R"(\"((\\\\|\\"|\\n|[^"\\])*)\")"},
         {TokenType::LET, R"(let)"},
         {TokenType::IDENTIFIER, R"([a-zA-Z_]\w*)"}
     };
@@ -42,6 +43,23 @@ static vector<pair<TokenType, regex>> construct_token_type_expressions() {
 static const vector<pair<TokenType, regex>> token_type_expressions =
     construct_token_type_expressions();
 
+// Render text line by line with "> " in front of the line that contains pos
+// and a "^" below the character at pos; other lines get a two-space prefix.
+static string highlight_position(const string &text, string::const_iterator pos) {
+    ostringstream out;
+    int offset = pos - text.begin();
+    for (const string &line : utils::split(text, "\n")) {
+        int length = line.size();
+        bool hit = 0 <= offset && offset < length;
+        out << (hit ? "> " : "  ") << line << endl;
+        if (hit)
+            out << string(offset + 2, ' ') << "^" << endl; // 2 = prefix width
+        offset -= line.size() + 1; // also skip the newline after this line
+    }
+    string message = out.str();
+    utils::rstrip(message);
+    return message;
+}
 
 TokenStream split_tokens(const string &text) {
     utils::Context context;
@@ -59,29 +77,21 @@ TokenStream split_tokens(const string &text) {
             TokenType token_type = type_and_expression.first;
             const regex &expression = type_and_expression.second;
             if (regex_search(start, end, match, expression, regex_constants::match_continuous)) {
-                tokens.push_back({utils::tolower(match[1]), token_type});
+                string value;
+                if (token_type == TokenType::STRING) {
+                    value = utils::unescape(match[2]);
+                } else {
+                    value = utils::tolower(match[1]);
+                }
+                tokens.push_back({value, token_type});
                 start += match[0].length();
                 has_match = true;
                 break;
             }
         }
         if (!has_match) {
-            ostringstream error;
-            error << "Unable to recognize next token:" << endl;
-            int distance_to_error = start - text.begin();
-            for (const string &line : utils::split(text, "\n")) {
-                int line_length = line.size();
-                bool error_in_line =
-                    distance_to_error < line_length && distance_to_error >= 0;
-                error << (error_in_line ? "> " : "  ") << line << endl;
-                if (error_in_line)
-                    error << string(distance_to_error + 2, ' ') << "^" << endl;
-
-                distance_to_error -= line.size() + 1;
-            }
-            string message = error.str();
-            utils::rstrip(message);
-            context.error(message);
+            context.error("Unable to recognize next token:\n" +
+                          highlight_position(text, start));
         }
     }
     return TokenStream(move(tokens));

diff --git a/src/search/parser/syntax_analyzer.cc b/src/search/parser/syntax_analyzer.cc
@@ -162,6 +162,7 @@ static unordered_set<TokenType> literal_tokens {
     TokenType::FLOAT,
     TokenType::INTEGER,
     TokenType::BOOLEAN,
+    TokenType::STRING,
     TokenType::IDENTIFIER
 };
 
@@ -193,7 +194,8 @@ static ASTNodePtr parse_list(TokenStream &tokens, SyntaxAnalyzerContext &context
 
 static vector<TokenType> PARSE_NODE_TOKEN_TYPES = {
     TokenType::LET, TokenType::IDENTIFIER, TokenType::BOOLEAN,
-    TokenType::INTEGER, TokenType::FLOAT, TokenType::OPENING_BRACKET};
+    TokenType::STRING, TokenType::INTEGER, TokenType::FLOAT,
+    TokenType::OPENING_BRACKET};
 
 static ASTNodePtr parse_node(TokenStream &tokens,
                              SyntaxAnalyzerContext &context) {
@@ -220,6 +222,7 @@ static ASTNodePtr parse_node(TokenStream &tokens,
             return parse_literal(tokens, context);
         }
     case TokenType::BOOLEAN:
+    case TokenType::STRING:
     case TokenType::INTEGER:
     case TokenType::FLOAT:
         return parse_literal(tokens, context);

diff --git a/src/search/parser/token_stream.cc b/src/search/parser/token_stream.cc
@@ -15,6 +15,14 @@ Token::Token(const string &content, TokenType type)
     : content(content), type(type) {
 }
 
+// Printable form of the token: the content of a string token is escaped and
+// wrapped in double quotes, every other token yields its content unchanged.
+string Token::repr() const {
+    if (type != TokenType::STRING)
+        return content;
+    return "\"" + utils::escape(content) + "\"";
+}
+
 TokenStream::TokenStream(vector<Token> &&tokens)
     : tokens(move(tokens)), pos(0) {
 }
@@ -70,7 +78,7 @@ string TokenStream::str(int from, int to) const {
     int max_position = min(static_cast<int>(tokens.size()), to);
     ostringstream message;
     while (curr_position < max_position) {
-        message << tokens[curr_position].content;
+        message << tokens[curr_position].repr();
         curr_position++;
     }
     return message.str();
@@ -96,6 +104,8 @@ string token_type_name(TokenType token_type) {
         return "Float";
     case TokenType::BOOLEAN:
         return "Boolean";
+    case TokenType::STRING:
+        return "String";
     case TokenType::IDENTIFIER:
         return "Identifier";
     case TokenType::LET:
@@ -111,7 +121,7 @@ ostream &operator<<(ostream &out, TokenType token_type) {
 }
 
 ostream &operator<<(ostream &out, const Token &token) {
-    out << "<Type: '" << token.type << "', Value: '" << token.content << "'>";
+    out << "<Type: '" << token.type << "', Value: '" << token.repr() << "'>";
     return out;
 }
 }
diff --git a/src/search/parser/token_stream.h b/src/search/parser/token_stream.h
@@ -19,6 +19,7 @@ enum class TokenType {
     INTEGER,
     FLOAT,
     BOOLEAN,
+    STRING,
     IDENTIFIER,
     LET
 };
@@ -28,6 +29,7 @@ struct Token {
     TokenType type;
 
     Token(const std::string &content, TokenType type);
+    std::string repr() const;
 };
 
 class TokenStream {

diff --git a/src/search/plugins/types.cc b/src/search/plugins/types.cc
@@ -292,6 +292,7 @@ BasicType TypeRegistry::NO_TYPE = BasicType(typeid(void), "<no type>");
 
 TypeRegistry::TypeRegistry() {
     insert_basic_type<bool>();
+    insert_basic_type<string>();
     insert_basic_type<int>();
     insert_basic_type<double>();
 }

diff --git a/src/search/utils/strings.cc b/src/search/utils/strings.cc
@@ -8,6 +8,44 @@
 using namespace std;
 
 namespace utils {
+// Make s safe to embed between double quotes: backslash and double quote
+// get a leading backslash and a newline is written as the two characters
+// backslash and n. All other characters are copied unchanged.
+string escape(const string &s) {
+    string escaped;
+    escaped.reserve(s.length());
+    for (char c : s) {
+        switch (c) {
+        case '\\': escaped += "\\\\"; break;
+        case '"': escaped += "\\\""; break;
+        case '\n': escaped += "\\n"; break;
+        default: escaped += c;
+        }
+    }
+    return escaped;
+}
+
+/*
+  Undo escape: drop each backslash and keep the character after it, where
+  an escaped 'n' becomes a newline. A backslash ending s is discarded.
+*/
+string unescape(const string &s) {
+    string result;
+    result.reserve(s.length());
+    bool after_backslash = false;
+    for (char c : s) {
+        if (after_backslash) {
+            result += (c == 'n') ? '\n' : c;
+            after_backslash = false;
+        } else if (c == '\\') {
+            after_backslash = true;
+        } else {
+            result += c;
+        }
+    }
+    return result;
+}
+
 void lstrip(string &s) {
     s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) {
                                    return !isspace(ch);

diff --git a/src/search/utils/strings.h b/src/search/utils/strings.h
@@ -8,6 +8,8 @@
 #include <vector>
 
 namespace utils {
+extern std::string escape(const std::string &s);
+extern std::string unescape(const std::string &s);
 extern void lstrip(std::string &s);
 extern void rstrip(std::string &s);
 extern void strip(std::string &s);