Add support for f-strings #2566

Open · wants to merge 5 commits into base: main
16 changes: 16 additions & 0 deletions src/lpython/parser/parser.yy
@@ -103,6 +103,9 @@ void yyerror(YYLTYPE *yyloc, LCompilers::LPython::Parser &p, const std::string &
%token TK_CARET "^"
%token TK_AT "@"
%token <string> TK_STRING
%token <string> TK_FSTRING_START
%token <string> TK_FSTRING_MIDDLE
%token <string> TK_FSTRING_END
%token <string> TK_COMMENT
%token <string> TK_EOLCOMMENT
%token <string> TK_TYPE_COMMENT
@@ -260,6 +263,8 @@ void yyerror(YYLTYPE *yyloc, LCompilers::LPython::Parser &p, const std::string &
%type <ast> sep_one
%type <string> type_comment
%type <ast> string
%type <ast> fstring
%type <ast> fstring_middle
%type <ast> ternary_if_statement
%type <ast> comprehension
%type <vec_ast> id_list
@@ -1106,8 +1111,19 @@ subscript
string
: string TK_STRING { $$ = STRING2($1, $2, @$); } // TODO
| string KW_STR_PREFIX TK_STRING { $$ = STRING4($1, STRING3($2, $3, @$), @$); }
| string fstring { $$ = STRING4($1, $2, @$); }
| TK_STRING { $$ = STRING1($1, @$); }
| KW_STR_PREFIX TK_STRING { $$ = STRING3($1, $2, @$); }
| fstring
;

fstring_middle
: fstring_middle TK_FSTRING_MIDDLE expr { $$ = FSTRING_MIDDLE($1, $2, $3, @$); }
| expr { $$ = FSTRING_MIDDLE1($1, @$); }
;

fstring
: TK_FSTRING_START fstring_middle TK_FSTRING_END { $$ = FSTRING($1, $2, $3, @$); }
;

lambda_parameter
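For orientation, a minimal sketch of how these rules compose for a two-interpolation f-string; the token boundaries below are an assumption read off the fstring_start/fstring_middle/fstring_end definitions added to tokenizer.re further down, not compiler output.

    # Hypothetical tokenization of f"a{x}b{y}c" under this PR (an assumption, not compiler output):
    #   TK_FSTRING_START   ~ f"a{   (prefix, opening quote, leading literal, first "{")
    #   expr                 x
    #   TK_FSTRING_MIDDLE  ~ }b{    (literal text between two interpolations)
    #   expr                 y
    #   TK_FSTRING_END     ~ }c"    (trailing literal and closing quote)
    # The grammar then reduces fstring_middle -> x, then fstring_middle TK_FSTRING_MIDDLE y,
    # and finally fstring -> TK_FSTRING_START fstring_middle TK_FSTRING_END.
    x, y = 1, 2
    print(f"a{x}b{y}c")  # prints: a1b2c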
88 changes: 45 additions & 43 deletions src/lpython/parser/semantics.h
@@ -802,10 +802,53 @@ static inline ast_t* concat_string(Allocator &al, Location &l,
#define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), str_unescape_c(p.m_a, y), nullptr)
#define STRING3(prefix, x, l) PREFIX_STRING(p.m_a, l, prefix.c_str(p.m_a), x.c_str(p.m_a))
#define STRING4(x, s, l) concat_string(p.m_a, l, EXPR(x), "", EXPR(s))
#define FSTRING(s, m, e, l) fstring(p.m_a, l, s.c_str(p.m_a), EXPR(m), e.c_str(p.m_a))
#define FSTRING_MIDDLE(s, m, e, l) fstring_middle(p.m_a, l, EXPR(s), m.c_str(p.m_a), EXPR(e))
#define FSTRING_MIDDLE1(x, l) fstring_middle1(p.m_a, l, EXPR(x))
#define FLOAT(x, l) make_ConstantFloat_t(p.m_a, l, x, nullptr)
#define COMPLEX(x, l) make_ConstantComplex_t(p.m_a, l, 0, x, nullptr)
#define BOOL(x, l) make_ConstantBool_t(p.m_a, l, x, nullptr)

static inline ast_t *fstring_middle1(Allocator &al, Location &l, expr_t *start){
return make_FormattedValue_t(al, l, start, -1, nullptr);
}

static inline ast_t *fstring_middle(Allocator &al, Location &l, expr_t *start,
char *middle, expr_t *end) {
Vec<expr_t *> exprs;
exprs.reserve(al, 3);
exprs.push_back(al, start);
ast_t *tmp = make_ConstantStr_t(al, l, middle, nullptr);
exprs.push_back(al, EXPR(tmp));
tmp = make_FormattedValue_t(al, l, end, -1, nullptr);
exprs.push_back(al, EXPR(tmp));
tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size());
return tmp;
}

static inline ast_t *fstring(Allocator &al, Location &l, char *start,
expr_t *middle, char *end) {
size_t p = 0, q = 0;
while(isalpha(start[p]) && p < strlen(start)) p++;
q = p;
while(start[q] == start[p] && q < strlen(start)) q++;
// discard the prefix, the opening quote(s) and the trailing '{' of the start token,
// and the matching characters of the end token; the prefix can be 'f' | 'rf' | 'fr'
std::string str = std::string(start).substr(q, strlen(start)-q-1);
start = LCompilers::s2c(al, str);
str = std::string(end).substr(1, strlen(end)-(q-p)-1);
end = LCompilers::s2c(al, str);

Vec<expr_t *> exprs;
exprs.reserve(al, 3);
ast_t* tmp = make_ConstantStr_t(al, l, start, nullptr);
exprs.push_back(al, EXPR(tmp));
exprs.push_back(al, middle);
tmp = make_ConstantStr_t(al, l, end, nullptr);
exprs.push_back(al, EXPR(tmp));
tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size());
return tmp;
}

static inline ast_t *PREFIX_STRING(Allocator &al, Location &l, char *prefix, char *s){
Vec<expr_t *> exprs;
exprs.reserve(al, 4);
@@ -818,49 +861,8 @@ static inline ast_t *PREFIX_STRING(Allocator &al, Location &l, char *prefix, cha
}
if (strcmp(prefix, "f") == 0 || strcmp(prefix, "fr") == 0
|| strcmp(prefix, "rf") == 0) {
std::string str = std::string(s);
std::string s1 = "\"";
std::string id;
std::vector<std::string> strs;
bool open_paren = false;
for (size_t i = 0; i < str.length(); i++) {
if(str[i] == '{') {
if(s1 != "\"") {
s1.push_back('"');
strs.push_back(s1);
s1 = "\"";
}
open_paren = true;
} else if (str[i] != '}' && open_paren) {
id.push_back(s[i]);
} else if (str[i] == '}') {
if(id != "") {
strs.push_back(id);
id = "";
}
open_paren = false;
} else if (!open_paren) {
s1.push_back(s[i]);
}
if(i == str.length()-1 && s1 != "\"") {
s1.push_back('"');
strs.push_back(s1);
}
}

for (size_t i = 0; i < strs.size(); i++) {
if (strs[i][0] == '"') {
strs[i] = strs[i].substr(1, strs[i].length() - 2);
tmp = make_ConstantStr_t(al, l, LCompilers::s2c(al, strs[i]), nullptr);
exprs.push_back(al, down_cast<expr_t>(tmp));
} else {
tmp = make_Name_t(al, l,
LCompilers::s2c(al, strs[i]), expr_contextType::Load);
tmp = make_FormattedValue_t(al, l, EXPR(tmp), -1, nullptr);
exprs.push_back(al, down_cast<expr_t>(tmp));
}
}
tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size());
// ignore 'f', assuming it is handled by fstring
tmp = make_ConstantStr_t(al, l, s, nullptr);
} else if (strcmp(prefix, "b") == 0) {
LCompilers::Str s_;
s_.from_str(al, std::string(s));
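A minimal sketch of the AST shape the fstring()/fstring_middle() helpers above aim to build. CPython's ast module, whose JoinedStr/FormattedValue/Constant nodes the calls to make_JoinedStr_t, make_FormattedValue_t and make_ConstantStr_t mirror, produces the analogous structure; the snippet below is CPython output used purely as a reference point, not LPython output.

    import ast

    # Roughly: JoinedStr(values=[Constant('a'), FormattedValue(Name('x'), conversion=-1),
    #                            Constant('b'), FormattedValue(Name('y'), conversion=-1),
    #                            Constant('c')])
    print(ast.dump(ast.parse('f"a{x}b{y}c"', mode="eval").body, indent=4))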
3 changes: 2 additions & 1 deletion src/lpython/parser/tokenizer.h
@@ -20,7 +20,7 @@ class Tokenizer
uint32_t prev_loc; // The previous file ended at this location.

int last_token=-1;

int fstring_flag = 0;
bool indent = false; // Next line is expected to be indented
int dedent = 0; // Allowed values: 0, 1, 2; see the code below for the meaning of this state variable
bool colon_actual_last_token = false; // If the actual last token was a colon
@@ -79,6 +79,7 @@ class Tokenizer

void lex_match_or_case(Location &loc, unsigned char *cur,
bool &is_match_or_case_keyword);
int lex_fstring(Location &loc, unsigned char * &cur, YYSTYPE &yylval);
};

std::string token2text(const int token);
139 changes: 135 additions & 4 deletions src/lpython/parser/tokenizer.re
@@ -290,6 +290,37 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost
| ("''" | "''" "\\"+) [^'\x00\\]
| [^'\x00\\] )*
"'''";

fstring_format1 = ('\\'[^\x00{}] | [^"\x00\n\\{}])*;
fstring_format2 = ("\\"[^\x00{}] | [^'\x00\n\\{}])*;
fstring_format3 = ( '\\'[^\x00{}]
| ('"' | '"' '\\'+ '"' | '"' '\\'+) [^"\x00\\{}]
| ('""' | '""' '\\'+) [^"\x00\\{}]
| [^"\x00\\{}] )*;
fstring_format4 = ( "\\"[^\x00{}]
| ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}]
| ("''" | "''" "\\"+) [^'\x00\\{}]
| [^'\x00\\{}] )*;

fstring_prefix = ([fF] | [fF][rR] | [rR][fF]);
fstring_start1 = fstring_prefix '"' fstring_format1 '{';
fstring_start2 = fstring_prefix "'" fstring_format2 '{';
fstring_start3 = fstring_prefix '"""' fstring_format3 '{';
fstring_start4 = fstring_prefix "'''" fstring_format4 '{';

fstring_start1 {
fstring_flag = 1; token(yylval.string); RET(TK_FSTRING_START)
}
fstring_start2 {
fstring_flag = 2; token(yylval.string); RET(TK_FSTRING_START)
}
fstring_start3 {
fstring_flag = 3; token(yylval.string); RET(TK_FSTRING_START)
}
fstring_start4 {
fstring_flag = 4; token(yylval.string); RET(TK_FSTRING_START)
}

type_ignore = "#" whitespace? "type:" whitespace? "ignore" [^\n\x00]*;
type_comment = "#" whitespace? "type:" whitespace? [^\n\x00]*;
comment = "#" [^\n\x00]*;
@@ -305,6 +336,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost
})
);
}

end {
token_loc(loc);
if(parenlevel) {
@@ -434,10 +466,10 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost
RET(TK_NAME);
}
}

[rR][bB] | [bB][rR]
| [fF][rR] | [rR][fF]
| [rR] | [bB] | [fF] | [uU]
| [rR] | [fF] | [bB] | [uU]
{
if(cur[0] == '\'' || cur[0] == '"'){
KW(STR_PREFIX);
Expand Down Expand Up @@ -472,7 +504,13 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost
"{" { token_loc(loc); record_paren(loc, '{'); RET(TK_LBRACE) }
")" { token_loc(loc); record_paren(loc, ')'); RET(TK_RPAREN) }
"]" { token_loc(loc); record_paren(loc, ']'); RET(TK_RBRACKET) }
"}" { token_loc(loc); record_paren(loc, '}'); RET(TK_RBRACE) }
"}" {
if(fstring_flag >= 1){
return lex_fstring(loc, cur, yylval);
}else{
token_loc(loc); record_paren(loc, '}'); RET(TK_RBRACE)
}
}
"+" { RET(TK_PLUS) }
"-" { RET(TK_MINUS) }
"=" { RET(TK_EQUAL) }
@@ -606,6 +644,95 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost
}
}

int Tokenizer::lex_fstring(Location &loc, unsigned char * &cur, YYSTYPE &yylval){
unsigned char *mar;

/*!re2c
re2c:define:YYCURSOR = cur;
re2c:define:YYMARKER = mar;
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = "unsigned char";

//fstring_format1 = ('\\'[^\x00{}] | [^"\x00\n\\{}])*;
//fstring_format2 = ( "\\"[^\x00{}]
// | ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}]
// | ("''" | "''" "\\"+) [^'\x00\\{}]
// | [^'\x00\\{}] )*;
*/
switch(fstring_flag){
case 1: goto fstring1;
case 2: goto fstring2;
case 3: goto fstring3;
case 4: goto fstring4;
default: return -1;
}
fstring1:
/*!re2c
fstring_middle1 = fstring_format1 "{";
fstring_end1 = fstring_format1 '"';

fstring_middle1 {
token_loc(loc);
token_str(yylval.string);
RET(TK_FSTRING_MIDDLE)
}
fstring_end1 { token_loc(loc); fstring_flag = 0; token(yylval.string); RET(TK_FSTRING_END) }
* { goto default_rule; }
*/
fstring2:
/*!re2c
fstring_middle2 = fstring_format2 "{";
fstring_end2 = fstring_format2 "'";
fstring_middle2 {
token_loc(loc);
token_str(yylval.string);
RET(TK_FSTRING_MIDDLE)
}
fstring_end2 { token_loc(loc); fstring_flag = 0; token(yylval.string); RET(TK_FSTRING_END) }
* { goto default_rule; }
*/
fstring3:
/*!re2c
fstring_middle3 = fstring_format3 "{";
fstring_end3 = fstring_format3 '"""';
fstring_middle3 {
token_loc(loc);
token_str(yylval.string);
RET(TK_FSTRING_MIDDLE)
}
fstring_end3 { token_loc(loc); fstring_flag = 0; token(yylval.string); RET(TK_FSTRING_END) }
* { goto default_rule; }
*/
fstring4:
/*!re2c
fstring_middle4 = fstring_format4 "{";
fstring_end4 = fstring_format4 "'''";
fstring_middle4 {
token_loc(loc);
token_str(yylval.string);
RET(TK_FSTRING_MIDDLE)
}
fstring_end4 { token_loc(loc); fstring_flag = 0; token(yylval.string); RET(TK_FSTRING_END) }
* { goto default_rule; }
*/
default_rule:
/*!re2c
* {
token_loc(loc);
std::string t = std::string((char *)tok, cur - tok);
throw parser_local::TokenizerError("Token '"
+ t + "' is not recognized in `fstring` statement", loc);
}
end {
token_loc(loc);
std::string t = std::string((char *)tok, cur - tok);
throw parser_local::TokenizerError(
"End of file not expected within `fstring` statement: '" + t
+ "'", loc);
}
*/
}

void Tokenizer::lex_match_or_case(Location &loc, unsigned char *cur,
bool &is_match_or_case_keyword) {
for (;;) {
@@ -700,6 +827,9 @@ std::string token2text(const int token)
T(TK_AT, "@")

T(TK_STRING, "string")
T(TK_FSTRING_START, "fstring_start")
T(TK_FSTRING_MIDDLE, "fstring_middle")
T(TK_FSTRING_END, "fstring_end")
T(TK_COMMENT, "comment")
T(TK_EOLCOMMENT, "eolcomment")
T(TK_TYPE_COMMENT, "type_comment")
@@ -838,7 +968,8 @@ std::string pickle_token(int token, const YYSTYPE &yystype)
t += " " + std::to_string(yystype.f);
} else if (token == yytokentype::TK_IMAG_NUM) {
t += " " + std::to_string(yystype.f) + "j";
} else if (token == yytokentype::TK_STRING) {
} else if (token == yytokentype::TK_STRING || token == yytokentype::TK_FSTRING_START
|| token == yytokentype::TK_FSTRING_MIDDLE || token == yytokentype::TK_FSTRING_END) {
t = t + " " + "\"" + str_escape_c(yystype.string.str()) + "\"";
} else if (token == yytokentype::TK_TYPE_COMMENT) {
t = t + " " + "\"" + yystype.string.str() + "\"";
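For reference, the four fstring_flag values appear to map, in rule order, to the four quoting forms handled by fstring_start1..4 and fstring1..4 above (1: f"...", 2: f'...', 3: f"""...""", 4: f'''...'''); this mapping is an assumption read off the rule order, not documented behaviour. All four forms denote the same runtime value:

    # The four f-string quoting forms the tokenizer distinguishes internally.
    x = 42
    assert f"v={x}" == f'v={x}' == f"""v={x}""" == f'''v={x}'''
    print("all four quoting forms agree")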
24 changes: 24 additions & 0 deletions src/lpython/semantics/python_ast_to_asr.cpp
@@ -3352,6 +3352,30 @@ class CommonVisitor : public AST::BaseVisitor<Struct> {
}
tmp = ASR::make_LogicalBinOp_t(al, x.base.base.loc, lhs, op, rhs, dest_type, value);
}

void visit_FormattedValue(const AST::FormattedValue_t &x){
this->visit_expr(*x.m_value);
// convert x into a call_arg for the handle_intrinsic_str function
ASR::expr_t* expr = ASRUtils::EXPR(tmp);
ASR::call_arg_t arg;
arg.loc = expr->base.loc;
arg.m_value = expr;
Vec<ASR::call_arg_t> call_args;
call_args.reserve(al, 1);
call_args.push_back(al, arg);
tmp = intrinsic_node_handler.handle_intrinsic_str(al, call_args, x.base.base.loc);
}

void visit_JoinedStr(const AST::JoinedStr_t &x){
this->visit_expr(*x.m_values[0]);
ASR::expr_t *left = ASRUtils::EXPR(tmp);
for(size_t i = 1; i < x.n_values; i++){
this->visit_expr(*x.m_values[i]);
ASR::expr_t *right = ASRUtils::EXPR(tmp);
make_BinOp_helper(left, right, ASR::binopType::Add, x.base.base.loc);
left = ASRUtils::EXPR(tmp);
}
}

void visit_BinOp(const AST::BinOp_t &x) {
this->visit_expr(*x.m_left);
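A minimal sketch, in plain Python, of the lowering performed by visit_FormattedValue and visit_JoinedStr above: each interpolated expression is routed through the str() intrinsic, and the parts of the joined string are folded left to right with string concatenation. The snippet only illustrates the observable effect; it does not use any LPython API.

    # f"{x} and {y}"  lowers to roughly  str(x) + " and " + str(y)
    x, y = 3, 4.5
    lowered = str(x) + " and " + str(y)
    assert lowered == f"{x} and {y}"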