Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: sql parser #182

Merged
merged 6 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions src/Ydb.Sdk/src/Ado/Internal/SqlParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
using System.Collections.Concurrent;
using System.Text;

namespace Ydb.Sdk.Ado.Internal;

/// <summary>
/// Rewrites SQL text that uses ADO.NET-style <c>@name</c> parameters into text that uses
/// <c>$name</c> parameters, and collects the parameter names that were found.
/// </summary>
/// <remarks>
/// The scanner leaves the following regions untouched: back-tick, double-quote and single-quote
/// delimited literals (with <c>\</c> escaping), <c>--</c> line comments, nested
/// <c>/* ... */</c> block comments and <c>@@ ... @@</c> multiline strings.
/// Results are cached by the original SQL text; nothing in this class removes cache entries.
/// </remarks>
internal static class SqlParser
{
    // Parsed results keyed by the original SQL string.
    private static readonly IDictionary<string, ParsedResult> CacheQueries =
        new ConcurrentDictionary<string, ParsedResult>();

    /// <summary>
    /// Parses <paramref name="sql"/>, replacing every <c>@name</c> parameter outside of
    /// literals and comments with <c>$name</c>.
    /// </summary>
    /// <param name="sql">The SQL text to rewrite.</param>
    /// <returns>
    /// The rewritten text together with the <c>$</c>-prefixed parameter names, one entry per
    /// occurrence, in order of appearance. A cached instance is returned when the same text was
    /// parsed before.
    /// </returns>
    /// <exception cref="YdbException">
    /// A single <c>@</c> is not followed by a letter, digit or underscore.
    /// </exception>
    internal static ParsedResult Parse(string sql)
    {
        if (CacheQueries.TryGetValue(sql, out var preparedYql))
        {
            return preparedYql;
        }

        var newYql = new StringBuilder();
        var paramNames = new List<string>();

        // Start of the not-yet-copied tail of sql.
        var prevToken = 0;

        // Invariant: every branch leaves curToken on the LAST character it consumed,
        // so that the loop increment moves on to the first unexamined character.
        for (var curToken = 0; curToken < sql.Length; curToken++)
        {
            switch (sql[curToken])
            {
                case '`':
                case '"':
                case '\'':
                    curToken = SkipTerminals(sql, sql[curToken], curToken);
                    break;
                case '-':
                    if (curToken + 1 < sql.Length && sql[curToken + 1] == '-')
                    {
                        curToken = SkipLineComment(sql, curToken);
                    }

                    break;
                case '/':
                    if (curToken + 1 < sql.Length && sql[curToken + 1] == '*')
                    {
                        curToken = SkipBlockComment(sql, curToken);
                    }

                    break;
                case '@':
                    if (curToken + 1 < sql.Length && sql[curToken + 1] == '@') // $text = @@ a b c @ @@
                    {
                        curToken = SkipMultilineString(sql, curToken);
                        break;
                    }

                    // Parse params: copy everything before '@', then emit "$name" instead of "@name".
                    newYql.Append(sql.AsSpan(prevToken, curToken - prevToken));

                    var nameStart = curToken + 1;
                    var nameEnd = nameStart;
                    while (nameEnd < sql.Length && (char.IsLetterOrDigit(sql[nameEnd]) || sql[nameEnd] == '_'))
                    {
                        nameEnd++;
                    }

                    if (nameEnd == nameStart)
                    {
                        throw new YdbException($"Have empty name parameter, invalid SQL [position: {nameStart}]");
                    }

                    var originalParamName = $"${sql[nameStart .. nameEnd]}";

                    paramNames.Add(originalParamName);
                    newYql.Append(originalParamName);
                    prevToken = nameEnd;

                    // Step back onto the last character of the name. Without this the loop
                    // increment would skip the character right after the name, so a quote,
                    // comment opener or another '@' directly following a parameter was missed.
                    curToken = nameEnd - 1;

                    break;
            }
        }

        newYql.Append(sql.AsSpan(prevToken, sql.Length - prevToken));

        return CacheQueries[sql] = new ParsedResult(newYql.ToString(), paramNames.ToArray());
    }

    /// <summary>
    /// Skips a literal delimited by <paramref name="stopSymbol"/>; a backslash escapes the
    /// character that follows it.
    /// </summary>
    /// <param name="sql">The text being scanned.</param>
    /// <param name="stopSymbol">The closing delimiter to look for.</param>
    /// <param name="curToken">Index of the opening delimiter.</param>
    /// <returns>
    /// Index of the closing delimiter, or <c>sql.Length</c> when the literal is not terminated.
    /// </returns>
    private static int SkipTerminals(string sql, char stopSymbol, int curToken)
    {
        while (++curToken < sql.Length)
        {
            if (sql[curToken] == '\\')
            {
                // Jump over the escaped character so an escaped delimiter does not close the literal.
                ++curToken;
                continue;
            }

            if (sql[curToken] == stopSymbol)
            {
                return curToken;
            }
        }

        return sql.Length;
    }

    /// <summary>Skips a <c>--</c> comment that starts at <paramref name="curToken"/>.</summary>
    /// <returns>
    /// Index of the terminating <c>\r</c> or <c>\n</c>, or of the last character of
    /// <paramref name="sql"/> when the comment runs to the end of the text.
    /// </returns>
    private static int SkipLineComment(string sql, int curToken)
    {
        while (curToken + 1 < sql.Length)
        {
            curToken++;
            if (sql[curToken] is '\r' or '\n')
            {
                break;
            }
        }

        return curToken;
    }

    /// <summary>
    /// Skips a <c>/* ... */</c> comment that starts at <paramref name="curToken"/>, honouring nesting.
    /// </summary>
    /// <returns>
    /// Index of the <c>/</c> that closes the outermost comment, or <c>sql.Length</c> when the
    /// comment is not terminated.
    /// </returns>
    private static int SkipBlockComment(string sql, int curToken)
    {
        // /* /* */ */ nest, according to SQL spec
        var level = 1;

        // Begin after the opening "/*" so that its '*' cannot pair with a following '/'
        // (the text "/*/" opens a comment, it does not close one).
        var pos = curToken + 2;

        while (pos + 1 < sql.Length)
        {
            if (sql[pos] == '*' && sql[pos + 1] == '/')
            {
                if (--level == 0)
                {
                    return pos + 1;
                }

                pos += 2; // don't parse / in */* twice
            }
            else if (sql[pos] == '/' && sql[pos + 1] == '*')
            {
                ++level;
                pos += 2; // don't parse * in /*/ twice
            }
            else
            {
                pos++;
            }
        }

        return sql.Length;
    }

    /// <summary>
    /// Skips a <c>@@ ... @@</c> multiline string that starts at <paramref name="curToken"/>.
    /// </summary>
    /// <returns>
    /// Index of the second <c>@</c> of the next <c>@@</c> pair, or <c>sql.Length</c> when no
    /// such pair exists. A doubled pair (<c>@@@@</c>) inside the string is therefore seen as
    /// "close, then open again", which keeps the scanner inside the string.
    /// </returns>
    private static int SkipMultilineString(string sql, int curToken)
    {
        for (var pos = curToken + 2; pos + 1 < sql.Length; pos++)
        {
            if (sql[pos] == '@' && sql[pos + 1] == '@')
            {
                return pos + 1;
            }
        }

        return sql.Length;
    }
}

/// <summary>
/// Result produced by <see cref="SqlParser.Parse"/>: the rewritten query text and the
/// parameter names found while rewriting it.
/// </summary>
/// <param name="ParsedSql">The query text after <c>@name</c> parameters were rewritten to <c>$name</c>.</param>
/// <param name="ParamNames">
/// The <c>$</c>-prefixed parameter names in order of appearance; a name is listed once per occurrence.
/// </param>
internal record ParsedResult(string ParsedSql, IReadOnlyList<string> ParamNames);
245 changes: 245 additions & 0 deletions src/Ydb.Sdk/tests/Ado/Internal/SqlParserTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
using Xunit;
using Ydb.Sdk.Ado.Internal;

namespace Ydb.Sdk.Tests.Ado.Internal;

// Unit tests for SqlParser.Parse: each test feeds a SQL text with @name parameters and checks
// both the rewritten text ($name parameters) and the list of collected parameter names.
[Trait("Category", "Unit")]
public class SqlParserTests
{
// Parameter names made of letters, digits and underscores are rewritten from @name to $name.
// NOTE(review): the test name mentions caching, but no assertion here inspects the cache.
[Fact]
public void Parse_WhenDigitOrLetterOrUnderliningInParamName_ReturnParsedSqlAndCached()
{
var (sql, paramNames) = SqlParser.Parse("SELECT @p; SELECT @p2; SELECT @p_3;");

Assert.Equal("SELECT $p; SELECT $p2; SELECT $p_3;", sql);
Assert.Equal(new[] { "$p", "$p2", "$p_3" }, paramNames);
}

// An empty input yields an empty output text and no parameter names.
[Fact]
public void Parse_WhenEmptySql_ReturnEmpty()
{
var (sql, paramsNames) = SqlParser.Parse("");
Assert.Equal("", sql);
Assert.True(paramsNames.Count == 0);
}

// '@' tokens inside a "--" line comment are left as-is; parameters on the next line are rewritten.
[Fact]
public void Parse_WhenLineComment_ReturnSqlWithComment()
{
var (sql, paramNames) = SqlParser.Parse(@"-- Comment with params @param, @p2, @p_3
SELECT @param; SELECT @p2; SELECT @p_3;");

Assert.Equal(@"-- Comment with params @param, @p2, @p_3
SELECT $param; SELECT $p2; SELECT $p_3;", sql);
Assert.Equal(new[] { "$param", "$p2", "$p_3" }, paramNames);
}

// '@' tokens inside a nested /* ... */ block comment (including a bare '@') are left as-is;
// the twenty parameters of the statement after the comment are rewritten and collected in order.
[Fact]
public void Parse_WhenBlockComment_ReturnSqlWithComment()
{
var (sql, paramNames) = SqlParser.Parse(@"/* Comment SQL
/*
Comment with params @param, @p2, @p_3
--
@
*/
SELECT @param; SELECT @p2; SELECT @p_3
*/
INSERT INTO Table
(id, bool_column, bigint_column, smallint_column, tinyint_column, float_column, double_column, decimal_column,
uint8_column, uint16_column, uint32_column, uint64_column, text_column, binary_column, json_column,
jsondocument_column, date_column, datetime_column, timestamp_column, interval_column) VALUES
(@name1, @name2, @name3, @name4, @name5, @name6, @name7, @name8, @name9, @name10, @name11, @name12, @name13, @name14,
@name15, @name16, @name17, @name18, @name19, @name20);");

Assert.Equal(@"/* Comment SQL
/*
Comment with params @param, @p2, @p_3
--
@
*/
SELECT @param; SELECT @p2; SELECT @p_3
*/
INSERT INTO Table
(id, bool_column, bigint_column, smallint_column, tinyint_column, float_column, double_column, decimal_column,
uint8_column, uint16_column, uint32_column, uint64_column, text_column, binary_column, json_column,
jsondocument_column, date_column, datetime_column, timestamp_column, interval_column) VALUES
($name1, $name2, $name3, $name4, $name5, $name6, $name7, $name8, $name9, $name10, $name11, $name12, $name13, $name14,
$name15, $name16, $name17, $name18, $name19, $name20);", sql);
Assert.Equal(new[]
{
"$name1", "$name2", "$name3", "$name4", "$name5", "$name6", "$name7", "$name8", "$name9", "$name10",
"$name11", "$name12", "$name13", "$name14", "$name15", "$name16", "$name17", "$name18", "$name19", "$name20"
}, paramNames);
}

// '@' tokens inside double-quoted literals (which also contain backslash-escaped quotes) are
// left as-is; only @air_date and @param outside the literals are rewritten.
[Fact]
public void Parse_WhenDoubleQuotes_ReturnSql()
{
var (sql, paramNames) = SqlParser.Parse(@"REPLACE INTO episodes
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
12,
""@ @ @ @ @ @Test Episode !!! \"" \"" \"" \"" SELECT @param; SELECT @p2; SELECT @p_3"",
@air_date
)
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = ""123 @ \"" @ @"" AND season_id = @param;
;");
Assert.Equal(@"REPLACE INTO episodes
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
12,
""@ @ @ @ @ @Test Episode !!! \"" \"" \"" \"" SELECT @param; SELECT @p2; SELECT @p_3"",
$air_date
)
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = ""123 @ \"" @ @"" AND season_id = $param;
;", sql);
Assert.Equal(new[] { "$air_date", "$param" }, paramNames);
}

// Same scenario as the double-quote test, but with single-quoted literals and \' escapes.
[Fact]
public void Parse_WhenSingleQuotes_ReturnSql()
{
var (sql, paramNames) = SqlParser.Parse(@"UPSERT INTO episodes
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
13,
'Test Episode @ \'@ @ \'@ \'@ \'@ @ @ @ @ @ @ @ ',
@air_date
)
;
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = '123 @ \' @ @' AND season_id = @param;");
Assert.Equal(@"UPSERT INTO episodes
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
13,
'Test Episode @ \'@ @ \'@ \'@ \'@ @ @ @ @ @ @ @ ',
$air_date
)
;
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = '123 @ \' @ @' AND season_id = $param;", sql);
Assert.Equal(new[] { "$air_date", "$param" }, paramNames);
}

// '@' tokens inside a back-tick quoted identifier (containing double quotes and an escaped
// back-tick) are left as-is, as are those inside the single-quoted literals that follow.
[Fact]
public void Parse_WhenBacktickQuotes_ReturnSql()
{
var (sql, paramNames) = SqlParser.Parse(@"UPSERT INTO `episodes @ @ @ @ "" "" \` @ @ @`
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
13,
'Test Episode @ \'@ @ \'@ \'@ \'@ @ @ @ @ @ @ @ ',
@air_date
)
;
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = '123 @ \' @ @' AND season_id = @param;");

Assert.Equal(@"UPSERT INTO `episodes @ @ @ @ "" "" \` @ @ @`
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
13,
'Test Episode @ \'@ @ \'@ \'@ \'@ @ @ @ @ @ @ @ ',
$air_date
)
;
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = '123 @ \' @ @' AND season_id = $param;", sql);
Assert.Equal(new[] { "$air_date", "$param" }, paramNames);
}

// The content of an @@ ... @@ multiline string, including a doubled @@@@ inside it, is left
// as-is; parameters after the string (and after a line comment) are rewritten.
[Fact]
public void Parse_WhenMultilineStringLiterals_ReturnSql()
{
var (sql, paramNames) = SqlParser.Parse(@"$text = @@some
multiline with double at: @@@@
text@@;
SELECT $text;
-- Comment with params @param, @p2, @p_3
SELECT @param; SELECT @p2; SELECT @p_3;");

Assert.Equal(@"$text = @@some
multiline with double at: @@@@
text@@;
SELECT $text;
-- Comment with params @param, @p2, @p_3
SELECT $param; SELECT $p2; SELECT $p_3;", sql);
Assert.Equal(new[] { "$param", "$p2", "$p_3" }, paramNames);
}
}
Loading