Skip to content

Commit

Permalink
feat: sql parser (#182)
Browse files Browse the repository at this point in the history
  • Loading branch information
KirillKurdyukov authored Sep 10, 2024
1 parent e09a738 commit 9b69b30
Show file tree
Hide file tree
Showing 2 changed files with 395 additions and 0 deletions.
150 changes: 150 additions & 0 deletions src/Ydb.Sdk/src/Ado/Internal/SqlParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
using System.Collections.Concurrent;
using System.Text;

namespace Ydb.Sdk.Ado.Internal;

/// <summary>
/// Rewrites SQL text that uses ADO.NET-style <c>@name</c> parameter placeholders into text that uses
/// <c>$name</c> placeholders and collects the parameter names that were found.
/// </summary>
/// <remarks>
/// Text inside backtick / double-quote / single-quote literals, <c>--</c> line comments, nested
/// <c>/* ... */</c> block comments and <c>@@ ... @@</c> multiline literals is copied verbatim.
/// Parsed results are cached per SQL string in a static dictionary that is never trimmed.
/// </remarks>
internal static class SqlParser
{
    private static readonly IDictionary<string, ParsedResult> CacheQueries =
        new ConcurrentDictionary<string, ParsedResult>();

    /// <summary>
    /// Parses <paramref name="sql"/>, replacing every <c>@name</c> placeholder with <c>$name</c>.
    /// </summary>
    /// <param name="sql">SQL text with <c>@name</c> placeholders.</param>
    /// <returns>
    /// The rewritten SQL together with the <c>$</c>-prefixed parameter names in order of appearance
    /// (a name is listed once per occurrence). The result is cached, so repeated calls with the same
    /// text return the cached instance.
    /// </returns>
    /// <exception cref="YdbException">
    /// A single <c>@</c> outside of literals and comments is not followed by a letter, digit or underscore.
    /// </exception>
    internal static ParsedResult Parse(string sql)
    {
        if (CacheQueries.TryGetValue(sql, out var preparedYql))
        {
            return preparedYql;
        }

        var newYql = new StringBuilder();
        var paramNames = new List<string>();

        // Start of the part of the input that has not been copied to newYql yet.
        var prevToken = 0;

        // Every Skip* helper returns the index of the LAST character it consumed, so the loop
        // increment always lands on the first character that still has to be classified.
        for (var curToken = 0; curToken < sql.Length; curToken++)
        {
            switch (sql[curToken])
            {
                case '`':
                case '"':
                case '\'':
                    curToken = SkipTerminals(sql, sql[curToken], curToken);
                    break;
                case '-':
                    if (curToken + 1 < sql.Length && sql[curToken + 1] == '-')
                    {
                        curToken = SkipLineComment(sql, curToken);
                    }

                    break;
                case '/':
                    if (curToken + 1 < sql.Length && sql[curToken + 1] == '*')
                    {
                        curToken = SkipBlockComment(sql, curToken);
                    }

                    break;
                case '@':
                    if (curToken + 1 < sql.Length && sql[curToken + 1] == '@') // $text = @@ a b c @ @@
                    {
                        curToken = SkipMultilineString(sql, curToken);
                        break;
                    }

                    // Parse params: flush everything before '@', then read the name that follows it.
                    newYql.Append(sql.AsSpan(prevToken, curToken - prevToken));
                    prevToken = ++curToken;

                    while (curToken < sql.Length && (char.IsLetterOrDigit(sql[curToken]) || sql[curToken] == '_'))
                    {
                        curToken++;
                    }

                    if (curToken == prevToken)
                    {
                        throw new YdbException($"Have empty name parameter, invalid SQL [position: {prevToken}]");
                    }

                    var originalParamName = "$" + sql[prevToken .. curToken];

                    paramNames.Add(originalParamName);
                    newYql.Append(originalParamName);
                    prevToken = curToken;

                    // curToken now points at the first character AFTER the name, which has not been
                    // classified yet (it may open a quote or a comment). Step back so that the loop
                    // increment examines it instead of skipping over it.
                    curToken--;

                    break;
            }
        }

        newYql.Append(sql.AsSpan(prevToken, sql.Length - prevToken));

        return CacheQueries[sql] = new ParsedResult(newYql.ToString(), paramNames.ToArray());
    }

    /// <summary>
    /// Skips a quoted literal. <paramref name="curToken"/> is the index of the opening quote.
    /// A backslash escapes the character that follows it.
    /// </summary>
    /// <returns>Index of the closing quote, or <c>sql.Length</c> when the literal is not terminated.</returns>
    private static int SkipTerminals(string sql, char stopSymbol, int curToken)
    {
        while (++curToken < sql.Length)
        {
            if (sql[curToken] == '\\')
            {
                ++curToken; // the escaped character can never terminate the literal
                continue;
            }

            if (sql[curToken] == stopSymbol)
            {
                return curToken;
            }
        }

        return sql.Length;
    }

    /// <summary>
    /// Skips a <c>--</c> line comment. <paramref name="curToken"/> is the index of the first dash.
    /// </summary>
    /// <returns>Index of the terminating <c>\r</c> / <c>\n</c>, or of the last character of the input.</returns>
    private static int SkipLineComment(string sql, int curToken)
    {
        while (curToken + 1 < sql.Length)
        {
            curToken++;
            if (sql[curToken] == '\r' || sql[curToken] == '\n')
            {
                break;
            }
        }

        return curToken;
    }

    /// <summary>
    /// Skips a block comment, honouring nesting (<c>/* /* */ */</c>).
    /// <paramref name="curToken"/> is the index of the <c>/</c> of the opening <c>/*</c>.
    /// </summary>
    /// <returns>
    /// Index of the <c>/</c> of the matching <c>*/</c>, or <c>sql.Length</c> when the comment is not terminated.
    /// </returns>
    private static int SkipBlockComment(string sql, int curToken)
    {
        var level = 1;

        // Start AFTER the opening "/*" so that its '*' cannot be paired with a following '/' ("/*/").
        var position = curToken + 2;

        while (position + 1 < sql.Length)
        {
            if (sql[position] == '*' && sql[position + 1] == '/')
            {
                if (--level == 0)
                {
                    return position + 1;
                }

                position += 2; // consume both characters so the '/' in "*/*" is not parsed twice
            }
            else if (sql[position] == '/' && sql[position + 1] == '*')
            {
                ++level;
                position += 2; // consume both characters so the '*' in "/*/" is not parsed twice
            }
            else
            {
                position++;
            }
        }

        return sql.Length;
    }

    /// <summary>
    /// Skips a <c>@@ ... @@</c> multiline literal. <paramref name="curToken"/> is the index of the first
    /// <c>@</c> of the opening pair.
    /// </summary>
    /// <returns>
    /// Index of the second <c>@</c> of the closing pair; when there is no closing pair, an index at or
    /// past the last character of the input so that the caller's scan ends.
    /// </returns>
    private static int SkipMultilineString(string sql, int curToken)
    {
        for (curToken += 2; curToken + 1 < sql.Length; curToken++)
        {
            if (sql[curToken] == '@' && sql[curToken + 1] == '@')
            {
                return curToken + 1;
            }
        }

        return curToken;
    }
}

/// <summary>
/// Outcome of <see cref="SqlParser.Parse"/> for one SQL string.
/// </summary>
/// <param name="ParsedSql">The SQL text in which every recognised <c>@name</c> placeholder was replaced by <c>$name</c>.</param>
/// <param name="ParamNames">
/// The <c>$</c>-prefixed parameter names in the order they appear in the SQL text; a name that is used
/// several times is listed once per occurrence.
/// </param>
internal record ParsedResult(string ParsedSql, IReadOnlyList<string> ParamNames);
245 changes: 245 additions & 0 deletions src/Ydb.Sdk/tests/Ado/Internal/SqlParserTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
using Xunit;
using Ydb.Sdk.Ado.Internal;

namespace Ydb.Sdk.Tests.Ado.Internal;

/// <summary>
/// Unit tests for <see cref="SqlParser.Parse"/>: <c>@name</c> placeholders must be rewritten to
/// <c>$name</c>, while text inside comments and string literals must be left untouched.
/// </summary>
[Trait("Category", "Unit")]
public class SqlParserTests
{
    // Names may consist of letters, digits and underscores; a repeated call must be served from the cache.
    [Fact]
    public void Parse_WhenDigitOrLetterOrUnderliningInParamName_ReturnParsedSqlAndCached()
    {
        const string query = "SELECT @p; SELECT @p2; SELECT @p_3;";

        var parsed = SqlParser.Parse(query);
        var (sql, paramNames) = parsed;

        Assert.Equal("SELECT $p; SELECT $p2; SELECT $p_3;", sql);
        Assert.Equal(new[] { "$p", "$p2", "$p_3" }, paramNames);
        // The second call has to return the very same cached instance, not an equal copy.
        Assert.Same(parsed, SqlParser.Parse(query));
    }

    // Empty input yields empty output and no parameters.
    [Fact]
    public void Parse_WhenEmptySql_ReturnEmpty()
    {
        var (sql, paramsNames) = SqlParser.Parse("");

        Assert.Equal("", sql);
        Assert.Empty(paramsNames);
    }

    // Placeholders inside a "--" line comment are not parameters.
    [Fact]
    public void Parse_WhenLineComment_ReturnSqlWithComment()
    {
        var (sql, paramNames) = SqlParser.Parse(@"-- Comment with params @param, @p2, @p_3
SELECT @param; SELECT @p2; SELECT @p_3;");

        Assert.Equal(@"-- Comment with params @param, @p2, @p_3
SELECT $param; SELECT $p2; SELECT $p_3;", sql);
        Assert.Equal(new[] { "$param", "$p2", "$p_3" }, paramNames);
    }

    // Placeholders inside a (nested) block comment are not parameters.
    [Fact]
    public void Parse_WhenBlockComment_ReturnSqlWithComment()
    {
        var (sql, paramNames) = SqlParser.Parse(@"/* Comment SQL
/*
Comment with params @param, @p2, @p_3
--
@
*/
SELECT @param; SELECT @p2; SELECT @p_3
*/
INSERT INTO Table
(id, bool_column, bigint_column, smallint_column, tinyint_column, float_column, double_column, decimal_column,
uint8_column, uint16_column, uint32_column, uint64_column, text_column, binary_column, json_column,
jsondocument_column, date_column, datetime_column, timestamp_column, interval_column) VALUES
(@name1, @name2, @name3, @name4, @name5, @name6, @name7, @name8, @name9, @name10, @name11, @name12, @name13, @name14,
@name15, @name16, @name17, @name18, @name19, @name20);");

        Assert.Equal(@"/* Comment SQL
/*
Comment with params @param, @p2, @p_3
--
@
*/
SELECT @param; SELECT @p2; SELECT @p_3
*/
INSERT INTO Table
(id, bool_column, bigint_column, smallint_column, tinyint_column, float_column, double_column, decimal_column,
uint8_column, uint16_column, uint32_column, uint64_column, text_column, binary_column, json_column,
jsondocument_column, date_column, datetime_column, timestamp_column, interval_column) VALUES
($name1, $name2, $name3, $name4, $name5, $name6, $name7, $name8, $name9, $name10, $name11, $name12, $name13, $name14,
$name15, $name16, $name17, $name18, $name19, $name20);", sql);
        Assert.Equal(new[]
        {
            "$name1", "$name2", "$name3", "$name4", "$name5", "$name6", "$name7", "$name8", "$name9", "$name10",
            "$name11", "$name12", "$name13", "$name14", "$name15", "$name16", "$name17", "$name18", "$name19", "$name20"
        }, paramNames);
    }

    // '@' inside a double-quoted literal (including after an escaped quote) is not a parameter.
    [Fact]
    public void Parse_WhenDoubleQuotes_ReturnSql()
    {
        var (sql, paramNames) = SqlParser.Parse(@"REPLACE INTO episodes
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
12,
""@ @ @ @ @ @Test Episode !!! \"" \"" \"" \"" SELECT @param; SELECT @p2; SELECT @p_3"",
@air_date
)
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = ""123 @ \"" @ @"" AND season_id = @param;
;");

        Assert.Equal(@"REPLACE INTO episodes
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
12,
""@ @ @ @ @ @Test Episode !!! \"" \"" \"" \"" SELECT @param; SELECT @p2; SELECT @p_3"",
$air_date
)
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = ""123 @ \"" @ @"" AND season_id = $param;
;", sql);
        Assert.Equal(new[] { "$air_date", "$param" }, paramNames);
    }

    // '@' inside a single-quoted literal (including after an escaped quote) is not a parameter.
    [Fact]
    public void Parse_WhenSingleQuotes_ReturnSql()
    {
        var (sql, paramNames) = SqlParser.Parse(@"UPSERT INTO episodes
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
13,
'Test Episode @ \'@ @ \'@ \'@ \'@ @ @ @ @ @ @ @ ',
@air_date
)
;
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = '123 @ \' @ @' AND season_id = @param;");

        Assert.Equal(@"UPSERT INTO episodes
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
13,
'Test Episode @ \'@ @ \'@ \'@ \'@ @ @ @ @ @ @ @ ',
$air_date
)
;
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = '123 @ \' @ @' AND season_id = $param;", sql);
        Assert.Equal(new[] { "$air_date", "$param" }, paramNames);
    }

    // '@' inside a backtick-quoted identifier (including after an escaped backtick) is not a parameter.
    [Fact]
    public void Parse_WhenBacktickQuotes_ReturnSql()
    {
        var (sql, paramNames) = SqlParser.Parse(@"UPSERT INTO `episodes @ @ @ @ "" "" \` @ @ @`
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
13,
'Test Episode @ \'@ @ \'@ \'@ \'@ @ @ @ @ @ @ @ ',
@air_date
)
;
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = '123 @ \' @ @' AND season_id = @param;");

        Assert.Equal(@"UPSERT INTO `episodes @ @ @ @ "" "" \` @ @ @`
(
series_id,
season_id,
episode_id,
title,
air_date
)
VALUES
(
2,
5,
13,
'Test Episode @ \'@ @ \'@ \'@ \'@ @ @ @ @ @ @ @ ',
$air_date
)
;
COMMIT;
-- View result:
SELECT * FROM episodes WHERE series_id = '123 @ \' @ @' AND season_id = $param;", sql);
        Assert.Equal(new[] { "$air_date", "$param" }, paramNames);
    }

    // Text between "@@" pairs is a multiline literal and must be copied verbatim.
    [Fact]
    public void Parse_WhenMultilineStringLiterals_ReturnSql()
    {
        var (sql, paramNames) = SqlParser.Parse(@"$text = @@some
multiline with double at: @@@@
text@@;
SELECT $text;
-- Comment with params @param, @p2, @p_3
SELECT @param; SELECT @p2; SELECT @p_3;");

        Assert.Equal(@"$text = @@some
multiline with double at: @@@@
text@@;
SELECT $text;
-- Comment with params @param, @p2, @p_3
SELECT $param; SELECT $p2; SELECT $p_3;", sql);
        Assert.Equal(new[] { "$param", "$p2", "$p_3" }, paramNames);
    }
}

0 comments on commit 9b69b30

Please sign in to comment.