Skip to content

Commit

Permalink
Simplifying delimiters
Browse files Browse the repository at this point in the history
  • Loading branch information
Leonard Wolters committed May 29, 2024
1 parent 61f61be commit 81dc923
Showing 1 changed file with 20 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,27 @@ trait SplitMergeFunctionTokenizer {

def tokenizeSplitMergeFunction(col: SplitMergeFunction[_])(implicit ctx: TokenizeContext): String = col match {
case SplitByChar(sep: StringColMagnet[_], col: StringColMagnet[_]) =>
// Some small optimizations
val separator = sep.column match {
case c: Const[_] => c.const.asInstanceOf[String]
}

if (separator.length == 1) {
val s = separator.charAt(0).toInt
sep.column match {
case c: Const[String] =>
val separator = c.const
if (separator.length == 1) {
val s = separator.charAt(0).toInt

// https://en.wikipedia.org/wiki/List_of_Unicode_characters
// 34 == DoubleQuote ("), 39 == Single Quote ('),
// 47 == Forward Slash (/), 92 == Backward Slash (\\)
// 96 == Grave Accent (` under tilde)
if (s == 34 || s == 39 || s == 92 || s == 96) {
s"splitByChar(char($s), ${tokenizeColumn(col.column)})"
} else if (s >= 32 && s <= 126) {
s"splitByChar(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
} else {
s"splitByChar(char($s), ${tokenizeColumn(col.column)})"
}
} else {
s"splitByString(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
// https://en.wikipedia.org/wiki/List_of_Unicode_characters
// 34 == DoubleQuote ("), 39 == Single Quote ('),
// 47 == Forward Slash (/), 92 == Backward Slash (\\)
// 96 == Grave Accent (` under tilde)
if (s == 34 || s == 39 || s == 92 || s == 96) {
s"splitByChar(char($s), ${tokenizeColumn(col.column)})"
} else if (s >= 32 && s <= 126) {
s"splitByChar(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
} else {
s"splitByChar(char($s), ${tokenizeColumn(col.column)})"
}
} else {
s"splitByString(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
}
case _ => s"splitByString(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
}
case SplitByString(sep: StringColMagnet[_], col: StringColMagnet[_]) =>
s"splitByString(${tokenizeColumn(sep.column)}, ${tokenizeColumn(col.column)})"
Expand Down

0 comments on commit 81dc923

Please sign in to comment.