Skip to content

Commit

Permalink
Corrections hinted by rubocop.
Browse files Browse the repository at this point in the history
  • Loading branch information
informatimago committed Aug 2, 2023
1 parent 9065444 commit 6666f89
Show file tree
Hide file tree
Showing 3 changed files with 315 additions and 307 deletions.
37 changes: 22 additions & 15 deletions lib/CLexer.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
#
# This is a simple lexer for the C programming language.
# This is a simple lexer for the C programming language.
# MIT license. (c) 2023 Pascal Bourguignon
#

class CLexer
#
# CLexer is a simple C lexer. It is used to tokenize a C source file.
#
# Usage:
# lexer = CLexer.new(pre_processed_c_source)
# tokens = lexer.tokenize
#
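# The tokenize method returns an array of tokens. Keywords and operators
# are represented as symbols (e.g. :int, :open_brace); identifiers and
# literals are [type, text] pairs such as [:identifier, "main"] or
# [:integer_literal, "42"].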

KEYWORDS = %w[auto break case char const continue default do double else enum
extern float for goto if int long register return short signed
Expand Down Expand Up @@ -67,22 +75,21 @@ class CLexer
'{' => :open_brace,
'|' => :logical_or_op,
'}' => :close_brace,
'~' => :bitwise_not_op,
'~' => :bitwise_not_op

}.freeze


OPERATOR_REGEX = Regexp.new('\A(' + OPERATOR_SYMBOLS.keys.map { |op| Regexp.escape(op) }.join('|') + ')')
OPERATOR_SYMS = OPERATOR_SYMBOLS.values.freeze
KEYWORDS_SYMS = KEYWORDS.map{ |n| n.to_sym }.freeze
KEYWORDS_SYMS = KEYWORDS.map(&:to_sym).freeze

def initialize(input)
@input = input
@tokens = []
end

def tokenize
while @input.size > 0
while @input.size.positive?
case @input
when /\A[[:space:]]+/m
@input = $'
Expand All @@ -91,35 +98,35 @@ def tokenize
when /\A\/\*/
consume_multiline_comment
when /\A[_a-zA-Z][_a-zA-Z0-9]*/
identifier_or_keyword = $& ;
identifier_or_keyword = $&
@input = $'
if KEYWORDS.include?(identifier_or_keyword)
@tokens << identifier_or_keyword.to_sym
else
@tokens << [:identifier, identifier_or_keyword]
end
when /\A\d+\.\d*([eE][+-]?\d+)?[fFlL]?|\.\d+([eE][+-]?\d+)?[fFlL]?|\d+[eE][+-]?\d+[fFlL]?/
float_constant = $& ;
float_constant = $&
@input = $'
@tokens << [:float_literal, float_constant]
when /\A\d+/
integer_constant = $& ;
integer_constant = $&
@input = $'
@tokens << [:integer_literal, integer_constant]
when /\A0[xX][0-9a-fA-F]+/
hex_constant = $& ;
hex_constant = $&
@input = $'
@tokens << [:hex_literal, hex_constant]
when /\A'((\\.|[^\\'])*)'/
char_literal = $& ;
char_literal = $&
@input = $'
@tokens << [:char_literal, char_literal]
when /\A"((\\.|[^\\"])*)"/
string_literal = $& ;
string_literal = $&
@input = $'
@tokens << [:string_literal, string_literal]
when OPERATOR_REGEX
operator = $& ;
operator = $&
@input = $'
@tokens << OPERATOR_SYMBOLS[operator]
else
Expand All @@ -133,7 +140,7 @@ def tokenize
private

def consume_multiline_comment
while @input.size > 0
while @input.size.positive?
case @input
when /\A\*\//
@input = $'
Expand All @@ -145,8 +152,8 @@ def consume_multiline_comment
end
end

def example
input = File.read("/home/pbourguignon/src/c-tidbits/pipes/tee.out.c")
def example
input = File.read('tee.c')
lexer = CLexer.new(input)
tokens = lexer.tokenize
puts tokens.inspect
Expand Down
Loading

0 comments on commit 6666f89

Please sign in to comment.