A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax), but because it is not a true lexer, it will make mistakes in some ambiguous cases.
Methods
Constants
| KEYWORDS | = | %w{if then elsif else end begin do rescue ensure while for class module def yield raise until unless and or not when case super undef break next redo retry in return alias defined?} |
| The list of all identifiers recognized as keywords. | ||
Public Instance methods
Perform Ruby-specific setup.
[ show source ]
# File lib/syntax/lang/ruby.rb, line 18
# Perform Ruby-specific setup by resetting the state that #step carries
# from one iteration to the next.
def setup
  # Heredocs whose opening tag has been seen but whose body has not yet
  # been scanned.
  @heredocs = []
  # Both flags start out cleared: no "." has been seen, and a "/" at the
  # very start of the input is treated as the start of a regex.
  @selector = @allow_operator = false
end
Step through a single iteration of the tokenization process.
[ show source ]
# File lib/syntax/lang/ruby.rb, line 25
# Step through a single iteration of the tokenization process.
#
# Looks at the text at the current scan position, emits the matching token
# group(s) and updates the state shared between iterations:
#
# * @selector       - set after a single "."; while set, the next word is
#                     reported as :ident even if it looks like a keyword,
#                     a constant or one of true/false/nil/self.
# * @allow_operator - set after tokens that can end an expression; decides
#                     whether "/" is a :punct operator or opens a regex.
# * @heredocs       - [float_right, quote, delimiter] triples for heredocs
#                     whose opening tag was seen on the current line; the
#                     next newline hands the oldest one to scan_heredoc.
#
# The return value is not meaningful.
def step
  case
  when bol? && check( /=begin/ )
    # Block comment: everything up to and including the "=end" line.
    start_group( :comment, scan_until( /^=end#{EOL}/ ) )
  when bol? && check( /__END__#{EOL}/ )
    # Everything after __END__ is reported as a comment.
    start_group( :comment, scan_until( /\Z/ ) )
  else
    case
    when check( /def\s+/ )
      start_group :keyword, scan( /def\s+/ )
      start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
    when check( /class\s+/ )
      start_group :keyword, scan( /class\s+/ )
      start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
    when check( /module\s+/ )
      start_group :keyword, scan( /module\s+/ )
      start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
    when check( /::/ )
      start_group :punct, scan(/::/)
    when check( /:"/ )
      # Double-quoted symbol.
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check( /:'/ )
      # Single-quoted symbol.
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
      start_group :symbol, matched
      @allow_operator = true
    when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
      # Character literal such as ?a or ?\n.
      start_group :char, matched
      @allow_operator = true
    when check( /(__FILE__|__LINE__|true|false|nil|self)(?!\w)[?!]?/ )
      # The (?!\w) lookahead keeps identifiers that merely start with one
      # of these words (e.g. "selfish", "nil_value") out of this branch.
      if @selector || matched[-1] == ?? || matched[-1] == ?!
        # After a "." or with a ?/! suffix this is a method name.
        start_group :ident,
          scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
      else
        start_group :constant,
          scan(/(__FILE__|__LINE__|true|false|nil|self)/)
      end
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      # Integer literal with an explicit radix prefix.
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan( /../ ), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan( /../ ), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan( /../ ), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan( /../ ), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        # Bare "%" followed directly by a delimiter, e.g. %(text).
        scan_delimited_region :punct, :string, scan( /./ ), true
        @allow_operator = true
      when "<<"
        # If the text emitted just before "<<" ends in a word character,
        # treat "<<" as an operator rather than a heredoc opener.
        saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
        start_group :punct, scan( /<</ )
        if saw_word
          @allow_operator = false
          return
        end

        float_right = scan( /-/ )
        append "-" if float_right
        if ( type = scan( /['"]/ ) )
          append type
          delim = scan_until( /(?=#{type})/ )
          if delim.nil?
            # Unterminated quoted delimiter: consume the rest of the input.
            append scan_until( /\Z/ )
            return
          end
        else
          delim = scan( /\w+/ ) or return
        end
        start_group :constant, delim
        start_group :punct, scan( /#{type}/ ) if type
        # The heredoc body is scanned when the next newline is reached.
        @heredocs << [ float_right, type, delim ]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          if @heredocs.empty?
            start_group :normal, scan( /\s+/ )
          else
            scan_heredoc(*@heredocs.shift)
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan( /\s+/ )
        when "#"
          start_group :comment, scan( /#[^\n\r]*/ )
        when /[A-Z]/
          start_group( (@selector ? :ident : :constant), scan( /\w+/ ) )
          @allow_operator = true
          # A pending "." applies to this word only; without the reset a
          # later keyword would still be reported as an identifier.
          @selector = false
        when /[a-z_]/
          word = scan( /\w+[?!]?/ )
          if !@selector && KEYWORDS.include?( word )
            start_group :keyword, word
            @allow_operator = false
          else
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          # Decimal integer or float; the exponent may carry a sign.
          start_group :number,
            scan( /[\d_]+(\.[\d_]+)?([eE][-+]?[\d_]+)?/ )
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when '/'
          if @allow_operator
            # Follows something that can end an expression: division.
            start_group :punct, scan(%r{/})
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan( /\.{1,3}/ )
          start_group :punct, dots
          # Only a single dot is a method selector; ".." and "..." are not.
          @selector = ( dots.length == 1 )
        when /[@]/
          start_group :attribute, scan( /@{1,2}\w*/ )
          @allow_operator = true
        when /[$]/
          start_group :global, scan(/\$/)
          start_group :global, scan( /\w+|./ ) if check(/./)
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan(/./)
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan(/./)
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end