class Regexp::Scanner
Constants
- POSIX_CLASSES
-
Use each_with_object for required_ruby_version >= 2.2, or to_h for >= 2.6
Attributes
Public Class Methods
Source
# File lib/regexp_parser/scanner.rb, line 2468
# Returns the result of parse_prop_map('long'). The value is computed on the
# first call and cached in @long_prop_map for subsequent calls.
def self.long_prop_map
  return @long_prop_map if @long_prop_map

  @long_prop_map = parse_prop_map('long')
end
Source
# File lib/regexp_parser/scanner.rb, line 2472
# Reads scanner/properties/<name>.csv, resolved relative to this file's
# directory, and turns it into a Hash.
#
# Every line that has text on both sides of a comma yields one entry. The
# first capture is greedy, so a line is split at its LAST comma: the key is
# everything before it and the value everything after it.
def self.parse_prop_map(name)
  csv_path = "#{__dir__}/scanner/properties/#{name}.csv"
  pairs = File.read(csv_path).scan(/(.+),(.+)/)
  pairs.to_h
end
Source
# File lib/regexp_parser/scanner.rb, line 19
# Class-level convenience entry point: builds a fresh scanner instance and
# forwards all arguments, plus the optional block, to its instance-level #scan.
def self.scan(input_object, options: nil, collect_tokens: true, &block)
  scanner = new
  scanner.scan(input_object, options: options, collect_tokens: collect_tokens, &block)
end
Scans the given regular expression text or Regexp object and collects the emitted tokens into an array that is returned at the end. If a block is given, it is called for each emitted token.
This method may raise errors if a syntax error is encountered.
Source
# File lib/regexp_parser/scanner.rb, line 2464
# Returns the result of parse_prop_map('short'). The value is computed on the
# first call and cached in @short_prop_map for subsequent calls.
def self.short_prop_map
  return @short_prop_map if @short_prop_map

  @short_prop_map = parse_prop_map('short')
end
Lazy-loads the property maps when they are first needed.
Public Instance Methods
Source
# File lib/regexp_parser/scanner.rb, line 2483
# Records one scanned token as [type, token, text, start, end].
#
# Any pending literal run is flushed first via emit_literal. The start and
# end offsets are character positions derived from char_pos and text.length.
# The token is stored in prev_token and char_pos is advanced to the end offset.
# The token is then passed to the block (when one is set) and appended to
# tokens (when collect_tokens is truthy).
def emit(type, token, text)
  #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"

  emit_literal if literal_run

  # Ragel runs with byte-based indices (ts, te). These are of little value to
  # end-users, so we keep track of char-based indices and emit those instead.
  start_pos = char_pos
  end_pos = start_pos + text.length

  entry = [type, token, text, start_pos, end_pos]
  self.prev_token = entry
  self.char_pos = end_pos

  block.call(type, token, text, start_pos, end_pos) if block

  # TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect if no block given
  tokens << entry if collect_tokens
end
Emits an array with the details of the scanned pattern
Source
# File lib/regexp_parser/scanner.rb, line 23 def scan(input_object, options: nil, collect_tokens: true, &block) self.collect_tokens = collect_tokens self.literal_run = nil stack = [] input = input_object.is_a?(Regexp) ? input_object.source : input_object self.free_spacing = free_spacing?(input_object, options) self.regexp_encoding = extract_encoding(input_object, options) self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}] data = input.unpack("c*") eof = data.length self.tokens = [] self.block = block self.set_depth = 0 self.group_depth = 0 self.capturing_group_count = 0 self.conditional_stack = [] self.char_pos = 0 class << self attr_accessor :_re_scanner_trans_keys private :_re_scanner_trans_keys, :_re_scanner_trans_keys= end self._re_scanner_trans_keys = [ 0,0,-128,-65,-128,-65, -128,-65,41,41,39, 57,39,39,33,62, 62,62,39,60,39,57, 39,39,48,57,39, 57,48,57,39,57, 33,62,62,62,48,57, 43,62,48,57,48, 62,39,60,39,57, 39,39,48,57,39,57, 48,57,39,57,33, 62,62,62,48,57, 43,62,48,57,48,62, 48,57,48,125,44, 125,123,123,9,122, 9,125,9,122,-128,-65, -128,-65,38,38,58, 93,58,93,-128,-65, -128,-65,-128,-65,-128,-65, 45,45,92,92,92, 92,45,45,92,92, 92,92,48,123,48,102, 48,102,48,102,48, 102,9,125,9,125, 9,125,9,125,9,125, 9,125,48,102,120, 120,56,102,48,102, 39,39,41,41,41,57, 62,62,-128,127,-62, -12,1,127,1,127, 9,32,33,126,10,10, 63,63,33,126,33, 126,43,63,43,63, 43,63,65,122,44,57, 68,119,80,112,-62, 125,-128,-65,-128,-65, -128,-65,38,38,38,93, 58,58,-62,120,-128, -65,-128,-65,-128,-65, 48,55,48,55,-62,125, -128,-65,-128,-65,-128, -65,48,55,48,55, 48,57,48,57,48,57, 48,57,77,77,45, 45,0,0,67,99, 45,45,0,0,92,92, 48,102,48,102,92, 92,39,60,39,57, 48,57,41,57,33,62, 0 ] class << self attr_accessor :_re_scanner_key_spans private :_re_scanner_key_spans, :_re_scanner_key_spans= end self._re_scanner_key_spans = [ 0,64,64,64,1,19,1,30, 1,22,19,1,10,19,10,19, 30,1,10,20,10,15,22,19, 1,10,19,10,19,30,1,10, 20,10,15,10,78,82,1,114, 117,114,64,64,1,36,36,64, 
64,64,64,1,1,1,1,1, 1,76,55,55,55,55,117,117, 117,117,117,117,55,1,47,55, 1,1,17,1,256,51,127,127, 24,94,1,1,94,94,21,21, 21,58,14,52,33,188,64,64, 64,1,56,1,183,64,64,64, 8,8,188,64,64,64,8,8, 10,10,10,10,1,1,0,33, 1,0,1,55,55,1,22,19, 10,17,30 ] class << self attr_accessor :_re_scanner_index_offsets private :_re_scanner_index_offsets, :_re_scanner_index_offsets= end self._re_scanner_index_offsets = [ 0,0,65,130,195,197,217,219, 250,252,275,295,297,308,328,339, 359,390,392,403,424,435,451,474, 494,496,507,527,538,558,589,591, 602,623,634,650,661,740,823,825, 940,1058,1173,1238,1303,1305,1342,1379, 1444,1509,1574,1639,1641,1643,1645,1647, 1649,1651,1728,1784,1840,1896,1952,2070, 2188,2306,2424,2542,2660,2716,2718,2766, 2822,2824,2826,2844,2846,3103,3155,3283, 3411,3436,3531,3533,3535,3630,3725,3747, 3769,3791,3850,3865,3918,3952,4141,4206, 4271,4336,4338,4395,4397,4581,4646,4711, 4776,4785,4794,4983,5048,5113,5178,5187, 5196,5207,5218,5229,5240,5242,5244,5245, 5279,5281,5282,5284,5340,5396,5398,5421, 5441,5452,5470 ] class << self attr_accessor :_re_scanner_indicies private :_re_scanner_indicies, :_re_scanner_indicies= end self._re_scanner_indicies = [ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 0,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,0,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,0,6,5,8,7,7, 7,7,7,4,7,7,4,4, 4,4,4,4,4,4,4,4, 7,8,7,10,9,9,9,9, 9,9,9,9,9,9,9,4, 9,9,4,4,4,4,4,4, 4,4,4,4,9,9,9,10, 8,9,8,9,12,11,11,11, 11,11,11,11,11,11,11,11, 11,11,11,11,11,11,11,11, 11,13,11,15,14,14,14,14, 14,16,14,14,17,17,17,17, 17,17,17,17,17,17,14,15, 14,17,17,17,17,17,17,17, 17,17,17,11,15,11,11,11, 18,11,18,11,11,17,17,17, 17,17,17,17,17,17,17,11, 19,19,19,19,19,19,19,19, 19,19,11,15,11,11,11,11, 
11,11,11,11,19,19,19,19, 19,19,19,19,19,19,11,11, 20,20,20,20,20,20,20,20, 20,20,20,21,20,20,22,22, 22,22,22,22,22,22,22,22, 20,20,20,11,15,20,15,20, 22,22,22,22,22,22,22,22, 22,22,11,23,11,23,11,11, 22,22,22,22,22,22,22,22, 22,22,11,11,11,11,15,11, 24,24,24,24,24,24,24,24, 24,24,11,24,24,24,24,24, 24,24,24,24,24,11,11,11, 11,15,11,25,11,11,11,11, 11,11,11,11,11,11,11,11, 11,11,11,11,11,11,11,11, 26,11,28,27,27,27,27,27, 29,27,27,30,30,30,30,30, 30,30,30,30,30,27,28,27, 30,30,30,30,30,30,30,30, 30,30,11,28,11,11,11,31, 11,31,11,11,30,30,30,30, 30,30,30,30,30,30,11,32, 32,32,32,32,32,32,32,32, 32,11,28,11,11,11,11,11, 11,11,11,32,32,32,32,32, 32,32,32,32,32,11,11,33, 33,33,33,33,33,33,33,33, 33,33,34,33,33,35,35,35, 35,35,35,35,35,35,35,33, 33,33,11,28,33,28,33,35, 35,35,35,35,35,35,35,35, 35,11,36,11,36,11,11,35, 35,35,35,35,35,35,35,35, 35,11,11,11,11,28,11,37, 37,37,37,37,37,37,37,37, 37,11,37,37,37,37,37,37, 37,37,37,37,11,11,11,11, 28,11,39,39,39,39,39,39, 39,39,39,39,38,39,39,39, 39,39,39,39,39,39,39,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,40,38,39,38,38,38, 41,41,41,41,41,41,41,41, 41,41,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,38,38,38, 38,38,38,38,38,40,38,42, 43,44,44,44,44,44,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 44,43,43,43,43,43,43,43, 43,43,43,43,43,44,44,43, 44,44,44,44,44,44,44,44, 44,44,43,43,43,44,43,43, 43,44,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,43,43,43,45,44, 43,44,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,43,44,44,44,44, 44,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,44,43,43,43,43, 43,43,43,43,43,43,43,43, 44,44,43,44,44,44,44,44, 
44,44,44,44,44,43,43,43, 44,43,43,43,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,44,44,43,43, 43,43,44,43,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,44,44,43,43, 46,43,44,44,44,44,44,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,44,43,43,43,43,43,43, 43,43,43,43,43,43,44,44, 43,44,44,44,44,44,44,44, 44,44,44,43,43,43,44,43, 43,43,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,43,43,43,43, 44,43,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,44,44,44,44, 44,44,44,44,43,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,47,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,47,51, 50,54,53,53,53,53,53,53, 53,53,53,53,53,53,53,53, 53,53,53,53,53,53,53,53, 53,53,53,53,53,53,53,53, 53,53,55,53,55,53,54,53, 53,53,53,53,53,53,53,53, 53,53,53,53,53,53,53,53, 53,53,53,53,53,53,53,53, 53,53,53,53,53,53,53,55, 53,56,53,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,57,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,57,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,60,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 
62,62,62,62,62,62,62,62, 62,62,62,62,62,62,60,63, 43,65,64,67,64,68,43,70, 69,72,69,73,73,73,73,73, 73,73,73,73,73,43,43,43, 43,43,43,43,73,73,73,73, 73,73,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,73,73,73,73, 73,73,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,74,43, 75,75,75,75,75,75,75,75, 75,75,43,43,43,43,43,43, 43,75,75,75,75,75,75,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,75,75,75,75,75,75,43, 76,76,76,76,76,76,76,76, 76,76,43,43,43,43,43,43, 43,76,76,76,76,76,76,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,76,76,76,76,76,76,43, 77,77,77,77,77,77,77,77, 77,77,43,43,43,43,43,43, 43,77,77,77,77,77,77,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,77,77,77,77,77,77,43, 78,78,78,78,78,78,78,78, 78,78,43,43,43,43,43,43, 43,78,78,78,78,78,78,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,78,78,78,78,78,78,43, 74,74,74,74,74,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,74, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,79, 79,79,79,79,79,79,79,79, 79,43,43,43,43,43,43,43, 79,79,79,79,79,79,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 79,79,79,79,79,79,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,77,43,74,74, 74,74,74,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,74,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,80,80,80, 80,80,80,80,80,80,80,43, 43,43,43,43,43,43,80,80, 80,80,80,80,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,80,80, 80,80,80,80,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,77,43,74,74,74,74, 74,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,74,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,81,81,81,81,81, 81,81,81,81,81,43,43,43, 43,43,43,43,81,81,81,81, 81,81,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 
43,43,43,43,43,43,43,43, 43,43,43,43,81,81,81,81, 81,81,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 77,43,74,74,74,74,74,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,74,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,82,82,82,82,82,82,82, 82,82,82,43,43,43,43,43, 43,43,82,82,82,82,82,82, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,82,82,82,82,82,82, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,77,43, 74,74,74,74,74,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,74, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,83, 83,83,83,83,83,83,83,83, 83,43,43,43,43,43,43,43, 83,83,83,83,83,83,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 83,83,83,83,83,83,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,77,43,74,74, 74,74,74,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,74,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,77,43,85,85,85,85, 85,85,85,85,86,86,84,84, 84,84,84,84,84,86,86,86, 86,86,86,84,84,84,84,84, 84,84,84,84,84,84,84,84, 84,84,84,84,84,84,84,84, 84,84,84,84,84,86,86,86, 86,86,86,84,88,87,89,89, 87,87,87,87,87,87,87,89, 89,89,89,89,89,87,87,87, 87,87,87,87,87,87,87,87, 87,87,87,87,87,87,87,87, 87,87,87,87,87,87,87,89, 89,89,89,89,89,87,90,90, 90,90,90,90,90,90,90,90, 87,87,87,87,87,87,87,90, 90,90,90,90,90,87,87,87, 87,87,87,87,87,87,87,87, 87,87,87,87,87,87,87,87, 87,87,87,87,87,87,87,90, 90,90,90,90,90,87,93,92, 94,91,94,91,91,91,91,91, 91,95,95,95,95,95,95,95, 95,95,95,91,93,96,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 
43,43,43,43,43,43,43,43, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,97,97, 97,97,97,43,43,43,43,43, 43,43,43,43,43,43,43,98, 98,98,98,98,98,98,98,99, 99,99,99,99,98,98,98,98, 98,98,98,98,98,98,98,98, 98,98,98,98,98,98,100,101, 101,102,103,101,101,101,104,105, 106,107,101,101,108,101,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,109,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,101,101,101, 101,110,111,112,113,101,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,101,101,101, 101,114,115,112,101,98,101,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,97,97,97, 97,97,116,98,98,98,98,98, 98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98, 98,98,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,116,116,116,116,116,116,116, 116,98,116,98,98,98,98,98, 98,98,98,99,99,99,99,99, 98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98, 98,98,100,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,98,117,100,100,100,100,100, 117,117,117,117,117,117,117,117, 117,117,117,117,117,117,117,117, 117,117,100,117,101,101,116,116, 101,101,101,116,116,116,116,101, 101,116,101,101,101,101,101,101, 101,101,101,101,101,101,101,101, 
101,101,116,101,101,101,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,101,116,116, 116,116,101,101,101,101,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,101,101,101, 101,101,101,101,101,101,116,116, 116,101,116,119,102,121,120,10, 123,5,123,123,123,124,125,122, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 8,123,126,10,8,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,8,123,122,123, 122,123,123,123,122,122,122,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,127, 123,122,122,122,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,123,123,123,123,123, 123,123,123,122,123,129,128,128, 128,128,128,128,128,128,128,128, 128,128,128,128,128,128,128,128, 128,129,128,131,130,130,130,130, 130,130,130,130,130,130,130,130, 130,130,130,130,130,130,130,131, 130,133,132,132,132,132,132,132, 132,132,132,132,132,132,132,132, 132,132,132,132,132,133,132,135, 135,134,134,134,134,135,134,134, 134,136,134,134,134,134,134,134, 134,134,134,134,134,134,134,134, 135,134,134,134,134,134,134,134, 135,134,134,134,134,137,134,134, 134,138,134,134,134,134,134,134, 134,134,134,134,134,134,134,134, 135,134,140,139,139,139,41,41, 41,41,41,41,41,41,41,41, 139,141,43,43,43,141,43,43, 43,43,43,43,43,43,43,141, 141,43,43,43,141,141,43,43, 43,43,43,43,43,43,43,43, 43,141,43,43,43,141,43,43, 43,43,43,43,43,43,43,43, 141,43,43,43,141,43,142,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,43,43, 43,43,43,43,43,43,142,43, 143,143,143,143,143,143,143,143, 143,143,143,143,143,143,143,143, 
143,143,143,143,143,143,143,143, 143,143,143,143,143,143,144,144, 144,144,144,144,144,144,144,144, 144,144,144,144,144,144,145,145, 145,145,145,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,146,48,147,48,146,146, 146,146,48,148,146,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,146,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,149,150,151,152,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,146,146,146,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,48,48,48, 48,48,48,48,48,153,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,49,49, 49,49,49,49,49,49,153,154, 154,154,154,154,154,154,154,154, 154,154,154,154,154,154,154,154, 154,154,154,154,154,154,154,154, 154,154,154,154,154,154,154,154, 154,154,154,154,154,154,154,154, 154,154,154,154,154,154,154,154, 154,154,154,154,154,154,154,154, 154,154,154,154,154,154,154,153, 155,153,157,156,156,156,156,156, 156,156,156,156,156,156,156,156, 156,156,156,156,156,156,156,156, 156,156,156,156,156,156,156,156, 156,156,156,156,156,156,156,156, 156,156,156,156,156,156,156,156, 156,156,156,156,156,156,156,156, 156,158,156,53,160,161,161,161, 161,161,161,161,161,161,161,161, 161,161,161,161,161,161,161,161, 161,161,161,161,161,161,161,161, 161,161,161,162,162,162,162,162, 162,162,162,162,162,162,162,162, 162,162,162,163,163,163,163,163, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,164,164,164,164,164, 164,164,164,58,58,58,58,58, 58,58,58,58,58,58,165,165, 
58,58,58,165,58,58,58,58, 165,58,58,165,58,58,165,58, 58,58,165,58,58,58,165,165, 165,58,58,58,165,165,165,165, 165,165,58,165,58,58,58,58, 58,165,58,165,58,165,165,165, 165,165,165,165,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,58,58,58, 58,58,58,58,58,166,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,59,59, 59,59,59,59,59,59,166,167, 167,167,167,167,167,167,167,167, 167,167,167,167,167,167,167,167, 167,167,167,167,167,167,167,167, 167,167,167,167,167,167,167,167, 167,167,167,167,167,167,167,167, 167,167,167,167,167,167,167,167, 167,167,167,167,167,167,167,167, 167,167,167,167,167,167,167,166, 169,169,169,169,169,169,169,169, 168,170,170,170,170,170,170,170, 170,168,171,171,171,171,171,171, 171,171,171,171,171,171,171,171, 171,171,171,171,171,171,171,171, 171,171,171,171,171,171,171,171, 172,172,172,172,172,172,172,172, 172,172,172,172,172,172,172,172, 173,173,173,173,173,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,174,61,61,61, 174,174,174,174,61,61,174,61, 175,176,176,176,176,176,176,176, 177,177,61,61,61,61,61,174, 61,43,43,178,179,61,61,43, 179,61,61,43,61,180,61,61, 181,61,179,179,61,61,61,179, 179,61,43,174,174,174,174,61, 61,182,182,63,179,182,182,61, 179,61,61,61,61,61,182,61, 181,61,182,179,182,183,182,179, 184,61,43,174,174,174,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,61, 61,61,61,61,61,61,61,185, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 
62,62,62,62,62,62,62,62, 62,62,62,62,62,62,62,62, 185,186,186,186,186,186,186,186, 186,186,186,186,186,186,186,186, 186,186,186,186,186,186,186,186, 186,186,186,186,186,186,186,186, 186,186,186,186,186,186,186,186, 186,186,186,186,186,186,186,186, 186,186,186,186,186,186,186,186, 186,186,186,186,186,186,186,186, 186,185,188,188,188,188,188,188, 188,188,187,189,189,189,189,189, 189,189,189,187,191,191,191,191, 191,191,191,191,192,192,190,192, 192,192,192,192,192,192,192,192, 192,190,192,192,192,192,192,192, 192,192,192,192,190,192,192,192, 192,192,192,192,192,192,192,190, 194,64,196,195,64,198,69,69, 69,69,69,69,69,69,69,69, 69,69,69,69,69,69,69,69, 69,69,69,69,69,69,69,69, 69,69,69,69,69,199,69,201, 200,69,72,69,203,203,203,203, 203,203,203,203,203,203,202,202, 202,202,202,202,202,203,203,203, 203,203,203,202,202,202,202,202, 202,202,202,202,202,202,202,202, 202,202,202,202,202,202,202,202, 202,202,202,202,202,203,203,203, 203,203,203,202,90,90,90,90, 90,90,90,90,90,90,202,202, 202,202,202,202,202,90,90,90, 90,90,90,202,202,202,202,202, 202,202,202,202,202,202,202,202, 202,202,202,202,202,202,202,202, 202,202,202,202,202,90,90,90, 90,90,90,202,205,204,207,206, 206,206,206,206,208,206,206,209, 209,209,209,209,209,209,209,209, 209,206,206,210,206,93,92,92, 92,92,92,211,92,92,211,211, 211,211,211,211,211,211,211,211, 92,95,95,95,95,95,95,95, 95,95,95,211,94,211,211,211, 211,211,211,95,95,95,95,95, 95,95,95,95,95,211,211,96, 96,96,96,96,96,96,96,96, 96,96,211,96,96,211,211,211, 211,211,211,211,211,211,211,96, 96,96,211,93,96,0 ] class << self attr_accessor :_re_scanner_trans_targs private :_re_scanner_trans_targs, :_re_scanner_trans_targs= end self._re_scanner_trans_targs = [ 76,77,1,2,76,4,76,6, 76,8,76,76,10,16,11,76, 12,13,14,15,17,18,19,20, 21,23,29,24,76,25,26,27, 28,30,31,32,33,34,76,36, 76,37,39,0,40,41,92,93, 93,42,93,93,93,45,46,93, 93,100,100,47,106,106,49,52, 106,116,106,118,55,106,119,106, 121,58,61,59,60,106,62,63, 64,65,66,67,106,123,124,106, 
70,71,125,126,72,73,126,74, 75,3,78,79,80,81,82,76, 83,76,86,87,76,88,76,89, 76,76,90,76,76,76,76,76, 76,84,76,85,5,76,7,76, 76,76,76,76,76,76,76,76, 76,9,22,76,35,91,38,94, 95,96,93,97,98,99,93,93, 93,93,43,93,93,44,93,93, 93,101,102,103,104,100,100,48, 100,105,100,107,108,109,106,110, 112,115,51,106,54,106,106,57, 68,106,50,106,111,106,106,113, 114,106,117,106,53,106,120,122, 106,56,106,106,106,69,126,127, 128,129,130,126 ] class << self attr_accessor :_re_scanner_trans_actions private :_re_scanner_trans_actions, :_re_scanner_trans_actions= end self._re_scanner_trans_actions = [ 1,2,0,0,3,0,4,0, 5,0,6,7,0,0,0,8, 0,0,0,0,0,0,0,0, 0,0,0,0,9,0,0,0, 0,0,0,0,0,0,10,0, 11,0,0,0,0,0,13,14, 15,0,16,17,18,0,0,19, 20,21,22,0,23,24,0,0, 26,0,27,0,0,28,0,29, 0,0,0,0,0,30,0,0, 0,0,0,0,31,0,0,32, 0,0,33,34,0,0,35,0, 0,0,0,0,0,0,0,38, 39,40,0,0,41,0,42,33, 43,44,33,45,46,47,48,49, 50,33,51,0,0,52,0,53, 54,55,56,57,58,59,60,61, 62,0,0,63,0,65,0,0, 33,33,66,0,33,67,68,69, 70,71,0,72,73,0,74,75, 76,0,33,33,0,77,78,0, 79,0,80,0,33,33,81,0, 0,0,0,82,0,83,84,0, 0,85,0,86,0,87,88,0, 0,89,33,90,0,91,33,0, 92,0,93,94,95,0,96,33, 33,33,33,97 ] class << self attr_accessor :_re_scanner_to_state_actions private :_re_scanner_to_state_actions, :_re_scanner_to_state_actions= end self._re_scanner_to_state_actions = [ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,36,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,64,64,64,0,0, 0,0,0,0,64,0,0,0, 0,0,64,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,64,0, 0,0,0 ] class << self attr_accessor :_re_scanner_from_state_actions private :_re_scanner_from_state_actions, :_re_scanner_from_state_actions= end self._re_scanner_from_state_actions = [ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,37,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,37,37,37,0,0, 0,0,0,0,37,0,0,0, 
0,0,37,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,37,0, 0,0,0 ] class << self attr_accessor :_re_scanner_eof_actions private :_re_scanner_eof_actions, :_re_scanner_eof_actions= end self._re_scanner_eof_actions = [ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,12,12, 12,12,0,0,0,0,0,0, 0,0,0,25,25,0,25,25, 0,25,25,25,25,25,25,25, 25,25,25,25,25,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,25,0,0, 0,0,0,0,0,0,0,0, 0,0,25,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0 ] class << self attr_accessor :_re_scanner_eof_trans private :_re_scanner_eof_trans, :_re_scanner_eof_trans= end self._re_scanner_eof_trans = [ 0,1,1,1,5,5,5,5, 5,12,12,12,12,12,12,12, 12,12,12,12,12,12,12,12, 12,12,12,12,12,12,12,12, 12,12,12,39,39,39,0,0, 0,0,48,48,51,53,53,58, 58,61,61,0,0,67,0,0, 72,0,0,0,0,0,0,0, 0,0,0,0,0,88,88,88, 92,92,92,92,0,117,117,118, 118,117,119,121,123,123,129,131, 133,135,140,0,0,0,154,154, 154,154,157,160,0,167,167,167, 169,169,0,186,186,186,188,188, 191,191,191,191,194,194,194,198, 198,198,198,203,203,205,0,212, 212,212,212 ] class << self attr_accessor :re_scanner_start end self.re_scanner_start = 76; class << self attr_accessor :re_scanner_first_final end self.re_scanner_first_final = 76; class << self attr_accessor :re_scanner_error end self.re_scanner_error = 0; class << self attr_accessor :re_scanner_en_char_type end self.re_scanner_en_char_type = 91; class << self attr_accessor :re_scanner_en_unicode_property end self.re_scanner_en_unicode_property = 92; class << self attr_accessor :re_scanner_en_character_set end self.re_scanner_en_character_set = 93; class << self attr_accessor :re_scanner_en_set_escape_sequence end self.re_scanner_en_set_escape_sequence = 100; class << self attr_accessor :re_scanner_en_escape_sequence end self.re_scanner_en_escape_sequence = 106; class << self attr_accessor :re_scanner_en_conditional_expression end self.re_scanner_en_conditional_expression = 126; class << self attr_accessor 
:re_scanner_en_main end self.re_scanner_en_main = 76; begin p ||= 0 pe ||= data.length cs = re_scanner_start top = 0 ts = nil te = nil act = 0 end begin testEof = false _slen, _trans, _keys, _inds, _acts, _nacts = nil _goto_level = 0 _resume = 10 _eof_trans = 15 _again = 20 _test_eof = 30 _out = 40 while true if _goto_level <= 0 if p == pe _goto_level = _test_eof next end if cs == 0 _goto_level = _out next end end if _goto_level <= _resume case _re_scanner_from_state_actions[cs] when 37 begin ts = p end end _keys = cs << 1 _inds = _re_scanner_index_offsets[cs] _slen = _re_scanner_key_spans[cs] _wide = data[p].ord _trans = if ( _slen > 0 && _re_scanner_trans_keys[_keys] <= _wide && _wide <= _re_scanner_trans_keys[_keys + 1] ) _re_scanner_indicies[ _inds + _wide - _re_scanner_trans_keys[_keys] ] else _re_scanner_indicies[ _inds + _slen ] end end if _goto_level <= _eof_trans cs = _re_scanner_trans_targs[_trans] if _re_scanner_trans_actions[_trans] != 0 case _re_scanner_trans_actions[_trans] when 39 begin self.group_depth = group_depth + 1 end when 33 begin te = p+1 end when 65 begin te = p+1 begin case text = copy(data, ts-1,te) when '\d'; emit(:type, :digit, text) when '\D'; emit(:type, :nondigit, text) when '\h'; emit(:type, :hex, text) when '\H'; emit(:type, :nonhex, text) when '\s'; emit(:type, :space, text) when '\S'; emit(:type, :nonspace, text) when '\w'; emit(:type, :word, text) when '\W'; emit(:type, :nonword, text) when '\R'; emit(:type, :linebreak, text) when '\X'; emit(:type, :xgrapheme, text) end begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 13 begin te = p+1 begin text = copy(data, ts-1,te) type = (text[1] == 'P') ^ (text[3] == '^') ? 
:nonproperty : :property name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase token = self.class.short_prop_map[name] || self.class.long_prop_map[name] raise ValidationError.for(:property, name) unless token self.emit(type, token.to_sym, text) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 17 begin te = p+1 begin # special case, emits two tokens emit(:literal, :literal, '-') emit(:set, :intersection, '&&') end end when 70 begin te = p+1 begin if prev_token[1] == :open emit(:set, :negate, '^') else emit(:literal, :literal, '^') end end end when 72 begin te = p+1 begin emit(:set, :intersection, '&&') end end when 68 begin te = p+1 begin begin stack[top] = cs top+= 1 cs = 100 _goto_level = _again next end end end when 66 begin te = p+1 begin emit(:literal, :literal, copy(data, ts, te)) end end when 15 begin te = p+1 begin text = copy(data, ts, te) emit(:literal, :literal, text) end end when 73 begin te = p p = p - 1; begin # ranges cant start with the opening bracket, a subset, or # intersection/negation/range operators if prev_token[0] == :set emit(:literal, :literal, '-') else emit(:set, :range, '-') end end end when 76 begin te = p p = p - 1; begin emit(:set, :open, '[') begin stack[top] = cs top+= 1 cs = 93 _goto_level = _again next end end end when 71 begin te = p p = p - 1; begin text = copy(data, ts, te) emit(:literal, :literal, text) end end when 16 begin begin p = ((te))-1; end begin # ranges cant start with the opening bracket, a subset, or # intersection/negation/range operators if prev_token[0] == :set emit(:literal, :literal, '-') else emit(:set, :range, '-') end end end when 19 begin begin p = ((te))-1; end begin emit(:set, :open, '[') begin stack[top] = cs top+= 1 cs = 93 _goto_level = _again next end end end when 14 begin begin p = ((te))-1; end begin text = copy(data, ts, te) emit(:literal, :literal, text) end end when 80 begin te = p+1 begin emit(:escape, :octal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level 
= _again next end end end when 77 begin te = p+1 begin p = p - 1; cs = 93; begin stack[top] = cs top+= 1 cs = 106 _goto_level = _again next end end end when 22 begin te = p+1 begin emit(:escape, :literal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 79 begin te = p p = p - 1; begin emit(:escape, :octal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 78 begin te = p p = p - 1; begin emit(:escape, :literal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 21 begin begin p = ((te))-1; end begin emit(:escape, :literal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 87 begin te = p+1 begin emit(:escape, :octal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 81 begin te = p+1 begin case text = copy(data, ts-1,te) when '\.'; emit(:escape, :dot, text) when '\|'; emit(:escape, :alternation, text) when '\^'; emit(:escape, :bol, text) when '\$'; emit(:escape, :eol, text) when '\?'; emit(:escape, :zero_or_one, text) when '\*'; emit(:escape, :zero_or_more, text) when '\+'; emit(:escape, :one_or_more, text) when '\('; emit(:escape, :group_open, text) when '\)'; emit(:escape, :group_close, text) when '\{'; emit(:escape, :interval_open, text) when '\}'; emit(:escape, :interval_close, text) when '\['; emit(:escape, :set_open, text) when '\]'; emit(:escape, :set_close, text) when "\\\\"; emit(:escape, :backslash, text) end begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 84 begin te = p+1 begin # \b is emitted as backspace only when inside a character set, otherwise # it is a word boundary anchor. A syntax might "normalize" it if needed. 
case text = copy(data, ts-1,te) when '\a'; emit(:escape, :bell, text) when '\b'; emit(:escape, :backspace, text) when '\e'; emit(:escape, :escape, text) when '\f'; emit(:escape, :form_feed, text) when '\n'; emit(:escape, :newline, text) when '\r'; emit(:escape, :carriage, text) when '\t'; emit(:escape, :tab, text) when '\v'; emit(:escape, :vertical_tab, text) end begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 30 begin te = p+1 begin text = copy(data, ts-1,te) if text[2] == '{' emit(:escape, :codepoint_list, text) else emit(:escape, :codepoint, text) end begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 94 begin te = p+1 begin emit(:escape, :hex, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 26 begin te = p+1 begin emit_meta_control_sequence(data, ts, te, :control) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 28 begin te = p+1 begin emit_meta_control_sequence(data, ts, te, :meta_sequence) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 82 begin te = p+1 begin p = p - 1; cs = ((in_set? ? 93 : 76)); begin stack[top] = cs top+= 1 cs = 91 _goto_level = _again next end end end when 83 begin te = p+1 begin p = p - 1; cs = ((in_set? ? 93 : 76)); begin stack[top] = cs top+= 1 cs = 92 _goto_level = _again next end end end when 24 begin te = p+1 begin emit(:escape, :literal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 88 begin te = p p = p - 1; begin text = copy(data, ts-1,te) # If not enough groups have been opened, there is a fallback to either an # octal or literal interpretation for 2+ digit numerical escapes. 
digits = text[1..-1] if digits.size == 1 || digits.to_i <= capturing_group_count emit(:backref, :number, text) elsif digits =~ /\A[0-7]{2,}\z/ emit(:escape, :octal, text) else emit(:escape, :literal, text[0..1]) emit(:literal, :literal, text[2..-1]) end begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 86 begin te = p p = p - 1; begin emit(:escape, :octal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 95 begin te = p p = p - 1; begin text = copy(data, ts-1,te) if regexp_encoding == Encoding::BINARY text.split(/(?=\\)/).each { |part| emit(:escape, :hex, part) } else emit(:escape, :utf8_hex, text) end begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 93 begin te = p p = p - 1; begin emit(:escape, :hex, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 90 begin te = p p = p - 1; begin emit_meta_control_sequence(data, ts, te, :control) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 92 begin te = p p = p - 1; begin emit_meta_control_sequence(data, ts, te, :meta_sequence) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 85 begin te = p p = p - 1; begin emit(:escape, :literal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 32 begin begin p = ((te))-1; end begin text = copy(data, ts-1,te) if regexp_encoding == Encoding::BINARY text.split(/(?=\\)/).each { |part| emit(:escape, :hex, part) } else emit(:escape, :utf8_hex, text) end begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 23 begin begin p = ((te))-1; end begin emit(:escape, :literal, copy(data, ts-1,te)) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 35 begin te = p+1 begin text = copy(data, ts, te-1) text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID') emit(:conditional, :condition, text) 
emit(:conditional, :condition_close, ')') end end when 96 begin te = p+1 begin p = p - 1; begin stack[top] = cs top+= 1 cs = 76 _goto_level = _again next end end end when 97 begin te = p p = p - 1; begin p = p - 1; begin stack[top] = cs top+= 1 cs = 76 _goto_level = _again next end end end when 34 begin begin p = ((te))-1; end begin p = p - 1; begin stack[top] = cs top+= 1 cs = 76 _goto_level = _again next end end end when 41 begin te = p+1 begin emit(:meta, :dot, copy(data, ts, te)) end end when 45 begin te = p+1 begin if conditional_stack.last == group_depth emit(:conditional, :separator, copy(data, ts, te)) else emit(:meta, :alternation, copy(data, ts, te)) end end end when 44 begin te = p+1 begin emit(:anchor, :bol, copy(data, ts, te)) end end when 38 begin te = p+1 begin emit(:anchor, :eol, copy(data, ts, te)) end end when 62 begin te = p+1 begin emit(:keep, :mark, copy(data, ts, te)) end end when 61 begin te = p+1 begin case text = copy(data, ts, te) when '\A'; emit(:anchor, :bos, text) when '\z'; emit(:anchor, :eos, text) when '\Z'; emit(:anchor, :eos_ob_eol, text) when '\b'; emit(:anchor, :word_boundary, text) when '\B'; emit(:anchor, :nonword_boundary, text) when '\G'; emit(:anchor, :match_start, text) end end end when 43 begin te = p+1 begin append_literal(data, ts, te) end end when 52 begin te = p+1 begin text = copy(data, ts, te) conditional_stack << group_depth emit(:conditional, :open, text[0..-2]) emit(:conditional, :condition_open, '(') begin stack[top] = cs top+= 1 cs = 126 _goto_level = _again next end end end when 53 begin te = p+1 begin text = copy(data, ts, te) if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/ raise ValidationError.for(:group_option, $1 || "-#{$2}", text) end emit_options(text) end end when 6 begin te = p+1 begin case text = copy(data, ts, te) when '(?='; emit(:assertion, :lookahead, text) when '(?!'; emit(:assertion, :nlookahead, text) when '(?<='; emit(:assertion, :lookbehind, text) when '(?<!'; emit(:assertion, :nlookbehind, 
text) end end end when 5 begin te = p+1 begin case text = copy(data, ts, te) when '(?:'; emit(:group, :passive, text) when '(?>'; emit(:group, :atomic, text) when '(?~'; emit(:group, :absence, text) when /^\(\?(?:<>|'')/ raise ValidationError.for(:group, 'named group', 'name is empty') when /^\(\?<[^>]+>/ emit(:group, :named_ab, text) when /^\(\?'[^']+'/ emit(:group, :named_sq, text) end end end when 9 begin te = p+1 begin case text = copy(data, ts, te) when /^\\k(.)[^0-9\-][^+\-]*['>]$/ emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text) when /^\\k(.)0*[1-9]\d*['>]$/ emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text) when /^\\k(.)-0*[1-9]\d*['>]$/ emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text) when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/ emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text) when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/ emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text) else raise ValidationError.for(:backref, 'backreference', 'invalid ref ID') end end end when 8 begin te = p+1 begin case text = copy(data, ts, te) when /^\\g(.)[^0-9+\-].*['>]$/ emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text) when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/ emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text) when /^\\g(.)[+-]0*[1-9]\d*/ emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text) else raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID') end end end when 59 begin te = p+1 begin case text = copy(data, ts, te) when '?' 
; emit(:quantifier, :zero_or_one, text) when '??'; emit(:quantifier, :zero_or_one_reluctant, text) when '?+'; emit(:quantifier, :zero_or_one_possessive, text) end end end when 55 begin te = p+1 begin case text = copy(data, ts, te) when '*' ; emit(:quantifier, :zero_or_more, text) when '*?'; emit(:quantifier, :zero_or_more_reluctant, text) when '*+'; emit(:quantifier, :zero_or_more_possessive, text) end end end when 57 begin te = p+1 begin case text = copy(data, ts, te) when '+' ; emit(:quantifier, :one_or_more, text) when '+?'; emit(:quantifier, :one_or_more_reluctant, text) when '++'; emit(:quantifier, :one_or_more_possessive, text) end end end when 11 begin te = p+1 begin emit(:quantifier, :interval, copy(data, ts, te)) end end when 49 begin te = p+1 begin if free_spacing emit(:free_space, :comment, copy(data, ts, te)) else # consume only the pound sign (#) and backtrack to do regular scanning append_literal(data, ts, ts + 1) begin p = (( ts + 1))-1; end end end end when 51 begin te = p p = p - 1; begin text = copy(data, ts, te) if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/ raise ValidationError.for(:group_option, $1 || "-#{$2}", text) end emit_options(text) end end when 50 begin te = p p = p - 1; begin self.capturing_group_count = capturing_group_count + 1 text = copy(data, ts, te) emit(:group, :capture, text) end end when 58 begin te = p p = p - 1; begin case text = copy(data, ts, te) when '?' 
; emit(:quantifier, :zero_or_one, text) when '??'; emit(:quantifier, :zero_or_one_reluctant, text) when '?+'; emit(:quantifier, :zero_or_one_possessive, text) end end end when 54 begin te = p p = p - 1; begin case text = copy(data, ts, te) when '*' ; emit(:quantifier, :zero_or_more, text) when '*?'; emit(:quantifier, :zero_or_more_reluctant, text) when '*+'; emit(:quantifier, :zero_or_more_possessive, text) end end end when 56 begin te = p p = p - 1; begin case text = copy(data, ts, te) when '+' ; emit(:quantifier, :one_or_more, text) when '+?'; emit(:quantifier, :one_or_more_reluctant, text) when '++'; emit(:quantifier, :one_or_more_possessive, text) end end end when 63 begin te = p p = p - 1; begin append_literal(data, ts, te) end end when 60 begin te = p p = p - 1; begin begin stack[top] = cs top+= 1 cs = 106 _goto_level = _again next end end end when 48 begin te = p p = p - 1; begin if free_spacing emit(:free_space, :comment, copy(data, ts, te)) else # consume only the pound sign (#) and backtrack to do regular scanning append_literal(data, ts, ts + 1) begin p = (( ts + 1))-1; end end end end when 47 begin te = p p = p - 1; begin if free_spacing emit(:free_space, :whitespace, copy(data, ts, te)) else append_literal(data, ts, te) end end end when 46 begin te = p p = p - 1; begin append_literal(data, ts, te) end end when 3 begin begin p = ((te))-1; end begin text = copy(data, ts, te) if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/ raise ValidationError.for(:group_option, $1 || "-#{$2}", text) end emit_options(text) end end when 10 begin begin p = ((te))-1; end begin append_literal(data, ts, te) end end when 7 begin begin p = ((te))-1; end begin begin stack[top] = cs top+= 1 cs = 106 _goto_level = _again next end end end when 1 begin case act when 0 begin begin cs = 0 _goto_level = _again next end end when 58 begin begin p = ((te))-1; end append_literal(data, ts, te) end end end when 75 begin text = copy(data, ts ? 
ts-1 : 0,-1) raise PrematureEndError.new(text) end begin te = p p = p - 1; begin emit(:set, :open, '[') begin stack[top] = cs top+= 1 cs = 93 _goto_level = _again next end end end when 18 begin text = copy(data, ts ? ts-1 : 0,-1) raise PrematureEndError.new(text) end begin begin p = ((te))-1; end begin emit(:set, :open, '[') begin stack[top] = cs top+= 1 cs = 93 _goto_level = _again next end end end when 89 begin text = copy(data, ts ? ts-1 : 0,-1) raise PrematureEndError.new(text) end begin te = p p = p - 1; begin emit_meta_control_sequence(data, ts, te, :control) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 91 begin text = copy(data, ts ? ts-1 : 0,-1) raise PrematureEndError.new(text) end begin te = p p = p - 1; begin emit_meta_control_sequence(data, ts, te, :meta_sequence) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 27 begin text = copy(data, ts ? ts-1 : 0,-1) raise PrematureEndError.new(text) end begin begin p = ((te))-1; end begin emit_meta_control_sequence(data, ts, te, :control) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 29 begin text = copy(data, ts ? ts-1 : 0,-1) raise PrematureEndError.new(text) end begin begin p = ((te))-1; end begin emit_meta_control_sequence(data, ts, te, :meta_sequence) begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 31 begin text = copy(data, ts ? 
ts-1 : 0,-1) raise ValidationError.for(:sequence, 'sequence', text) end begin te = p+1 begin begin top -= 1 cs = stack[top] _goto_level = _again next end end end when 4 begin self.group_depth = group_depth - 1 end begin te = p+1 begin emit(:group, :comment, copy(data, ts, te)) end end when 40 begin self.group_depth = group_depth - 1 end begin te = p+1 begin if conditional_stack.last == group_depth + 1 conditional_stack.pop emit(:conditional, :close, ')') elsif group_depth >= 0 if spacing_stack.length > 1 && spacing_stack.last[:depth] == group_depth + 1 spacing_stack.pop self.free_spacing = spacing_stack.last[:free_spacing] end emit(:group, :close, ')') else raise ValidationError.for(:group, 'group', 'unmatched close parenthesis') end end end when 42 begin self.set_depth = set_depth + 1 end begin te = p+1 begin emit(:set, :open, copy(data, ts, te)) begin stack[top] = cs top+= 1 cs = 93 _goto_level = _again next end end end when 69 begin self.set_depth = set_depth - 1 end begin te = p+1 begin emit(:set, :close, copy(data, ts, te)) if in_set? begin top -= 1 cs = stack[top] _goto_level = _again next end else begin cs = 76 _goto_level = _again next end end end end when 74 begin self.set_depth = set_depth - 1 end begin te = p+1 begin # special case, emits two tokens emit(:literal, :literal, '-') emit(:set, :close, ']') if in_set? 
begin top -= 1 cs = stack[top] _goto_level = _again next end else begin cs = 76 _goto_level = _again next end end end end when 20 begin self.set_depth = set_depth - 1 end begin te = p+1 begin text = copy(data, ts, te) type = :posixclass class_name = text[2..-3] if class_name[0] == '^' class_name = class_name[1..-1] type = :nonposixclass end unless POSIX_CLASSES[class_name] raise ValidationError.for(:posix_class, text) end emit(type, class_name.to_sym, text) end end when 67 begin te = p+1 end begin self.set_depth = set_depth + 1 end when 2 begin te = p+1 end begin act = 58; end end end end if _goto_level <= _again case _re_scanner_to_state_actions[cs] when 64 begin ts = nil; end when 36 begin ts = nil; end begin act = 0 end end if cs == 0 _goto_level = _out next end p += 1 if p != pe _goto_level = _resume next end end if _goto_level <= _test_eof if p == eof if _re_scanner_eof_trans[cs] > 0 _trans = _re_scanner_eof_trans[cs] - 1; _goto_level = _eof_trans next; end case _re_scanner_eof_actions[cs] when 12 begin raise PrematureEndError.new('unicode property') end when 25 begin text = copy(data, ts ? ts-1 : 0,-1) raise PrematureEndError.new(text) end end end end if _goto_level <= _out break end end end # to avoid "warning: assigned but unused variable - testEof" testEof = testEof if cs == re_scanner_error text = copy(data, ts ? ts-1 : 0,-1) raise ScannerError.new("Scan error at '#{text}'") end raise PrematureEndError.new("(missing group closing paranthesis) "+ "[#{group_depth}]") if in_group? raise PrematureEndError.new("(missing set closing bracket) "+ "[#{set_depth}]") if in_set? # when the entire expression is a literal run emit_literal if literal_run tokens end
Private Instance Methods
Source
# File lib/regexp_parser/scanner.rb, line 2554
# Buffers the text found between ts and te in data so that it can be emitted
# later as part of a single literal token. The buffer is created on first use.
# Returns the buffer.
def append_literal(data, ts, te)
  self.literal_run = [] unless literal_run
  literal_run << copy(data, ts, te)
end
Appends one or more characters to the literal buffer, to be emitted later by a call to emit_literal.
Source
# File lib/regexp_parser/scanner.rb, line 2548
# Returns the elements of data from index ts up to, but not including, te as
# a String. The elements are packed as signed 8-bit values and the resulting
# String is tagged as UTF-8.
def copy(data, ts, te)
  selected = data[ts...te]
  packed = selected.pack('c*')
  packed.force_encoding('utf-8')
end
Copies the bytes of data from ts up to (but not including) te and returns them as UTF-8 text.
Source
# File lib/regexp_parser/scanner.rb, line 2559
# Joins the buffered literal run into one string, clears the buffer, and emits
# the string as a single :literal token.
def emit_literal
  # The buffer is cleared before emitting because emit itself flushes any
  # pending literal run when it is called.
  run_text = literal_run.join.tap { self.literal_run = nil }
  emit(:literal, :literal, run_text)
end
Emits the literal run collected by calls to the append_literal method.
Source
# File lib/regexp_parser/scanner.rb, line 2594
# Emits an :escape token for a meta or control sequence after checking that
# the byte which ends the sequence is an ASCII byte.
#
# data  - the scanned input as an array of signed byte values
# ts    - byte offset where the matched sequence starts; the emitted text
#         starts one byte earlier (presumably at the leading backslash)
# te    - byte offset just past the end of the matched sequence
# token - the token name to emit (:control or :meta_sequence at the call
#         sites in scan)
#
# Raises ValidationError when the final byte of the sequence is not ASCII.
def emit_meta_control_sequence(data, ts, te, token)
  # Validate the last byte of THIS sequence. Looking at data.last would test
  # the final byte of the whole pattern, which rejects valid escapes that are
  # merely followed by non-ASCII text and accepts invalid ones followed by
  # ASCII text.
  final_byte = data[te - 1]
  if final_byte < 0x00 || final_byte > 0x7F
    raise ValidationError.for(:sequence, 'escape', token.to_s)
  end
  emit(:escape, token, copy(data, ts-1,te))
end
Source
# File lib/regexp_parser/scanner.rb, line 2565
# Emits a :group token for an inline options expression and updates the
# free-spacing state according to the 'x' flag.
#
# An expression with a trailing colon is emitted as :options and pushes a new
# entry onto spacing_stack for the current group depth. Without the colon it
# is emitted as :options_switch and overwrites the free-spacing value of the
# topmost spacing_stack entry.
def emit_options(text)
  # Ruby allows things like '(?-xxxx)' or '(?xx-xx--xx-:abc)'.
  # NOTE(review): the negative group is repeated, so only its last repetition
  # is captured - confirm this is sufficient for inputs such as '(?-x-m)'.
  text =~ /\(\?([mixdau]*)(-(?:[mix]*))*(:)?/
  enabled = Regexp.last_match(1)
  disabled = Regexp.last_match(2)
  scoped = Regexp.last_match(3)

  self.free_spacing = true if enabled.include?('x')

  # If the x appears in both, treat it like ruby does, the second cancels
  # the first.
  self.free_spacing = false if disabled && disabled.include?('x')

  kind =
    if scoped
      spacing_stack << { free_spacing: free_spacing, depth: group_depth }
      :options
    else
      # switch for parent group level
      spacing_stack.last[:free_spacing] = free_spacing
      :options_switch
    end

  emit(:group, kind, text)
end
Source
# File lib/regexp_parser/scanner.rb, line 2519
# Determines the encoding to assume for the pattern being scanned.
#
# input_object - the Regexp or String passed to scan
# options      - Regexp option flags (Integer) supplied with a String, or nil
#
# Returns the Regexp's own encoding when a Regexp is given, Encoding::BINARY
# when options has the Regexp::NOENCODING flag set, and nil otherwise.
def extract_encoding(input_object, options)
  return input_object.encoding if input_object.is_a?(::Regexp)

  # Compare with zero explicitly: 0 is truthy in Ruby, so the bare result of
  # the bitwise AND would select BINARY for every non-nil options value.
  Encoding::BINARY if options && (options & Regexp::NOENCODING) != 0
end
Source
# File lib/regexp_parser/scanner.rb, line 2527
# Reports whether the pattern is to be scanned in free-spacing (extended)
# mode, i.e. whether the Regexp::EXTENDED flag is set.
#
# For a Regexp the flags are read from the Regexp itself; for a String they
# are taken from options. Returns false when no flags are available.
#
# Raises ArgumentError when options are given for anything but a String.
def free_spacing?(input_object, options)
  raise ArgumentError, 'options cannot be supplied unless scanning a String' if options && !input_object.is_a?(String)

  flags = input_object.is_a?(::Regexp) ? input_object.options : options
  flags ? (flags & Regexp::EXTENDED) != 0 : false
end
Source
# File lib/regexp_parser/scanner.rb, line 2539
# Reports whether group_depth is currently above zero, i.e. at least one
# group is still open.
def in_group?
  group_depth >= 1
end