1 module dparse.lexer;
2 
3 import std.typecons;
4 import std.typetuple;
5 import std.array;
6 import std.algorithm;
7 import std.range;
8 import std.experimental.lexer;
9 import std.traits;
10 import core.cpuid : sse42;
11 version (D_InlineAsm_X86_64)
12 {
13     version (Windows) {}
14     else version = iasm64NotWindows;
15 }
16 
/// Operator spellings recognised by the lexer. One row per leading character;
/// element order is unchanged.
private enum operators = [
    ",",
    ".", "..", "...",
    "/", "/=",
    "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=",
    "$",
    "%", "%=",
    "&", "&&", "&=",
    "(", ")",
    "*", "*=",
    "+", "++", "+=",
    "-", "--", "-=",
    ":", ";",
    "<", "<<", "<<=", "<=", "<>", "<>=",
    "=", "==", "=>",
    ">", ">=", ">>", ">>=", ">>>", ">>>=",
    "?", "@",
    "[", "]",
    "^", "^=", "^^", "^^=",
    "{",
    "|", "|=", "||",
    "}",
    "~", "~="
];
25 
/// Keywords recognised by the lexer. One row per initial letter; element
/// order is unchanged.
private enum keywords = [
    "abstract", "alias", "align", "asm", "assert", "auto",
    "bool", "break", "byte",
    "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class",
        "const", "continue", "creal",
    "dchar", "debug", "default", "delegate", "delete", "deprecated", "do",
        "double",
    "else", "enum", "export", "extern",
    "false", "final", "finally", "float", "for", "foreach", "foreach_reverse",
        "function",
    "goto",
    "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int",
        "interface", "invariant", "ireal", "is",
    "lazy", "long",
    "macro", "mixin", "module",
    "new", "nothrow", "null",
    "out", "override",
    "package", "pragma", "private", "protected", "public", "pure",
    "real", "ref", "return",
    "scope", "shared", "short", "static", "struct", "super", "switch",
        "synchronized",
    "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof",
    "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort",
    "version", "void", "volatile",
    "wchar", "while", "with",
    "__DATE__", "__EOF__", "__FILE__", "__FILE_FULL_PATH__", "__FUNCTION__",
        "__gshared", "__LINE__", "__MODULE__", "__parameters",
        "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits",
        "__vector", "__VENDOR__", "__VERSION__"
];
46 
/// Names of token kinds whose text is not fixed (comments, identifiers,
/// literals, ...). Element order is unchanged.
private enum dynamicTokens = [
    // miscellaneous
    "specialTokenSequence", "comment", "identifier", "scriptLine", "whitespace",
    // numeric literals
    "doubleLiteral", "floatLiteral", "idoubleLiteral", "ifloatLiteral",
    "intLiteral", "longLiteral", "realLiteral", "irealLiteral",
    "uintLiteral", "ulongLiteral",
    // character and string literals
    "characterLiteral",
    "dstringLiteral", "stringLiteral", "wstringLiteral"
];
55 
/// Flat list of (prefix, handler name) pairs: the even elements are input
/// prefixes and the odd elements are the names of the DLexer member functions
/// passed to the Lexer mixin for those prefixes.
private enum pseudoTokenHandlers = [
    // string and character literals
    "\"",     "lexStringLiteral",
    "`",      "lexWysiwygString",
    // comments
    "//",     "lexSlashSlashComment",
    "/*",     "lexSlashStarComment",
    "/+",     "lexSlashPlusComment",
    // dot, character literal and numbers
    ".",      "lexDot",
    "'",      "lexCharacterLiteral",
    "0",      "lexNumber",
    "1",      "lexDecimal",
    "2",      "lexDecimal",
    "3",      "lexDecimal",
    "4",      "lexDecimal",
    "5",      "lexDecimal",
    "6",      "lexDecimal",
    "7",      "lexDecimal",
    "8",      "lexDecimal",
    "9",      "lexDecimal",
    // prefixed string literals
    "q\"",    "lexDelimitedString",
    "q{",     "lexTokenString",
    "r\"",    "lexWysiwygString",
    "x\"",    "lexHexString",
    // whitespace
    " ",      "lexWhitespace",
    "\t",     "lexWhitespace",
    "\r",     "lexWhitespace",
    "\n",     "lexWhitespace",
    "\v",     "lexWhitespace",
    "\f",     "lexWhitespace",
    "\u2028", "lexLongNewline",
    "\u2029", "lexLongNewline",
    // '#' sequences
    "#!",     "lexScriptLine",
    "#line",  "lexSpecialTokenSequence"
];
89 
/// Token ID type for the D lexer, derived from the operator, dynamic-token
/// and keyword tables of this module.
public alias IdType = TokenIdType!(operators, dynamicTokens, keywords);

/**
 * Converts an IdType back to its string form.
 *
 * Examples:
 * ---
 * IdType c = tok!"case";
 * assert (str(c) == "case");
 * ---
 */
public alias str = tokenStringRepresentation!(IdType, operators, dynamicTokens, keywords);

/**
 * Yields the token ID for the token named by `token`.
 *
 * Valid names are the entries of the $(B operators), $(B keywords), and
 * $(B dynamicTokens) enums.
 * Example:
 * ---
 * import dparse.lexer;
 * IdType t = tok!"floatLiteral";
 * ---
 */
public alias tok(string token) = TokenId!(IdType, operators, dynamicTokens, keywords, token);
119 
// Source text handed to TokenStructure as the extra members of Token:
// two comment strings plus ordering by the token's `index` field.
private enum extraFields = q{
    string comment;
    string trailingComment;

    int opCmp(size_t i) const pure nothrow @safe {
        return index < i ? -1 : (index > i ? 1 : 0);
    }

    int opCmp(ref const typeof(this) other) const pure nothrow @safe {
        return opCmp(other.index);
    }
};

/// The token type produced by the D lexer
public alias Token = std.experimental.lexer.TokenStructure!(IdType, extraFields);
137 
/**
 * Selects whether the lexer hands out whitespace tokens.
 */
public enum WhitespaceBehavior : ubyte
{
    /// Whitespace tokens are produced and carry their source text
    include = 0,
    /// Whitespace tokens are dropped by DLexer.popFront
    skip = 1,
}
146 
/**
 * Bit flags selecting how string literal tokens are represented
 */
public enum StringBehavior : ubyte
{
    /// Do not include quote characters, process escape sequences
    compiler = 0,
    /// Opening quotes, closing quotes, and string suffixes are included in the
    /// string token
    includeQuoteChars = 1 << 0,
    /// String escape sequences are not replaced
    notEscaped = 1 << 1,
    /// Not modified at all. Useful for formatters or highlighters
    source = includeQuoteChars | notEscaped
}
162 
/**
 * Configure comment handling.
 *
 * NOTE(review): presumably selects whether comment text is interned in the
 * StringCache by the comment lexing functions; those functions are outside
 * this part of the file, so confirm there.
 */
public enum CommentBehavior : bool
{
    /// Default value used by LexerConfig
    intern = true,
    /// Value forced by getTokensForParser, which interns the merged comment
    /// text itself
    noIntern = false
}

/**
 * Options passed to DLexer. The explicit initialisers below are the same
 * values the fields would take by default.
 */
public struct LexerConfig
{
    /// Name of the file being lexed
    string fileName;
    /// Representation of string literal tokens
    StringBehavior stringBehavior = StringBehavior.compiler;
    /// Whether whitespace tokens are produced
    WhitespaceBehavior whitespaceBehavior = WhitespaceBehavior.include;
    /// Comment handling mode
    CommentBehavior commentBehavior = CommentBehavior.intern;
}
178 
/**
 * Token IDs of the basic (built-in) type keywords.
 */
public alias BasicTypes = AliasSeq!(
        tok!"int", tok!"bool", tok!"byte", tok!"cdouble", tok!"cent",
        tok!"cfloat", tok!"char", tok!"creal", tok!"dchar", tok!"double",
        tok!"float", tok!"idouble", tok!"ifloat", tok!"ireal", tok!"long",
        tok!"real", tok!"short", tok!"ubyte", tok!"ucent", tok!"uint",
        tok!"ulong", tok!"ushort", tok!"void", tok!"wchar");

/**
 * Returns: true if the given ID is one of the $(B BasicTypes).
 */
public bool isBasicType(IdType type) nothrow pure @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!BasicTypes != 0;
}
205 
/**
 * Token IDs of the numeric literal kinds.
 */
public alias NumberLiterals = AliasSeq!(
        tok!"doubleLiteral", tok!"floatLiteral",
        tok!"idoubleLiteral", tok!"ifloatLiteral",
        tok!"intLiteral", tok!"longLiteral",
        tok!"realLiteral", tok!"irealLiteral",
        tok!"uintLiteral", tok!"ulongLiteral");

/**
 * Returns: true if the given ID is one of the $(B NumberLiterals).
 */
public bool isNumberLiteral(IdType type) nothrow pure @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!NumberLiterals != 0;
}
230 
/**
 * Integer literal token types.
 */
public alias IntegerLiterals = AliasSeq!(
        tok!"intLiteral", tok!"longLiteral",
        tok!"uintLiteral", tok!"ulongLiteral");

/**
 * Returns: true if the given ID type is for an integer literal.
 */
public bool isIntegerLiteral(IdType type) nothrow pure @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!IntegerLiterals != 0;
}
253 
/**
 * Token IDs of the operators, in the same order as the operator table.
 */
public alias Operators = AliasSeq!(
        tok!",",
        tok!".", tok!"..", tok!"...",
        tok!"/", tok!"/=",
        tok!"!", tok!"!<", tok!"!<=", tok!"!<>", tok!"!<>=", tok!"!=",
        tok!"!>", tok!"!>=",
        tok!"$",
        tok!"%", tok!"%=",
        tok!"&", tok!"&&", tok!"&=",
        tok!"(", tok!")",
        tok!"*", tok!"*=",
        tok!"+", tok!"++", tok!"+=",
        tok!"-", tok!"--", tok!"-=",
        tok!":", tok!";",
        tok!"<", tok!"<<", tok!"<<=", tok!"<=", tok!"<>", tok!"<>=",
        tok!"=", tok!"==", tok!"=>",
        tok!">", tok!">=", tok!">>", tok!">>=", tok!">>>", tok!">>>=",
        tok!"?", tok!"@",
        tok!"[", tok!"]",
        tok!"^", tok!"^=", tok!"^^", tok!"^^=",
        tok!"{",
        tok!"|", tok!"|=", tok!"||",
        tok!"}",
        tok!"~", tok!"~=");

/**
 * Returns: true if the given ID is one of the $(B Operators).
 */
public bool isOperator(IdType type) nothrow pure @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!Operators != 0;
}
285 
/**
 * Token IDs of the keywords. The basic type keywords are not part of this
 * list; they are collected in $(B BasicTypes).
 */
public alias Keywords = AliasSeq!(
        tok!"abstract", tok!"alias", tok!"align", tok!"asm", tok!"assert",
        tok!"auto",
        tok!"break",
        tok!"case", tok!"cast", tok!"catch", tok!"class", tok!"const",
        tok!"continue",
        tok!"debug", tok!"default", tok!"delegate", tok!"delete",
        tok!"deprecated", tok!"do",
        tok!"else", tok!"enum", tok!"export", tok!"extern",
        tok!"false", tok!"final", tok!"finally", tok!"for", tok!"foreach",
        tok!"foreach_reverse", tok!"function",
        tok!"goto",
        tok!"if", tok!"immutable", tok!"import", tok!"in", tok!"inout",
        tok!"interface", tok!"invariant", tok!"is",
        tok!"lazy",
        tok!"macro", tok!"mixin", tok!"module",
        tok!"new", tok!"nothrow", tok!"null",
        tok!"out", tok!"override",
        tok!"package", tok!"pragma", tok!"private", tok!"protected",
        tok!"public", tok!"pure",
        tok!"ref", tok!"return",
        tok!"scope", tok!"shared", tok!"static", tok!"struct", tok!"super",
        tok!"switch", tok!"synchronized",
        tok!"template", tok!"this", tok!"throw", tok!"true", tok!"try",
        tok!"typedef", tok!"typeid", tok!"typeof",
        tok!"union", tok!"unittest",
        tok!"version", tok!"volatile",
        tok!"while", tok!"with",
        tok!"__DATE__", tok!"__EOF__", tok!"__FILE__",
        tok!"__FILE_FULL_PATH__", tok!"__FUNCTION__", tok!"__gshared",
        tok!"__LINE__", tok!"__MODULE__", tok!"__parameters",
        tok!"__PRETTY_FUNCTION__", tok!"__TIME__", tok!"__TIMESTAMP__",
        tok!"__traits", tok!"__vector", tok!"__VENDOR__", tok!"__VERSION__");

/**
 * Returns: true if the given ID is one of the $(B Keywords).
 */
public bool isKeyword(IdType type) pure nothrow @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!Keywords != 0;
}
327 
/**
 * Token IDs of the string literal kinds.
 */
public alias StringLiterals = AliasSeq!(
        tok!"dstringLiteral",
        tok!"stringLiteral",
        tok!"wstringLiteral");

/**
 * Returns: true if the given ID is one of the $(B StringLiterals).
 */
public bool isStringLiteral(IdType type) pure nothrow @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!StringLiterals != 0;
}
350 
/**
 * Token IDs of the protection attribute keywords.
 */
public alias Protections = AliasSeq!(
        tok!"export",
        tok!"package",
        tok!"private",
        tok!"public",
        tok!"protected");

/**
 * Returns: true if the given ID is one of the $(B Protections).
 */
public bool isProtection(IdType type) pure nothrow @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!Protections != 0;
}
373 
/**
 * Token IDs of the double-underscore special keywords listed below.
 */
public alias SpecialTokens = AliasSeq!(
    tok!"__DATE__", tok!"__TIME__", tok!"__TIMESTAMP__",
    tok!"__VENDOR__", tok!"__VERSION__",
    tok!"__FILE__", tok!"__FILE_FULL_PATH__", tok!"__LINE__",
    tok!"__MODULE__", tok!"__FUNCTION__", tok!"__PRETTY_FUNCTION__");

/**
 * Returns: true if the given ID is one of the $(B SpecialTokens).
 */
public bool isSpecialToken(IdType type) pure nothrow @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!SpecialTokens != 0;
}
392 
/**
 * Token IDs treated as literals: string, number and character literals, the
 * special tokens, `true`, `false`, `null` and `$`.
 */
public alias Literals = AliasSeq!(
        StringLiterals,
        NumberLiterals,
        tok!"characterLiteral",
        SpecialTokens,
        tok!"true", tok!"false", tok!"null", tok!"$");

/**
 * Returns: true if the given ID is one of the $(B Literals).
 */
public bool isLiteral(IdType type) pure nothrow @safe @nogc
{
    // among yields the 1-based position of the match, or 0 when absent
    return type.among!Literals != 0;
}
409 
/**
 * Lexes the given source code into an array of tokens.
 *
 * Whitespace tokens, special token sequences and non-documentation comments
 * are dropped. Documentation comments (triple-slash line comments and block
 * comments opened with slash-plus-plus or slash-star-star) are passed through
 * unDecorateComment and attached to tokens:
 * $(UL
 * $(LI a doc comment starting on the same line as the most recently emitted
 *     token becomes that token's `trailingComment`;)
 * $(LI any other doc comment becomes part of the `comment` of the next
 *     emitted token.)
 * )
 * Several comments attached to the same token are joined with a newline.
 * Lexing stops at the `__EOF__` token, which is not emitted, or at the end of
 * the input.
 *
 * Params:
 *     sourceCode = the bytes of the source code to lex
 *     config = lexer configuration; `whitespaceBehavior` and `commentBehavior`
 *         are overridden with `skip` and `noIntern`
 *     cache = string cache used by the lexer and for the attached comments
 * Returns: the tokens, with comments attached.
 */
const(Token)[] getTokensForParser(R)(R sourceCode, LexerConfig config, StringCache* cache)
if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
{
    enum CommentType : ubyte
    {
        notDoc,
        line,
        block
    }

    // Classifies a comment by its first three characters.
    static CommentType commentType(string comment) pure nothrow @safe
    {
        if (comment.length < 3)
            return CommentType.notDoc;
        if (comment[0 ..3] == "///")
            return CommentType.line;
        if (comment[0 ..3] == "/++" || comment[0 ..3] == "/**")
            return CommentType.block;
        return CommentType.notDoc;
    }

    config.whitespaceBehavior = WhitespaceBehavior.skip;
    config.commentBehavior = CommentBehavior.noIntern;

    auto leadingCommentAppender = appender!(char[])();
    leadingCommentAppender.reserve(1024);
    auto trailingCommentAppender = appender!(char[])();
    trailingCommentAppender.reserve(1024);
    // Set when a doc comment was seen since the last emitted token, even if
    // its undecorated text turned out to be empty.
    bool hadDdoc;
    string empty = cache.intern("");
    auto output = appender!(typeof(return))();
    auto lexer = DLexer(sourceCode, config, cache);
    size_t tokenCount;
    loop: while (!lexer.empty) switch (lexer.front.type)
    {
    case tok!"specialTokenSequence":
    case tok!"whitespace":
        lexer.popFront();
        break;
    case tok!"comment":
        final switch (commentType(lexer.front.text))
        {
        case CommentType.block:
        case CommentType.line:
            if (tokenCount > 0 && lexer.front.line == output.data[tokenCount - 1].line)
            {
                // Same line as the previous token: trailing comment.
                if (!trailingCommentAppender.data.empty)
                    trailingCommentAppender.put('\n');
                unDecorateComment(lexer.front.text, trailingCommentAppender);
                hadDdoc = true;
            }
            else
            {
                // Otherwise the comment documents the next token.
                if (!leadingCommentAppender.data.empty)
                    leadingCommentAppender.put('\n');
                unDecorateComment(lexer.front.text, leadingCommentAppender);
                hadDdoc = true;
            }
            lexer.popFront();
            break;
        case CommentType.notDoc:
            lexer.popFront();
            break;
        }
        break;
    case tok!"__EOF__":
        // The pending trailing comment is flushed after the loop.
        break loop;
    default:
        Token t = lexer.front;
        lexer.popFront();
        tokenCount++;
        if (!output.data.empty && !trailingCommentAppender.data.empty)
        {
            (cast() output.data[$ - 1].trailingComment) =
                cache.intern(cast(string) trailingCommentAppender.data);
            hadDdoc = false;
        }
        // Interned empty string: a doc comment was present but had no text;
        // null: there was no doc comment at all.
        t.comment = leadingCommentAppender.data.length > 0
            ? cache.intern(cast(string) leadingCommentAppender.data) : (hadDdoc ? empty : null);
        leadingCommentAppender.clear();
        trailingCommentAppender.clear();
        hadDdoc = false;
        output.put(t);
        break;
    }
    // Flush a trailing comment that belongs to the last emitted token. This
    // covers both ways of leaving the loop: the `__EOF__` token and plain end
    // of input. Previously only the `__EOF__` case flushed, so a trailing doc
    // comment on the last token of an ordinary file was dropped.
    if (!output.data.empty && !trailingCommentAppender.data.empty)
    {
        (cast() output.data[$ - 1].trailingComment) =
            cache.intern(cast(string) trailingCommentAppender.data);
    }
    return output.data;
}
504 
505 /**
506  * The D lexer struct.
507  */
508 public struct DLexer
509 {
510     mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
511         keywords, pseudoTokenHandlers);
512 
513     ///
514     @disable this();
515 
516     /**
517      * Params:
518      *     range = the bytes that compose the source code that will be lexed.
519      *     config = the lexer configuration to use.
520      *     cache = the string interning cache for de-duplicating identifiers and
521      *         other token text.
     *     haveSSE42 = whether the SSE 4.2 inline-assembly fast paths may be
     *         used; defaults to the result of core.cpuid.sse42
523      */
524     this(R)(R range, const LexerConfig config, StringCache* cache,
525         bool haveSSE42 = sse42()) pure nothrow @safe
526     if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
527     {
528         this.haveSSE42 = haveSSE42;
529         auto r = (range.length >= 3 && range[0] == 0xef && range[1] == 0xbb && range[2] == 0xbf)
530             ? range[3 .. $] : range;
531         this.range = LexerRange(cast(const(ubyte)[]) r);
532         this.config = config;
533         this.cache = cache;
534         popFront();
535     }
536 
537     ///
538     public void popFront()() pure nothrow @safe
539     {
540         do
541             _popFront();
542         while (config.whitespaceBehavior == WhitespaceBehavior.skip
543             && _front.type == tok!"whitespace");
544     }
545 
546 private pure nothrow @safe:
547 
548     bool isWhitespace()
549     {
550         switch (range.bytes[range.index])
551         {
552         case ' ':
553         case '\r':
554         case '\n':
555         case '\t':
556         case '\v':
557         case '\f':
558             return true;
559         case 0xe2:
560             auto peek = range.peek(2);
561             return peek.length == 2
562                 && peek[0] == 0x80
563                 && (peek[1] == 0xa8 || peek[1] == 0xa9);
564         default:
565             return false;
566         }
567     }
568 
569     void popFrontWhitespaceAware()
570     {
571         switch (range.bytes[range.index])
572         {
573         case '\r':
574             range.popFront();
575             if (!(range.index >= range.bytes.length) && range.bytes[range.index] == '\n')
576             {
577                 range.popFront();
578                 range.incrementLine();
579             }
580             else
581                 range.incrementLine();
582             return;
583         case '\n':
584             range.popFront();
585             range.incrementLine();
586             return;
587         case 0xe2:
588             auto lookahead = range.peek(3);
589             if (lookahead.length == 3 && lookahead[1] == 0x80
590                 && (lookahead[2] == 0xa8 || lookahead[2] == 0xa9))
591             {
592                 range.index+=3;
593                 range.column+=3;
594                 range.incrementLine();
595                 return;
596             }
597             else
598             {
599                 range.popFront();
600                 return;
601             }
602         default:
603             range.popFront();
604             return;
605         }
606     }
607 
608     void lexWhitespace(ref Token token) @trusted
609     {
610         mixin (tokenStart);
611         loop: do
612         {
613             version (iasm64NotWindows)
614             {
615                 if (haveSSE42 && range.index + 16 < range.bytes.length)
616                 {
617                     skip!(true, '\t', ' ', '\v', '\f')(range.bytes.ptr + range.index,
618                         &range.index, &range.column);
619                 }
620             }
621             switch (range.bytes[range.index])
622             {
623             case '\r':
624                 range.popFront();
625                 if (!(range.index >= range.bytes.length) && range.bytes[range.index] == '\n')
626                 {
627                     range.popFront();
628                 }
629                 range.column = 1;
630                 range.line += 1;
631                 break;
632             case '\n':
633                 range.popFront();
634                 range.column = 1;
635                 range.line += 1;
636                 break;
637             case ' ':
638             case '\t':
639             case '\v':
640             case '\f':
641                 range.popFront();
642                 break;
643             case 0xe2:
644                 if (range.index + 2 >= range.bytes.length)
645                     break loop;
646                 if (range.bytes[range.index + 1] != 0x80)
647                     break loop;
648                 if (range.bytes[range.index + 2] == 0xa8 || range.bytes[range.index + 2] == 0xa9)
649                 {
650                     range.index += 3;
651                     range.column += 3;
652                     range.column = 1;
653                     range.line += 1;
654                     break;
655                 }
656                 break loop;
657             default:
658                 break loop;
659             }
660         } while (!(range.index >= range.bytes.length));
661         string text = config.whitespaceBehavior == WhitespaceBehavior.include
662             ? cache.intern(range.slice(mark)) : "";
663         token = Token(tok!"whitespace", text, line, column, index);
664     }
665 
666     void lexNumber(ref Token token)
667     {
668         mixin (tokenStart);
669         if (range.bytes[range.index] == '0' && range.index + 1 < range.bytes.length)
670         {
671             immutable ahead = range.bytes[range.index + 1];
672             switch (ahead)
673             {
674             case 'x':
675             case 'X':
676                 range.index += 2;
677                 range.column += 2;
678                 lexHex(token, mark, line, column, index);
679                 return;
680             case 'b':
681             case 'B':
682                 range.index += 2;
683                 range.column += 2;
684                 lexBinary(token, mark, line, column, index);
685                 return;
686             default:
687                 lexDecimal(token, mark, line, column, index);
688                 return;
689             }
690         }
691         else
692             lexDecimal(token, mark, line, column, index);
693     }
694 
695     void lexHex(ref Token token)
696     {
697         mixin (tokenStart);
698         lexHex(token, mark, line, column, index);
699     }
700 
701     void lexHex(ref Token token, size_t mark, size_t line, size_t column,
702         size_t index) @trusted
703     {
704         IdType type = tok!"intLiteral";
705         bool foundDot;
706         hexLoop: while (!(range.index >= range.bytes.length))
707         {
708             switch (range.bytes[range.index])
709             {
710             case 'a': .. case 'f':
711             case 'A': .. case 'F':
712             case '0': .. case '9':
713             case '_':
714                 version (iasm64NotWindows)
715                 {
716                     if (haveSSE42 && range.index + 16 < range.bytes.length)
717                     {
718                         immutable ulong i = rangeMatch!(false, '0', '9', 'a', 'f', 'A', 'F', '_', '_')
719                             (range.bytes.ptr + range.index);
720                         range.column += i;
721                         range.index += i;
722                     }
723                     else
724                         range.popFront();
725                 }
726                 else
727                     range.popFront();
728                 break;
729             case 'u':
730             case 'U':
731                 lexIntSuffix(type);
732                 break hexLoop;
733             case 'i':
734                 if (foundDot)
735                     lexFloatSuffix(type);
736                 break hexLoop;
737             case 'L':
738                 if (foundDot)
739                     lexFloatSuffix(type);
740                 else
741                     lexIntSuffix(type);
742                 break hexLoop;
743             case 'p':
744             case 'P':
745                 lexExponent(type);
746                 break hexLoop;
747             case '.':
748                 if (foundDot || !(range.index + 1 < range.bytes.length) || range.peekAt(1) == '.')
749                     break hexLoop;
750                 else
751                 {
752                     // The following bit of silliness tries to tell the
753                     // difference between "int dot identifier" and
754                     // "double identifier".
755                     if (range.index + 1 < range.bytes.length)
756                     {
757                         switch (range.peekAt(1))
758                         {
759                         case '0': .. case '9':
760                         case 'A': .. case 'F':
761                         case 'a': .. case 'f':
762                             goto doubleLiteral;
763                         default:
764                             break hexLoop;
765                         }
766                     }
767                     else
768                     {
769                     doubleLiteral:
770                         range.popFront();
771                         foundDot = true;
772                         type = tok!"doubleLiteral";
773                     }
774                 }
775                 break;
776             default:
777                 break hexLoop;
778             }
779         }
780         token = Token(type, cache.intern(range.slice(mark)), line, column,
781             index);
782     }
783 
784     void lexBinary(ref Token token)
785     {
786         mixin (tokenStart);
787         return lexBinary(token, mark, line, column, index);
788     }
789 
790     void lexBinary(ref Token token, size_t mark, size_t line, size_t column,
791         size_t index) @trusted
792     {
793         IdType type = tok!"intLiteral";
794         binaryLoop: while (!(range.index >= range.bytes.length))
795         {
796             switch (range.bytes[range.index])
797             {
798             case '0':
799             case '1':
800             case '_':
801                 version (iasm64NotWindows)
802                 {
803                     if (haveSSE42 && range.index + 16 < range.bytes.length)
804                     {
805                         immutable ulong i = rangeMatch!(false, '0', '1', '_', '_')(
806                             range.bytes.ptr + range.index);
807                         range.column += i;
808                         range.index += i;
809                     }
810                     else
811                         range.popFront();
812                 }
813                 else
814                     range.popFront();
815                 break;
816             case 'u':
817             case 'U':
818             case 'L':
819                 lexIntSuffix(type);
820                 break binaryLoop;
821             default:
822                 break binaryLoop;
823             }
824         }
825         token = Token(type, cache.intern(range.slice(mark)), line, column,
826             index);
827     }
828 
829     void lexDecimal(ref Token token)
830     {
831         mixin (tokenStart);
832         lexDecimal(token, mark, line, column, index);
833     }
834 
835     void lexDecimal(ref Token token, size_t mark, size_t line, size_t column,
836         size_t index) @trusted
837     {
838         bool foundDot = range.bytes[range.index] == '.';
839         IdType type = tok!"intLiteral";
840         if (foundDot)
841         {
842             range.popFront();
843             type = tok!"doubleLiteral";
844         }
845 
846         decimalLoop: while (!(range.index >= range.bytes.length))
847         {
848             switch (range.bytes[range.index])
849             {
850             case '0': .. case '9':
851             case '_':
852                 version (iasm64NotWindows)
853                 {
854                     if (haveSSE42 && range.index + 16 < range.bytes.length)
855                     {
856                         immutable ulong i = rangeMatch!(false, '0', '9', '_', '_')(range.bytes.ptr + range.index);
857                         range.column += i;
858                         range.index += i;
859                     }
860                     else
861                         range.popFront();
862                 }
863                 else
864                     range.popFront();
865                 break;
866             case 'u':
867             case 'U':
868                 if (!foundDot)
869                     lexIntSuffix(type);
870                 break decimalLoop;
871             case 'i':
872                 lexFloatSuffix(type);
873                 break decimalLoop;
874             case 'L':
875                 if (foundDot)
876                     lexFloatSuffix(type);
877                 else
878                     lexIntSuffix(type);
879                 break decimalLoop;
880             case 'f':
881             case 'F':
882                 lexFloatSuffix(type);
883                 break decimalLoop;
884             case 'e':
885             case 'E':
886                 lexExponent(type);
887                 break decimalLoop;
888             case '.':
889                 if (foundDot || !(range.index + 1 < range.bytes.length) || range.peekAt(1) == '.')
890                     break decimalLoop;
891                 else
892                 {
893                     // The following bit of silliness tries to tell the
894                     // difference between "int dot identifier" and
895                     // "double identifier".
896                     if (range.index + 1 < range.bytes.length)
897                     {
898                         immutable ch = range.peekAt(1);
899                         if (ch <= 0x2f
900                             || (ch >= '0' && ch <= '9')
901                             || (ch >= ':' && ch <= '@')
902                             || (ch >= '[' && ch <= '^')
903                             || (ch >= '{' && ch <= '~')
904                             || ch == '`' || ch == '_')
905                         {
906                             goto doubleLiteral;
907                         }
908                         else
909                             break decimalLoop;
910                     }
911                     else
912                     {
913                     doubleLiteral:
914                         range.popFront();
915                         foundDot = true;
916                         type = tok!"doubleLiteral";
917                     }
918                 }
919                 break;
920             default:
921                 break decimalLoop;
922             }
923         }
924         token = Token(type, cache.intern(range.slice(mark)), line, column,
925             index);
926     }
927 
    /**
     * Consumes an integer-literal suffix (`u`/`U`, `L`/`l`, and an optional
     * trailing `i`) starting at the current byte and adjusts `type` to the
     * matching literal kind.
     *
     * The first byte is read without a bounds check, so the caller must
     * guarantee that the range is not exhausted.
     *
     * Params:
     *     type = literal kind determined so far; rewritten in place.
     */
    void lexIntSuffix(ref IdType type) pure nothrow @safe
    {
        // Set when the unsigned marker is reached from the long-marker branch,
        // so that the U section returns right after consuming it.
        bool secondPass;
        if (range.bytes[range.index] == 'u' || range.bytes[range.index] == 'U')
        {
    U:
            // Unsigned marker: int -> uint, anything else -> ulong.
            if (type == tok!"intLiteral")
                type = tok!"uintLiteral";
            else
                type = tok!"ulongLiteral";
            range.popFront();
            if (secondPass)
                return;
            if (range.index < range.bytes.length
                    && (range.bytes[range.index] == 'L' || range.bytes[range.index] == 'l'))
                goto L;
            goto I;
        }
        if (range.bytes[range.index] == 'L' || range.bytes[range.index] == 'l')
        {
    L:
            // Long marker: uint -> ulong, anything else -> long.
            if (type == tok!"uintLiteral")
                type = tok!"ulongLiteral";
            else
                type = tok!"longLiteral";
            range.popFront();
            // NOTE(review): when this section is entered through `goto L` from
            // the U section, secondPass is still false, so another 'u'/'U' is
            // accepted here (e.g. "1uLu") - confirm whether that is intended.
            if (range.index < range.bytes.length
                    && (range.bytes[range.index] == 'U' || range.bytes[range.index] == 'u'))
            {
                secondPass = true;
                goto U;
            }
            goto I;
        }
    I:
        // Optional imaginary marker; a warning is recorded and the type becomes
        // an imaginary float/double kind depending on the long-ness seen so far.
        if (range.index < range.bytes.length && range.bytes[range.index] == 'i')
        {
            warning("Complex number literals are deprecated");
            range.popFront();
            if (type == tok!"longLiteral" || type == tok!"ulongLiteral")
                type = tok!"idoubleLiteral";
            else
                type = tok!"ifloatLiteral";
        }
    }
973 
974     void lexFloatSuffix(ref IdType type) pure nothrow @safe
975     {
976         switch (range.bytes[range.index])
977         {
978         case 'L':
979             range.popFront();
980             type = tok!"doubleLiteral";
981             break;
982         case 'f':
983         case 'F':
984             range.popFront();
985             type = tok!"floatLiteral";
986             break;
987         default:
988             break;
989         }
990         if (range.index < range.bytes.length && range.bytes[range.index] == 'i')
991         {
992             warning("Complex number literals are deprecated");
993             range.popFront();
994             if (type == tok!"floatLiteral")
995                 type = tok!"ifloatLiteral";
996             else
997                 type = tok!"idoubleLiteral";
998         }
999     }
1000 
1001     void lexExponent(ref IdType type) pure nothrow @safe
1002     {
1003         range.popFront();
1004         bool foundSign = false;
1005         bool foundDigit = false;
1006         while (range.index < range.bytes.length)
1007         {
1008             switch (range.bytes[range.index])
1009             {
1010             case '-':
1011             case '+':
1012                 if (foundSign)
1013                 {
1014                     if (!foundDigit)
1015                     error("Expected an exponent");
1016                     return;
1017                 }
1018                 foundSign = true;
1019                 range.popFront();
1020                 break;
1021             case '0': .. case '9':
1022             case '_':
1023                 foundDigit = true;
1024                 range.popFront();
1025                 break;
1026             case 'L':
1027             case 'f':
1028             case 'F':
1029             case 'i':
1030                 lexFloatSuffix(type);
1031                 return;
1032             default:
1033                 if (!foundDigit)
1034                     error("Expected an exponent");
1035                 return;
1036             }
1037         }
1038     }
1039 
1040     void lexScriptLine(ref Token token)
1041     {
1042         mixin (tokenStart);
1043         while (!(range.index >= range.bytes.length) && !isNewline)
1044         {
1045             range.popFront();
1046         }
1047         token = Token(tok!"scriptLine", cache.intern(range.slice(mark)),
1048             line, column, index);
1049     }
1050 
1051     void lexSpecialTokenSequence(ref Token token)
1052     {
1053         mixin (tokenStart);
1054         while (!(range.index >= range.bytes.length) && !isNewline)
1055         {
1056             range.popFront();
1057         }
1058         token = Token(tok!"specialTokenSequence", cache.intern(range.slice(mark)),
1059             line, column, index);
1060     }
1061 
1062     void lexSlashStarComment(ref Token token) @trusted
1063     {
1064         mixin (tokenStart);
1065         IdType type = tok!"comment";
1066         range.popFrontN(2);
1067         while (range.index < range.bytes.length)
1068         {
1069             version (iasm64NotWindows)
1070             {
1071                 if (haveSSE42 && range.index + 16 < range.bytes.length)
1072                     skip!(false, '\r', '\n', '/', '*', 0xe2)(range.bytes.ptr + range.index,
1073                         &range.index, &range.column);
1074             }
1075             if (range.bytes[range.index] == '*')
1076             {
1077                 range.popFront();
1078                 if (!(range.index >= range.bytes.length) && range.bytes[range.index] == '/')
1079                 {
1080                     range.popFront();
1081                     break;
1082                 }
1083             }
1084             else
1085                 popFrontWhitespaceAware();
1086         }
1087         if (config.commentBehavior == CommentBehavior.intern)
1088             token = Token(type, cache.intern(range.slice(mark)), line, column, index);
1089         else
1090             token = Token(type, cast(string) range.slice(mark), line, column, index);
1091     }
1092 
1093     void lexSlashSlashComment(ref Token token) @trusted
1094     {
1095         mixin (tokenStart);
1096         IdType type = tok!"comment";
1097         range.popFrontN(2);
1098         while (range.index < range.bytes.length)
1099         {
1100             version (iasm64NotWindows)
1101             {
1102                 if (haveSSE42 && range.index + 16 < range.bytes.length)
1103                 {
1104                     skip!(false, '\r', '\n', 0xe2)(range.bytes.ptr + range.index,
1105                         &range.index, &range.column);
1106                 }
1107             }
1108             if (range.bytes[range.index] == '\r' || range.bytes[range.index] == '\n')
1109                 break;
1110             range.popFront();
1111         }
1112         if (config.commentBehavior == CommentBehavior.intern)
1113             token = Token(type, cache.intern(range.slice(mark)), line, column, index);
1114         else
1115             token = Token(type, cast(string) range.slice(mark), line, column, index);
1116     }
1117 
1118     void lexSlashPlusComment(ref Token token) @trusted
1119     {
1120         mixin (tokenStart);
1121         IdType type = tok!"comment";
1122         range.index += 2;
1123         range.column += 2;
1124         int depth = 1;
1125         while (depth > 0 && !(range.index >= range.bytes.length))
1126         {
1127             version (iasm64NotWindows)
1128             {
1129                 if (haveSSE42 && range.index + 16 < range.bytes.length)
1130                 {
1131                     skip!(false, '+', '/', '\\', '\r', '\n', 0xe2)(range.bytes.ptr + range.index,
1132                         &range.index, &range.column);
1133                 }
1134             }
1135             if (range.bytes[range.index] == '+')
1136             {
1137                 range.popFront();
1138                 if (!(range.index >= range.bytes.length) && range.bytes[range.index] == '/')
1139                 {
1140                     range.popFront();
1141                     depth--;
1142                 }
1143             }
1144             else if (range.bytes[range.index] == '/')
1145             {
1146                 range.popFront();
1147                 if (!(range.index >= range.bytes.length) && range.bytes[range.index] == '+')
1148                 {
1149                     range.popFront();
1150                     depth++;
1151                 }
1152             }
1153             else
1154                 popFrontWhitespaceAware();
1155         }
1156         if (config.commentBehavior == CommentBehavior.intern)
1157             token = Token(type, cache.intern(range.slice(mark)), line, column, index);
1158         else
1159             token = Token(type, cast(string) range.slice(mark), line, column, index);
1160     }
1161 
1162     void lexStringLiteral(ref Token token) @trusted
1163     {
1164         mixin (tokenStart);
1165         range.popFront();
1166         while (true)
1167         {
1168             if (range.index >= range.bytes.length)
1169             {
1170                 error("Error: unterminated string literal");
1171                 token = Token(tok!"");
1172                 return;
1173             }
1174             version (iasm64NotWindows)
1175             {
1176                 if (haveSSE42 && range.index + 16 < range.bytes.length)
1177                 {
1178                     skip!(false, '"', '\\', '\r', '\n', 0xe2)(range.bytes.ptr + range.index,
1179                         &range.index, &range.column);
1180                 }
1181             }
1182             if (range.bytes[range.index] == '"')
1183             {
1184                 range.popFront();
1185                 break;
1186             }
1187             else if (range.bytes[range.index] == '\\')
1188             {
1189                 lexEscapeSequence();
1190             }
1191             else
1192                 popFrontWhitespaceAware();
1193         }
1194         IdType type = tok!"stringLiteral";
1195         lexStringSuffix(type);
1196         token = Token(type, cache.intern(range.slice(mark)), line, column,
1197             index);
1198     }
1199 
1200     void lexWysiwygString(ref Token token) @trusted
1201     {
1202         mixin (tokenStart);
1203         IdType type = tok!"stringLiteral";
1204         immutable bool backtick = range.bytes[range.index] == '`';
1205         if (backtick)
1206         {
1207             range.popFront();
1208             while (true)
1209             {
1210                 if (range.index >= range.bytes.length)
1211                 {
1212                     error("Error: unterminated string literal");
1213                     token = Token(tok!"");
1214                     return;
1215                 }
1216                 version (iasm64NotWindows)
1217                 {
1218                     if (haveSSE42 && range.index + 16 < range.bytes.length)
1219                     {
1220                         skip!(false, '\r', '\n', 0xe2, '`')(range.bytes.ptr + range.index,
1221                             &range.index, &range.column);
1222                     }
1223                 }
1224                 if (range.bytes[range.index] == '`')
1225                 {
1226                     range.popFront();
1227                     break;
1228                 }
1229                 else
1230                     popFrontWhitespaceAware();
1231             }
1232         }
1233         else
1234         {
1235             range.popFront();
1236             if (range.index >= range.bytes.length)
1237             {
1238                 error("Error: unterminated string literal");
1239                 token = Token(tok!"");
1240                 return;
1241             }
1242             range.popFront();
1243             while (true)
1244             {
1245                 if (range.index >= range.bytes.length)
1246                 {
1247                     error("Error: unterminated string literal");
1248                     token = Token(tok!"");
1249                     return;
1250                 }
1251                 else if (range.bytes[range.index] == '"')
1252                 {
1253                     range.popFront();
1254                     break;
1255                 }
1256                 else
1257                     popFrontWhitespaceAware();
1258             }
1259         }
1260         lexStringSuffix(type);
1261         token = Token(type, cache.intern(range.slice(mark)), line, column,
1262             index);
1263     }
1264 
1265     private ubyte lexStringSuffix(ref IdType type) pure nothrow @safe
1266     {
1267         if (range.index >= range.bytes.length)
1268         {
1269             type = tok!"stringLiteral";
1270             return 0;
1271         }
1272         else
1273         {
1274             switch (range.bytes[range.index])
1275             {
1276             case 'w': range.popFront(); type = tok!"wstringLiteral"; return 'w';
1277             case 'd': range.popFront(); type = tok!"dstringLiteral"; return 'd';
1278             case 'c': range.popFront(); type = tok!"stringLiteral"; return 'c';
1279             default: type = tok!"stringLiteral"; return 0;
1280             }
1281         }
1282     }
1283 
1284     void lexDelimitedString(ref Token token)
1285     {
1286         mixin (tokenStart);
1287         range.index += 2;
1288         range.column += 2;
1289         ubyte open;
1290         ubyte close;
1291         switch (range.bytes[range.index])
1292         {
1293         case '<':
1294             open = '<';
1295             close = '>';
1296             range.popFront();
1297             lexNormalDelimitedString(token, mark, line, column, index, open, close);
1298             break;
1299         case '{':
1300             open = '{';
1301             close = '}';
1302             range.popFront();
1303             lexNormalDelimitedString(token, mark, line, column, index, open, close);
1304             break;
1305         case '[':
1306             open = '[';
1307             close = ']';
1308             range.popFront();
1309             lexNormalDelimitedString(token, mark, line, column, index, open, close);
1310             break;
1311         case '(':
1312             open = '(';
1313             close = ')';
1314             range.popFront();
1315             lexNormalDelimitedString(token, mark, line, column, index, open, close);
1316             break;
1317         default:
1318             lexHeredocString(token, mark, line, column, index);
1319             break;
1320         }
1321     }
1322 
1323     void lexNormalDelimitedString(ref Token token, size_t mark, size_t line, size_t column,
1324         size_t index, ubyte open, ubyte close)
1325     {
1326         int depth = 1;
1327         while (!(range.index >= range.bytes.length) && depth > 0)
1328         {
1329             if (range.bytes[range.index] == open)
1330             {
1331                 depth++;
1332                 range.popFront();
1333             }
1334             else if (range.bytes[range.index] == close)
1335             {
1336                 depth--;
1337                 range.popFront();
1338                 if (depth <= 0)
1339                 {
1340                     if (range.bytes[range.index] == '"')
1341                     {
1342                         range.popFront();
1343                     }
1344                     else
1345                     {
1346                         error("Error: `\"` expected to end delimited string literal");
1347                         token = Token(tok!"");
1348                         return;
1349                     }
1350                 }
1351             }
1352             else
1353                 popFrontWhitespaceAware();
1354         }
1355         IdType type = tok!"stringLiteral";
1356         lexStringSuffix(type);
1357         token = Token(type, cache.intern(range.slice(mark)), line, column, index);
1358     }
1359 
1360     void lexHeredocString(ref Token token, size_t mark, size_t line, size_t column, size_t index)
1361     {
1362         Token ident;
1363         lexIdentifier(ident);
1364         if (isNewline())
1365             popFrontWhitespaceAware();
1366         else
1367             error("Newline expected");
1368         while (!(range.index >= range.bytes.length))
1369         {
1370             if (isNewline())
1371             {
1372                 popFrontWhitespaceAware();
1373                 if (!range.canPeek(ident.text.length))
1374                 {
1375                     error(ident.text ~ " expected");
1376                     break;
1377                 }
1378                 if (range.peek(ident.text.length - 1) == ident.text)
1379                 {
1380                     range.popFrontN(ident.text.length);
1381                     break;
1382                 }
1383             }
1384             else
1385             {
1386                 range.popFront();
1387             }
1388         }
1389         if (!(range.index >= range.bytes.length) && range.bytes[range.index] == '"')
1390         {
1391             range.popFront();
1392         }
1393         else
1394             error("`\"` expected");
1395         IdType type = tok!"stringLiteral";
1396         lexStringSuffix(type);
1397         token = Token(type, cache.intern(range.slice(mark)), line, column, index);
1398     }
1399 
1400     void lexTokenString(ref Token token)
1401     {
1402         mixin (tokenStart);
1403         assert (range.bytes[range.index] == 'q');
1404         range.popFront();
1405         assert (range.bytes[range.index] == '{');
1406         range.popFront();
1407         auto app = appender!string();
1408         app.put("q{");
1409         int depth = 1;
1410 
1411         immutable WhitespaceBehavior oldWhitespace = config.whitespaceBehavior;
1412         immutable StringBehavior oldString = config.stringBehavior;
1413         config.whitespaceBehavior = WhitespaceBehavior.include;
1414         config.stringBehavior = StringBehavior.source;
1415         scope (exit)
1416         {
1417             config.whitespaceBehavior = oldWhitespace;
1418             config.stringBehavior = oldString;
1419         }
1420 
1421         advance(_front);
1422         while (depth > 0 && !empty)
1423         {
1424             auto t = front();
1425             if (t.text is null)
1426                 app.put(str(t.type));
1427             else
1428                 app.put(t.text);
1429             if (t.type == tok!"}")
1430             {
1431                 depth--;
1432                 if (depth > 0)
1433                 popFront();
1434             }
1435             else if (t.type == tok!"{")
1436             {
1437                 depth++;
1438                 popFront();
1439             }
1440             else
1441                 popFront();
1442         }
1443         IdType type = tok!"stringLiteral";
1444         auto b = lexStringSuffix(type);
1445         if (b != 0)
1446             app.put(b);
1447         token = Token(type, cache.intern(cast(const(ubyte)[]) app.data), line,
1448             column, index);
1449     }
1450 
1451     void lexHexString(ref Token token)
1452     {
1453         mixin (tokenStart);
1454         range.index += 2;
1455         range.column += 2;
1456 
1457         loop: while (true)
1458         {
1459             if (range.index >= range.bytes.length)
1460             {
1461                 error("Error: unterminated hex string literal");
1462                 token = Token(tok!"");
1463                 return;
1464             }
1465             else if (isWhitespace())
1466                 popFrontWhitespaceAware();
1467             else switch (range.bytes[range.index])
1468             {
1469             case '0': .. case '9':
1470             case 'A': .. case 'F':
1471             case 'a': .. case 'f':
1472                 range.popFront();
1473                 break;
1474             case '"':
1475                 range.popFront();
1476                 break loop;
1477             default:
1478                 error("Error: invalid character in hex string");
1479                 token = Token(tok!"");
1480                 return;
1481             }
1482         }
1483 
1484         IdType type = tok!"stringLiteral";
1485         lexStringSuffix(type);
1486         token = Token(type, cache.intern(range.slice(mark)), line, column,
1487             index);
1488     }
1489 
1490     bool lexEscapeSequence()
1491     {
1492         range.popFront();
1493         if (range.index >= range.bytes.length)
1494         {
1495             error("Error: non-terminated character escape sequence.");
1496             return false;
1497         }
1498         switch (range.bytes[range.index])
1499         {
1500         case '\'':
1501         case '"':
1502         case '?':
1503         case '\\':
1504         case 'a':
1505         case 'b':
1506         case 'f':
1507         case 'n':
1508         case 'r':
1509         case 't':
1510         case 'v':
1511             range.popFront();
1512             break;
1513         case 'x':
1514             range.popFront();
1515             foreach (i; 0 .. 2)
1516             {
1517                 if (range.index >= range.bytes.length)
1518                 {
1519                     error("Error: 2 hex digits expected.");
1520                     return false;
1521                 }
1522                 switch (range.bytes[range.index])
1523                 {
1524                 case '0': .. case '9':
1525                 case 'a': .. case 'f':
1526                 case 'A': .. case 'F':
1527                     range.popFront();
1528                     break;
1529                 default:
1530                     error("Error: 2 hex digits expected.");
1531                     return false;
1532                 }
1533             }
1534             break;
1535         case '0':
1536             if (!(range.index + 1 < range.bytes.length)
1537                 || ((range.index + 1 < range.bytes.length) && range.peekAt(1) == '\''))
1538             {
1539                 range.popFront();
1540                 break;
1541             }
1542             goto case;
1543         case '1': .. case '7':
1544             for (size_t i = 0; i < 3 && !(range.index >= range.bytes.length)
1545                     && range.bytes[range.index] >= '0' && range.bytes[range.index] <= '7'; i++)
1546                 range.popFront();
1547             break;
1548         case 'u':
1549             range.popFront();
1550             foreach (i; 0 .. 4)
1551             {
1552                 if (range.index >= range.bytes.length)
1553                 {
1554                     error("Error: at least 4 hex digits expected.");
1555                     return false;
1556                 }
1557                 switch (range.bytes[range.index])
1558                 {
1559                 case '0': .. case '9':
1560                 case 'a': .. case 'f':
1561                 case 'A': .. case 'F':
1562                     range.popFront();
1563                     break;
1564                 default:
1565                     error("Error: at least 4 hex digits expected.");
1566                     return false;
1567                 }
1568             }
1569             break;
1570         case 'U':
1571             range.popFront();
1572             foreach (i; 0 .. 8)
1573             {
1574                 if (range.index >= range.bytes.length)
1575                 {
1576                     error("Error: at least 8 hex digits expected.");
1577                     return false;
1578                 }
1579                 switch (range.bytes[range.index])
1580                 {
1581                 case '0': .. case '9':
1582                 case 'a': .. case 'f':
1583                 case 'A': .. case 'F':
1584                     range.popFront();
1585                     break;
1586                 default:
1587                     error("Error: at least 8 hex digits expected.");
1588                     return false;
1589                 }
1590             }
1591             break;
1592         default:
1593             while (true)
1594             {
1595                 if (range.index >= range.bytes.length)
1596                 {
1597                     error("Error: non-terminated character escape sequence.");
1598                     return false;
1599                 }
1600                 if (range.bytes[range.index] == ';')
1601                 {
1602                     range.popFront();
1603                     break;
1604                 }
1605                 else
1606                 {
1607                     range.popFront();
1608                 }
1609             }
1610         }
1611         return true;
1612     }
1613 
1614     void lexCharacterLiteral(ref Token token)
1615     {
1616         mixin (tokenStart);
1617         range.popFront();
1618         if (range.empty)
1619             goto err;
1620         if (range.bytes[range.index] == '\\')
1621             lexEscapeSequence();
1622         else if (range.bytes[range.index] == '\'')
1623         {
1624             range.popFront();
1625             token = Token(tok!"characterLiteral", cache.intern(range.slice(mark)),
1626                 line, column, index);
1627         }
1628         else if (range.bytes[range.index] & 0x80)
1629         {
1630             while (range.bytes[range.index] & 0x80)
1631                 range.popFront();
1632         }
1633         else
1634             popFrontWhitespaceAware();
1635 
1636         if (range.index < range.bytes.length && range.bytes[range.index] == '\'')
1637         {
1638             range.popFront();
1639             token = Token(tok!"characterLiteral", cache.intern(range.slice(mark)),
1640                 line, column, index);
1641         }
1642         else
1643         {
1644     err:
1645             error("Error: Expected `'` to end character literal");
1646             token = Token(tok!"");
1647         }
1648     }
1649 
1650     void lexIdentifier(ref Token token) @trusted
1651     {
1652         mixin (tokenStart);
1653         if (isSeparating(0))
1654         {
1655             error("Invalid identifier");
1656             range.popFront();
1657         }
1658         while (true)
1659         {
1660             version (iasm64NotWindows)
1661             {
1662                 if (haveSSE42 && range.index + 16 < range.bytes.length)
1663                 {
1664                     immutable ulong i = rangeMatch!(false, 'a', 'z', 'A', 'Z', '_', '_')
1665                         (range.bytes.ptr + range.index);
1666                     range.column += i;
1667                     range.index += i;
1668                 }
1669             }
1670             if (isSeparating(0))
1671                 break;
1672             else
1673                 range.popFront();
1674         }
1675         token = Token(tok!"identifier", cache.intern(range.slice(mark)), line,
1676             column, index);
1677     }
1678 
1679     void lexDot(ref Token token)
1680     {
1681         mixin (tokenStart);
1682         if (!(range.index + 1 < range.bytes.length))
1683         {
1684             range.popFront();
1685             token = Token(tok!".", null, line, column, index);
1686             return;
1687         }
1688         switch (range.peekAt(1))
1689         {
1690         case '0': .. case '9':
1691             lexNumber(token);
1692             return;
1693         case '.':
1694             range.popFront();
1695             range.popFront();
1696             if (!(range.index >= range.bytes.length) && range.bytes[range.index] == '.')
1697             {
1698                 range.popFront();
1699                 token = Token(tok!"...", null, line, column, index);
1700             }
1701             else
1702                 token = Token(tok!"..", null, line, column, index);
1703             return;
1704         default:
1705             range.popFront();
1706             token = Token(tok!".", null, line, column, index);
1707             return;
1708         }
1709     }
1710 
1711     void lexLongNewline(ref Token token) @nogc
1712     {
1713         mixin (tokenStart);
1714         range.popFront();
1715         range.popFront();
1716         range.popFront();
1717         range.incrementLine();
1718         string text = config.whitespaceBehavior == WhitespaceBehavior.include
1719             ? cache.intern(range.slice(mark)) : "";
1720         token = Token(tok!"whitespace", text, line,
1721             column, index);
1722     }
1723 
1724     bool isNewline() @nogc
1725     {
1726         if (range.bytes[range.index] == '\n') return true;
1727         if (range.bytes[range.index] == '\r') return true;
1728         return (range.bytes[range.index] & 0x80) && (range.index + 2 < range.bytes.length)
1729             && (range.peek(2) == "\u2028" || range.peek(2) == "\u2029");
1730     }
1731 
1732     bool isSeparating(size_t offset) @nogc
1733     {
1734         enum : ubyte
1735         {
1736             n, y, m // no, yes, maybe
1737         }
1738 
1739         if (range.index + offset >= range.bytes.length)
1740             return true;
1741         auto c = range.bytes[range.index + offset];
1742         static immutable ubyte[256] LOOKUP_TABLE = [
1743             y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y,
1744             y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y,
1745             y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y,
1746             n, n, n, n, n, n, n, n, n, n, y, y, y, y, y, y,
1747             y, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n,
1748             n, n, n, n, n, n, n, n, n, n, n, y, y, y, y, n,
1749             y, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n,
1750             n, n, n, n, n, n, n, n, n, n, n, y, y, y, y, y,
1751             m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
1752             m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
1753             m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
1754             m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
1755             m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
1756             m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
1757             m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m,
1758             m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m
1759         ];
1760         immutable ubyte result = LOOKUP_TABLE[c];
1761         if (result == n)
1762             return false;
1763         if (result == y)
1764             return true;
1765         if (result == m)
1766         {
1767             auto r = range;
1768             range.popFrontN(offset);
1769             return (r.canPeek(2) && (r.peek(2) == "\u2028"
1770                 || r.peek(2) == "\u2029"));
1771         }
1772         assert (false);
1773     }
1774 
1775 
1776 
    /// Code fragment mixed in at the top of the lexing methods. It snapshots
    /// the byte index, column and line of the range at the start of the token
    /// and takes a mark that is later passed to `range.slice`.
    enum tokenStart = q{
        size_t index = range.index;
        size_t column = range.column;
        size_t line = range.line;
        auto mark = range.mark();
    };
1783 
1784     void error(string message)
1785     {
1786         messages ~= Message(range.line, range.column, message, true);
1787     }
1788 
1789     void warning(string message)
1790     {
1791         messages ~= Message(range.line, range.column, message, false);
1792         assert (messages.length > 0);
1793     }
1794 
    /// A diagnostic recorded while lexing.
    static struct Message
    {
        /// Line of the range at the time the message was recorded.
        size_t line;
        /// Column of the range at the time the message was recorded.
        size_t column;
        /// Human-readable diagnostic text.
        string message;
        /// `true` for messages recorded by `error`, `false` for `warning`.
        bool isError;
    }
1802 
    /// Diagnostics collected so far, appended to by `error` and `warning`.
    Message[] messages;
    /// String cache used to intern token text.
    StringCache* cache;
    /// Lexer configuration (comment, whitespace and string behavior).
    LexerConfig config;
    /// Gates the accelerated `skip`/`rangeMatch` scanning paths in the
    /// lexing methods.
    bool haveSSE42;
1807 }
1808 
/**
 * Local stand-in for Phobos' `nextPow2`, kept here because this module needs
 * to build on older versions of dmd.
 *
 * The shift is performed on a `size_t`, so the result is correct over the
 * whole range of the type rather than only for values that fit the shift of
 * a 32-bit `int`.
 *
 * Params:
 *     value = the value to round up.
 * Returns: the smallest power of two strictly greater than `value`, or 0
 *          when `value` is 0 or the result would not fit in a `size_t`.
 */
private static size_t nextPow2(size_t value)
{
    import core.bitop : bsr;

    // bsr(0) is undefined, and values above size_t.max / 2 have no
    // representable next power of two.
    if (value == 0 || value > size_t.max / 2)
        return 0;
    return size_t(1) << (bsr(value) + 1);
}
1816 
1817 /**
1818  * Creates a token range from the given source code. Creates a default lexer
1819  * configuration and a GC-managed string cache.
1820  */
1821 public auto byToken(R)(R range)
1822 if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
1823 {
1824     uint bc = cast(uint)((range.length > 2^^31UL) ? 2^^31
1825         : nextPow2(1 + range.length / 32));
1826     LexerConfig config;
1827     StringCache* cache = new StringCache(bc);
1828     return DLexer(range, config, cache);
1829 }
1830 
1831 /**
1832  * Creates a token range from the given source code. Uses the given string
1833  * cache.
1834  */
1835 public auto byToken(R)(R range, StringCache* cache)
1836 if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
1837 {
1838     LexerConfig config;
1839     return DLexer(range, config, cache);
1840 }
1841 
1842 /**
1843  * Creates a token range from the given source code. Uses the provided lexer
1844  * configuration and string cache.
1845  */
1846 public auto byToken(R)(R range, const LexerConfig config, StringCache* cache)
1847 if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R)
1848 {
1849     return DLexer(range, config, cache);
1850 }
1851 
1852 /**
1853  * Removes "decoration" such as leading whitespace, leading + and * characters,
1854  * and places the result into the given output range
1855  */
1856 public void unDecorateComment(T)(string comment, auto ref T outputRange)
1857     if (isOutputRange!(T, string))
1858 in
1859 {
1860     assert (comment.length >= 3);
1861 }
1862 body
1863 {
1864     import std.string : lineSplitter, stripRight;
1865 
1866     static void adjustBeginningAndEnd(string s, ref size_t a, ref size_t b) pure nothrow @nogc @safe
1867     {
1868         immutable char c = s[1];
1869         while (a < b && s[a] == c) a++;
1870         while (b > a && s[b] == c) b--;
1871         b++;
1872     }
1873 
1874     string leadingChars;
1875     size_t i = 3;
1876     size_t j;
1877     bool hasOutput = false;
1878     bool lastWasBlank = false;
1879     switch (comment[0 .. 3])
1880     {
1881     case "///":
1882         j = comment.length;
1883 
1884         foreach (line; lineSplitter(comment))
1885         {
1886             auto l = line[3 .. $];
1887             if (leadingChars.empty)
1888             {
1889                 size_t k = 0;
1890                 while (k < l.length && (l[k] == ' ' || l[k] == '\t')) k++;
1891                 leadingChars = l[0 .. k];
1892             }
1893             immutable string stripped = l.stripRight();
1894             if (hasOutput)
1895                 outputRange.put('\n');
1896             else
1897                 hasOutput = true;
1898             if (stripped.length >= leadingChars.length && stripped.startsWith(leadingChars))
1899                 outputRange.put(stripped[leadingChars.length .. $]);
1900             else
1901                 outputRange.put(stripped);
1902         }
1903         break;
1904     case "/++":
1905     case "/**":
1906         if (comment.length == 3)
1907         {
1908             comment = "";
1909             goto default;
1910         }
1911         j = comment.length - 2;
1912         // Skip beginning and ending stars and plusses
1913         adjustBeginningAndEnd(comment, i, j);
1914         foreach (line; lineSplitter(comment[i .. j]))
1915         {
1916             immutable string stripped = line.stripRight();
1917             if (leadingChars.empty)
1918             {
1919                 size_t k = 0;
1920                 while (k < line.length && (line[k] == ' ' || line[k] == '\t')) k++;
1921                 if (k < line.length && line[k] == comment[1])
1922                 {
1923                     k++;
1924                     while (k < line.length && (line[k] == ' ' || line[k] == '\t')) k++;
1925                 }
1926                 if (k == stripped.length)
1927                     continue;
1928                 leadingChars = line[0 .. k];
1929             }
1930 
1931             if (stripped.startsWith(leadingChars))
1932             {
1933                 if (stripped.length > leadingChars.length)
1934                 {
1935                     if (hasOutput)
1936                         outputRange.put('\n');
1937                     hasOutput = true;
1938                     if (lastWasBlank)
1939                         outputRange.put('\n');
1940                     lastWasBlank = false;
1941                     outputRange.put(stripped[leadingChars.length .. $]);
1942                 }
1943             }
1944             else if (hasOutput && stripped.length == leadingChars.stripRight().length)
1945                 lastWasBlank = true;
1946             else if (!stripped.empty && !leadingChars.startsWith(stripped))
1947             {
1948                 if (hasOutput)
1949                     outputRange.put('\n');
1950                 hasOutput = true;
1951                 if (lastWasBlank)
1952                     outputRange.put('\n');
1953                 lastWasBlank = false;
1954                 outputRange.put(stripped);
1955             }
1956             else
1957                 lastWasBlank = false;
1958         }
1959         break;
1960     default:
1961         outputRange.put(comment);
1962         break;
1963     }
1964 }
1965 
1966 ///
1967 unittest
1968 {
1969     import std.array:array, appender;
1970     import std.stdio:stderr;
1971     stderr.writeln("Running unittest for unDecorateComment...");
1972 
1973 
1974     string[] inputs = [
1975         "/***************\n*******************/",
1976         "/***************\n *\n ******************/",
1977         "/**\n*/",
1978         "/** */",
1979         "/***/",
1980         "/** abcde */",
1981         "/// abcde\n/// abcde",
1982         "/**\n * stuff\n */",
1983         "/**\n *\n * stuff\n */",
1984         "/**\n *\n * stuff\n *\n */",
1985         "/**\n *\n * stuff\n *\n*/",
1986         "/**\n *  abcde\n *    abcde \n */",
1987         "/**\n * abcde\n *\n * abcde\n */",
1988     ];
1989     string[] outputs = [
1990         "",
1991         "",
1992         "",
1993         "",
1994         "",
1995         "abcde",
1996         "abcde\nabcde",
1997         "stuff",
1998         "stuff",
1999         "stuff",
2000         "stuff",
2001         "abcde\n  abcde",
2002         "abcde\n\nabcde"
2003     ];
2004     assert(inputs.length == outputs.length);
2005     foreach (pair; zip(inputs, outputs))
2006     {
2007         foreach (b; [true, false])
2008         {
2009             auto app = appender!string();
2010             unDecorateComment(b ? pair[0] : pair[0].replace("*", "+"), app);
2011             assert(pair[1] == app.data, "[[" ~ pair[0] ~ "]] => [[" ~ app.data ~ "]]");
2012         }
2013     }
2014     stderr.writeln("Unittest for unDecorateComment passed.");
2015 }
2016 
2017 
2018 /**
2019  * The string cache is used for string interning.
2020  *
2021  * It will only store a single copy of any string that it is asked to hold.
2022  * Interned strings can be compared for equality by comparing their $(B .ptr)
2023  * field.
2024  *
2025  * Default and postbilt constructors are disabled. When a StringCache goes out
2026  * of scope, the memory held by it is freed.
2027  *
2028  * See_also: $(LINK http://en.wikipedia.org/wiki/String_interning)
2029  */
2030 struct StringCache
2031 {
2032 public pure nothrow @nogc:
2033 
2034     @disable this();
2035     @disable this(this);
2036 
2037     /**
2038      * Params: bucketCount = the initial number of buckets. Must be a
2039      * power of two
2040      */
2041     this(size_t bucketCount) nothrow @trusted @nogc
2042     in
2043     {
2044         import core.bitop : popcnt;
2045         static if (size_t.sizeof == 8)
2046         {
2047             immutable low = popcnt(cast(uint) bucketCount);
2048             immutable high = popcnt(cast(uint) (bucketCount >> 32));
2049             assert ((low == 0 && high == 1) || (low == 1 && high == 0));
2050         }
2051         else
2052         {
2053             static assert (size_t.sizeof == 4);
2054             assert (popcnt(cast(uint) bucketCount) == 1);
2055         }
2056     }
2057     body
2058     {
2059         buckets = (cast(Node**) calloc((Node*).sizeof, bucketCount))[0 .. bucketCount];
2060     }
2061 
2062     ~this()
2063     {
2064         Block* current = rootBlock;
2065         while (current !is null)
2066         {
2067             Block* prev = current;
2068             current = current.next;
2069             free(cast(void*) prev);
2070         }
2071         foreach (nodePointer; buckets)
2072         {
2073             Node* currentNode = nodePointer;
2074             while (currentNode !is null)
2075             {
2076                 if (currentNode.mallocated)
2077                     free(currentNode.str.ptr);
2078                 Node* prev = currentNode;
2079                 currentNode = currentNode.next;
2080                 free(prev);
2081             }
2082         }
2083         rootBlock = null;
2084         free(buckets.ptr);
2085         buckets = null;
2086     }
2087 
2088     /**
2089      * Caches a string.
2090      */
2091     string intern(const(ubyte)[] str) @safe
2092     {
2093         if (str is null || str.length == 0)
2094             return "";
2095         return _intern(str);
2096     }
2097 
2098     /**
2099      * ditto
2100      */
2101     string intern(string str) @trusted
2102     {
2103         return intern(cast(ubyte[]) str);
2104     }
2105 
2106     /**
2107      * The default bucket count for the string cache.
2108      */
2109     static enum defaultBucketCount = 4096;
2110 
2111 private:
2112 
2113     string _intern(const(ubyte)[] bytes) @trusted
2114     {
2115         immutable uint hash = hashBytes(bytes);
2116         immutable size_t index = hash & (buckets.length - 1);
2117         Node* s = find(bytes, hash);
2118         if (s !is null)
2119             return cast(string) s.str;
2120         ubyte[] mem = void;
2121         bool mallocated = bytes.length > BIG_STRING;
2122         if (mallocated)
2123             mem = (cast(ubyte*) malloc(bytes.length))[0 .. bytes.length];
2124         else
2125             mem = allocate(bytes.length);
2126         mem[] = bytes[];
2127         Node* node = cast(Node*) malloc(Node.sizeof);
2128         node.str = mem;
2129         node.hash = hash;
2130         node.next = buckets[index];
2131         node.mallocated = mallocated;
2132         buckets[index] = node;
2133         return cast(string) mem;
2134     }
2135 
2136     Node* find(const(ubyte)[] bytes, uint hash) @trusted
2137     {
2138         import std.algorithm : equal;
2139         immutable size_t index = hash & (buckets.length - 1);
2140         Node* node = buckets[index];
2141         while (node !is null)
2142         {
2143             if (node.hash == hash && bytes == cast(ubyte[]) node.str)
2144                 return node;
2145             node = node.next;
2146         }
2147         return node;
2148     }
2149 
2150     static uint hashBytes(const(ubyte)[] data) pure nothrow @trusted @nogc
2151     in
2152     {
2153         assert (data !is null);
2154         assert (data.length > 0);
2155     }
2156     body
2157     {
2158         immutable uint m = 0x5bd1e995;
2159         immutable int r = 24;
2160         uint h = cast(uint) data.length;
2161         while (data.length >= 4)
2162         {
2163             uint k = (cast(ubyte) data[3]) << 24
2164                 | (cast(ubyte) data[2]) << 16
2165                 | (cast(ubyte) data[1]) << 8
2166                 | (cast(ubyte) data[0]);
2167             k *= m;
2168             k ^= k >> r;
2169             k *= m;
2170             h *= m;
2171             h ^= k;
2172             data = data[4 .. $];
2173         }
2174         switch (data.length & 3)
2175         {
2176         case 3:
2177             h ^= data[2] << 16;
2178             goto case;
2179         case 2:
2180             h ^= data[1] << 8;
2181             goto case;
2182         case 1:
2183             h ^= data[0];
2184             h *= m;
2185             break;
2186         default:
2187             break;
2188         }
2189         h ^= h >> 13;
2190         h *= m;
2191         h ^= h >> 15;
2192         return h;
2193     }
2194 
2195     ubyte[] allocate(size_t numBytes) pure nothrow @trusted @nogc
2196     in
2197     {
2198         assert (numBytes != 0);
2199     }
2200     out (result)
2201     {
2202         assert (result.length == numBytes);
2203     }
2204     body
2205     {
2206         Block* r = rootBlock;
2207         size_t i = 0;
2208         while  (i <= 3 && r !is null)
2209         {
2210             immutable size_t available = r.bytes.length;
2211             immutable size_t oldUsed = r.used;
2212             immutable size_t newUsed = oldUsed + numBytes;
2213             if (newUsed <= available)
2214             {
2215                 r.used = newUsed;
2216                 return r.bytes[oldUsed .. newUsed];
2217             }
2218             i++;
2219             r = r.next;
2220         }
2221         Block* b = cast(Block*) calloc(Block.sizeof, 1);
2222         b.used = numBytes;
2223         b.next = rootBlock;
2224         rootBlock = b;
2225         return b.bytes[0 .. numBytes];
2226     }
2227 
2228     static struct Node
2229     {
2230         ubyte[] str = void;
2231         Node* next = void;
2232         uint hash = void;
2233         bool mallocated = void;
2234     }
2235 
2236     static struct Block
2237     {
2238         Block* next;
2239         size_t used;
2240         enum BLOCK_CAPACITY = BLOCK_SIZE - size_t.sizeof - (void*).sizeof;
2241         ubyte[BLOCK_CAPACITY] bytes;
2242     }
2243 
2244     static assert (BLOCK_SIZE == Block.sizeof);
2245 
2246     enum BLOCK_SIZE = 1024 * 16;
2247 
2248     // If a string would take up more than 1/4 of a block, allocate it outside
2249     // of the block.
2250     enum BIG_STRING = BLOCK_SIZE / 4;
2251 
2252     Node*[] buckets;
2253     Block* rootBlock;
2254 }
2255 
// C allocator entry points, declared locally with the nothrow, pure, @nogc
// and @trusted attributes so that StringCache's pure nothrow @nogc members
// can call them.
private extern(C) void* calloc(size_t, size_t) nothrow pure @nogc @trusted;
private extern(C) void* malloc(size_t) nothrow pure @nogc @trusted;
private extern(C) void free(void*) nothrow pure @nogc @trusted;
2259 
// Lexes a small import declaration through getTokensForParser and checks
// the sequence of token types that comes back.
unittest
{
    // The token string ends at its closing brace and carries the `c` string
    // postfix. A second `}` at this point is a stray token that stops the
    // module from compiling.
    auto source = cast(ubyte[]) q{ import std.stdio;}c;
    auto tokens = getTokensForParser(source, LexerConfig(),
        new StringCache(StringCache.defaultBucketCount));
    assert (tokens.map!"a.type"().equal([tok!"import", tok!"identifier", tok!".",
        tok!"identifier", tok!";"]));
}
2268 
/// Test \x char sequence
unittest
{
    auto lex = (string text) => byToken(cast(ubyte[])text);

    // valid: every pair of hex digits, in either case, lexes as a character
    // literal and nothing else
    immutable hexDigits = ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','A','B','C','D','E','F'];
    auto literals = "";
    foreach (high; hexDigits)
        foreach (low; hexDigits)
            literals ~= "'\\x" ~ high ~ low ~ "'";
    assert (lex(literals).filter!(t => t.type != tok!"characterLiteral").empty);

    // invalid: the first message must be the hex-digit error, reported on
    // line 1 at the listed column
    static struct BadEscape
    {
        string text;
        size_t column;
    }
    auto badEscapes = [
        BadEscape(`'\x'`, 4),
        BadEscape(`'\x_'`, 4),
        BadEscape(`'\xA'`, 5),
        BadEscape(`'\xAY'`, 5),
        BadEscape(`'\xXX'`, 4),
    ];
    foreach (bad; badEscapes)
    {
        auto expected = DLexer.Message(1, bad.column, "Error: 2 hex digits expected.", true);
        assert (lex(bad.text).messages[0] == expected);
    }
}
2289 
2290 version (iasm64NotWindows)
2291 {
2292     /**
2293      * Returns:
2294      */
2295     ushort newlineMask(const ubyte*) pure nothrow @trusted @nogc
2296     {
2297         asm pure nothrow @nogc
2298         {
2299             naked;
2300             movdqu XMM1, [RDI];
2301             mov RAX, 3;
2302             mov RDX, 16;
2303             mov R8, 0x0d0d0d0d0d0d0d0dL;
2304             movq XMM2, R8;
2305             shufpd XMM2, XMM2, 0;
2306             pcmpeqb XMM2, XMM1;
2307             mov R9, 0x0a0a0a0a0a0a0a0aL;
2308             movq XMM3, R9;
2309             shufpd XMM3, XMM3, 0;
2310             pcmpeqb XMM3, XMM1;
2311             mov R10, 0xe280a8L;
2312             movq XMM4, R10;
2313             pcmpestrm XMM4, XMM1, 0b01001100;
2314             movdqa XMM4, XMM0;
2315             mov R11, 0xe280a9L;
2316             movq XMM5, R11;
2317             pcmpestrm XMM5, XMM1, 0b01001100;
2318             movdqa XMM5, XMM0;
2319             mov RCX, 0x0a0d;
2320             dec RAX;
2321             movq XMM6, RCX;
2322             pcmpestrm XMM6, XMM1, 0b01001100;
2323             movdqa XMM6, XMM0;
2324             movdqa XMM7, XMM6;
2325             pslldq XMM7, 1;
2326             movdqa XMM0, XMM4;
2327             por XMM0, XMM5;
2328             por XMM7, XMM6;
2329             movdqa XMM1, XMM2;
2330             por XMM1, XMM3;
2331             pxor XMM7, XMM1;
2332             por XMM7, XMM0;
2333             por XMM7, XMM6;
2334             pmovmskb RAX, XMM7;
2335             and RAX, 0b0011_1111_1111_1111;
2336             ret;
2337         }
2338     }
2339 
2340     /**
2341      * Skips between 0 and 16 bytes that match (or do not match) one of the
2342      * given $(B chars).
2343      */
2344     void skip(bool matching, chars...)(const ubyte*, ulong*, ulong*) pure nothrow
2345         @trusted @nogc if (chars.length <= 8)
2346     {
2347         enum constant = ByteCombine!chars;
2348         enum charsLength = chars.length;
2349         static if (matching)
2350             enum flags = 0b0001_0000;
2351         else
2352             enum flags = 0b0000_0000;
2353         asm pure nothrow @nogc
2354         {
2355             naked;
2356             movdqu XMM1, [RDX];
2357             mov R10, constant;
2358             movq XMM2, R10;
2359             mov RAX, charsLength;
2360             mov RDX, 16;
2361             pcmpestri XMM2, XMM1, flags;
2362             add [RSI], RCX;
2363             add [RDI], RCX;
2364             ret;
2365         }
2366     }
2367 
2368     /**
2369      * Returns: the number of bytes starting at the given location that match
2370      *     (or do not match if $(B invert) is true) the byte ranges in $(B chars).
2371      */
2372     ulong rangeMatch(bool invert, chars...)(const ubyte*) pure nothrow @trusted @nogc
2373     {
2374         static assert (chars.length % 2 == 0);
2375         enum constant = ByteCombine!chars;
2376         static if (invert)
2377             enum rangeMatchFlags = 0b0000_0100;
2378         else
2379             enum rangeMatchFlags = 0b0001_0100;
2380         enum charsLength = chars.length;
2381         asm pure nothrow @nogc
2382         {
2383             naked;
2384             movdqu XMM1, [RDI];
2385             mov R10, constant;
2386             movq XMM2, R10;
2387             mov RAX, charsLength;
2388             mov RDX, 16;
2389             pcmpestri XMM2, XMM1, rangeMatchFlags;
2390             mov RAX, RCX;
2391             ret;
2392         }
2393     }
2394 
2395     template ByteCombine(c...)
2396     {
2397         static assert (c.length <= 8);
2398         static if (c.length > 1)
2399             enum ulong ByteCombine = c[0] | (ByteCombine!(c[1..$]) << 8);
2400         else
2401             enum ulong ByteCombine = c[0];
2402     }
2403 }
2404 
// Checks that lexing a source consisting solely of a documentation-comment
// opener does not throw a RangeError.
unittest
{
    import core.exception : RangeError;
    import std.exception : assertNotThrown;

    // Each source is exactly three characters long: an opener with nothing
    // after it.
    static immutable src1 = "/++";
    static immutable src2 = "/**";

    LexerConfig cf;
    // 16 buckets: a power of two, as the StringCache constructor requires.
    StringCache ca = StringCache(16);

    assertNotThrown!RangeError(getTokensForParser(src1, cf, &ca));
    assertNotThrown!RangeError(getTokensForParser(src2, cf, &ca));
}