1 /// Utility for unescaping D string literals of any kind
2 module dparse.strings;
3 
4 import std.algorithm;
5 import std.array;
6 import std.ascii : isAlphaNum, isHexDigit, isWhite;
7 import std.conv;
8 import std.range;
9 import std.string;
10 import std.utf;
11 
/**
 * Checks if a string literal input has correct start/end sequences (quotes) to
 * be any kind of D string literal.
 *
 * Accepted openers are `r"` (wysiwyg), `x"` (hex), `q"` (delimited), `q{`
 * (token), a single backtick and a single double quote. Every kind may be
 * followed by one of the postfix characters `c`, `w` or `d`.
 *
 * For double quoted strings the closing quote must not be escaped, that is
 * it must be preceded by an even number of backslashes.
 *
 * Params:
 *   literal = source text of the whole literal including prefix, quotes and
 *     optional postfix
 *   stringCloseChar = set to the character that terminates the literal
 *     (a double quote, a backtick or a closing curly brace)
 *   hasPostfix = set to true if the last character of literal is a postfix
 *     character
 *   parseEscapes = set to true for double quoted strings, the only kind in
 *     which backslash escape sequences are interpreted
 *   prefixLength = set to the number of characters before the content
 *     (1 or 2)
 *
 * Returns: true if the start and end sequences match. The out parameters are
 * only meaningful when true is returned.
 *
 * Bugs: doesn't check for validity of token strings.
 *
 * Standards: $(LINK https://dlang.org/spec/lex.html#string_literals)
 */
bool isStringLiteral(const(char)[] literal, out char stringCloseChar,
		out bool hasPostfix, out bool parseEscapes, out int prefixLength)
{
	// there are no 1 character strings
	if (literal.length < 2)
		return false;

	// check for valid start
	switch (literal[0])
	{
	case 'r': // WysiwygString
	case 'x': // HexString
		if (literal[1] != '"')
			return false;
		stringCloseChar = '"';
		prefixLength = 2;
		break;
	case 'q': // DelimitedString or TokenString
		if (literal[1] == '{')
			stringCloseChar = '}';
		else if (literal[1] == '"')
			stringCloseChar = '"';
		else
			return false;
		prefixLength = 2;
		break;
	case '`':
	case '"':
		stringCloseChar = literal[0];
		parseEscapes = stringCloseChar == '"';
		prefixLength = 1;
		break;
	default:
		return false;
	}

	// The grammar allows an optional StringPostfix on every kind of string
	// literal, delimited and token strings included.
	if (literal[$ - 1].among!('c', 'w', 'd'))
	{
		hasPostfix = true;
		literal = literal[0 .. $ - 1];
	}

	// after removing the postfix there must still be room for the closing
	// character behind the prefix
	if (literal.length <= prefixLength || literal[$ - 1] != stringCloseChar)
		return false;

	if (parseEscapes)
	{
		// Count the backslashes directly in front of the closing quote. The
		// backslash is a single ASCII code unit, so iterating code units is
		// sufficient and can't fail on malformed UTF-8.
		size_t countBackslashes = 0;
		foreach_reverse (char c; literal[0 .. $ - 1])
		{
			if (c != '\\')
				break;
			countBackslashes++;
		}

		// check if end escapes the quote, making this an invalid string
		if ((countBackslashes % 2) != 0)
			return false; // uneven backslash count -> invalid end
	}

	return true;
}
87 
88 /// ditto
bool isStringLiteral(const(char)[] literal)
{
	// Only the verdict is of interest here; the details reported through the
	// out parameters of the full overload are discarded.
	int ignoredPrefixLength;
	bool ignoredPostfix;
	bool ignoredEscapes;
	char ignoredCloseChar;

	return isStringLiteral(literal, ignoredCloseChar, ignoredPostfix,
			ignoredEscapes, ignoredPrefixLength);
}
97 
98 ///
unittest
{
	// double quoted strings, also with non-ASCII content
	assert(isStringLiteral(`"hello"`));
	assert(isStringLiteral(`"ñ"`));
	assert(isStringLiteral(`"hello world!"`));
	// wysiwyg strings followed by a postfix character
	assert(isStringLiteral(`r"hello world!"c`));
	assert(isStringLiteral(`r"hello world!"d`));
	// token strings; a backslash before the closing brace is not an escape
	assert(isStringLiteral(`q{cool}`));
	assert(isStringLiteral(`q{cool\}`));
	// in double quoted strings an odd number of backslashes in front of the
	// closing quote escapes it, an even number does not
	assert(isStringLiteral(`"\\"`));
	assert(!isStringLiteral(`"\\\"`));
	assert(isStringLiteral(`"\\\\"`));
	assert(isStringLiteral(`"a\\\\"`));
	assert(!isStringLiteral(`"ñ\"`));
	assert(isStringLiteral(`"ñ\\"`));
	// literals without any content
	assert(isStringLiteral(`""`));
	assert(isStringLiteral(`q""`));
	assert(isStringLiteral(`x""`));
	// too short to be a literal
	assert(!isStringLiteral(``));
	assert(!isStringLiteral(`"`));
	// unknown prefix or missing opening/closing quote
	assert(!isStringLiteral(`w""`));
	assert(!isStringLiteral(`hello"`));
	assert(!isStringLiteral(`"hello`));
	assert(!isStringLiteral(`"hello world`));
	assert(!isStringLiteral(`hello world`));
	// prefix present but the literal is never closed properly
	assert(!isStringLiteral(`r"`));
	assert(!isStringLiteral(`rr"ok"`));
	assert(!isStringLiteral(`x"`));
	assert(!isStringLiteral(`x" `));
	assert(!isStringLiteral(`qqqq`));
}
130 
/// Selects how $(LREF unescapeString) reacts when it runs into an escape
/// sequence that is not valid.
enum InvalidEscapeAction
{
	/// Copy the backslash and the characters belonging to the broken escape
	/// to the output unchanged, exactly as written in the input string.
	keep = 0,
	/// Leave the offending characters out of the output. Named character
	/// entities are not implemented yet and are therefore still emitted the
	/// same way as with $(LREF keep).
	skip = 1,
	/// Raise a ConvException for invalid escape sequences. Unknown named
	/// character entities do not raise anything because they are not
	/// implemented yet; they are handled like with $(LREF keep).
	error = 2
}
147 
/**
 * Unescapes a D string, effectively being the same as mixing in the string into
 * some function call, but only for single string literals.
 *
 * Strips quotes, prefixes and suffixes, interprets escape sequences in normal
 * double quoted strings and interprets hex strings. Returns simple slices for
 * non-escaped strings.
 *
 * It's undefined how invalid/malformed strings are evaluated.
 *
 * Params:
 *   invalidEscapeAction = what to do with malformed escape sequences, hex
 *     strings and delimited strings
 *   input = source text of one complete string literal; must satisfy
 *     $(LREF isStringLiteral)
 *
 * Returns: the value of the literal. Line endings are normalized to `\n` for
 * every kind of literal except hex strings.
 *
 * Throws: ConvException on malformed content if invalidEscapeAction is
 * $(LREF InvalidEscapeAction.error).
 *
 * Bugs: doesn't check for validity of token strings, doesn't interpret named
 * character entity escape sequences, (HTML-kind escape sequences) doesn't check
 * nesting level of delimited strings.
 *
 * Standards: $(LINK https://dlang.org/spec/lex.html#string_literals)
 */
string unescapeString(
	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error
)(
	string input
)
in
{
	assert(isStringLiteral(input));
}
do
{
	// called again only to obtain the details about the literal
	char stringCloseChar;
	bool hasPostfix, parseEscapes;
	int prefixLength;
	isStringLiteral(input, stringCloseChar, hasPostfix, parseEscapes,
		prefixLength);

	if (hasPostfix)
		input = input[0 .. $ - 1];

	// everything between the prefix and the closing character
	auto content = input[prefixLength .. $ - 1];

	if (!content.length)
		return content;

	if (input[0] == 'x')
	{
		// hex string, obsolete but still implemented
		return parseHexStringContent!invalidEscapeAction(content);
	}
	else if (input[0] == 'q' && input[1] == '"')
	{
		content = content.normalizeNewLines;
		if (isIdentifierChar(content[0]))
		{
			// heredoc: the first line is the identifier that ends the string
			auto ln = content.indexOf('\n');
			if (ln == -1)
			{
				final switch (invalidEscapeAction)
				{
				case InvalidEscapeAction.keep:
					return content;
				case InvalidEscapeAction.skip:
					return null;
				case InvalidEscapeAction.error:
					throw new ConvException("Invalid delimited escape string");
				}
			}
			auto delimiter = content[0 .. ln];
			content = content[ln + 1 .. $];

			// An empty heredoc has its closing identifier directly on the
			// line after the opening one, so no "\n" precedes it in the
			// remaining content.
			if (content == delimiter)
				return content[0 .. 0];

			if (!content.endsWith(chain("\n", delimiter)))
			{
				final switch (invalidEscapeAction)
				{
				case InvalidEscapeAction.keep:
					return content;
				case InvalidEscapeAction.skip:
					// drop the last line which should have been the delimiter
					auto lastNl = content.lastIndexOf('\n');
					if (lastNl == -1)
						return content;
					else
						return content[0 .. lastNl];
				case InvalidEscapeAction.error:
					throw new ConvException("Delimited escape string not ending correctly");
				}
			}
			// the newline in front of the closing identifier is part of the value
			return content[0 .. $ - delimiter.length];
		}
		else
		{
			// single character delimiter, brackets are closed by their counterpart
			char delimiterChar = content[0];
			char endChar;
			switch (delimiterChar)
			{
			case '[': endChar = ']'; break;
			case '(': endChar = ')'; break;
			case '<': endChar = '>'; break;
			case '{': endChar = '}'; break;
			default: endChar = delimiterChar; break;
			}

			if (content[1 .. $].endsWith(endChar))
				return content[1 .. $ - 1];
			else
			{
				final switch (invalidEscapeAction)
				{
				case InvalidEscapeAction.keep:
					return content;
				case InvalidEscapeAction.skip:
					return content[1 .. $];
				case InvalidEscapeAction.error:
					throw new ConvException("Invalid delimited escape string");
				}
			}
		}
	}
	else
	{
		// wysiwyg, token and double quoted strings; only the latter have
		// escape sequences to interpret
		if (!parseEscapes)
			return content.normalizeNewLines;
		else
			return unescapeDoubleQuotedContent!invalidEscapeAction(
					content.normalizeNewLines);
	}
}
270 
271 ///
unittest
{
	// wysiwyg strings: backslashes are taken literally
	assert(unescapeString(q{r"I am Oz"}) == r"I am Oz");
	assert(unescapeString(q{r"c:\games\Sudoku.exe"}) == r"c:\games\Sudoku.exe");
	assert(unescapeString(q{r"ab\n"}) == r"ab\n");

	// backtick strings behave the same way
	assert(unescapeString(q{`the Great and Powerful.`}) == `the Great and Powerful.`);
	assert(unescapeString(q{`c:\games\Empire.exe`}) == `c:\games\Empire.exe`);
	assert(unescapeString(q{`The "lazy" dog`}) == `The "lazy" dog`);
	assert(unescapeString(q{`a"b\n`}) == `a"b\n`);

	// double quoted strings: escape sequences are interpreted
	assert(unescapeString(q{"Who are you?"}) == "Who are you?");
	assert(unescapeString(q{"c:\\games\\Doom.exe"}) == "c:\\games\\Doom.exe");
	assert(unescapeString(q{"ab\n"}) == "ab\n");

	// hex strings: pairs of hex digits become bytes, whitespace is ignored
	assert(unescapeString(`x"0A"`) == hexString!"0A");
	assert(unescapeString(`x"00 FBCD 32FD 0A"`) == hexString!"00 FBCD 32FD 0A");

	// delimited strings with bracket, heredoc and single character delimiters
	assert(unescapeString(`q"(foo(xxx))"`) == q"(foo(xxx))");
	assert(unescapeString(`q"[foo{]"`) == q"[foo{]");
	assert(unescapeString(`q"<foo{>"`) == q"<foo{>");
	assert(unescapeString(`q"{foo(}"`) == q"{foo(}");
	assert(unescapeString(`q"EOS
This
is a multi-line
heredoc string
EOS"`) == q"EOS
This
is a multi-line
heredoc string
EOS");
	assert(unescapeString(`q"/foo]/"`) == `foo]`);

	// token strings
	assert(unescapeString(`q{this is the voice of}`) == q{this is the voice of});
	assert(unescapeString(`q{/*}*/ }`) == q{/*}*/ });
	assert(unescapeString(`q{ world(q{control}); }`) == q{ world(q{control}); });
	assert(unescapeString(`q{ __TIME__ }`) == q{ __TIME__ });

	// the postfix character is stripped and does not change the result
	assert(unescapeString(q{"hello"c}) == "hello");
	assert(unescapeString(q{"hello"w}) == "hello");
	assert(unescapeString(q{"hello"d}) == "hello");

	// individual escape sequences: quotes, hex, unicode and octal
	assert(unescapeString(`""`) == "");
	assert(unescapeString(`"hello\'world\"cool\""`) == "hello\'world\"cool\"");
	assert(unescapeString(`"\x0A"`) == "\x0A");
	assert(unescapeString(`"\u200b"`) == "\u200b");
	assert(unescapeString(`"\U0001F4A9"`) == "\U0001F4A9");
	assert(unescapeString(`"\0"`) == "\0");
	assert(unescapeString(`"\1"`) == "\1");
	assert(unescapeString(`"\12"`) == "\12");
	assert(unescapeString(`"\127"`) == "\127");
	assert(unescapeString(`"\1278"`) == "\1278");
	assert(unescapeString(`"\12a8"`) == "\12a8");
	assert(unescapeString(`"\1a28"`) == "\1a28");
	assert(unescapeString(`x"afDE"`) == "\xaf\xDE");
	// all kinds of line endings in the source text end up as \n
	assert(unescapeString("\"hello\nworld\rfoo\r\nbar\u2028ok\u2029\"")
			== "hello\nworld\nfoo\nbar\nok\n");
}
330 
unittest
{
	import std.exception : assertThrown;

	alias keepInvalid = unescapeString!(InvalidEscapeAction.keep);
	alias skipInvalid = unescapeString!(InvalidEscapeAction.skip);

	// named character entities are not implemented: a terminated entity is
	// copied to the output together with its backslash
	assert(unescapeString(`"\&foo;"`) == "\\&foo;");

	// an entity without the terminating semicolon is an invalid escape
	string unterminated = `"\&foo"`;
	assertThrown!ConvException(unescapeString(unterminated));
	assert(keepInvalid(unterminated) == "\\&foo");
	assert(skipInvalid(unterminated) == "");
}
342 
unittest
{
	import std.exception : assertThrown;

	// Each group checks one malformed input against all three
	// InvalidEscapeAction modes: error (the default), keep and skip.

	// heredoc identifier without a following line
	assertThrown!ConvException(unescapeString(`q"EOS"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`q"EOS"`) == "EOS");
	assert(unescapeString!(InvalidEscapeAction.skip)(`q"EOS"`) == "");

	// heredoc that is not terminated by its identifier
	assertThrown!ConvException(unescapeString(`q"EOS
hello"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`q"EOS
hello"`) == "hello");
	assert(unescapeString!(InvalidEscapeAction.skip)(`q"EOS
hello"`) == "hello");
	assert(unescapeString!(InvalidEscapeAction.skip)(`q"EOS
hello
world"`) == "hello");

	// single character delimiter that is never closed
	assertThrown!ConvException(unescapeString(`q"/xd"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`q"/xd"`) == "/xd");
	assert(unescapeString!(InvalidEscapeAction.skip)(`q"/xd"`) == "xd");

	// \x, \u and \U escapes with too few digits
	assertThrown!ConvException(unescapeString(`"\x"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`"\x"`) == "\\x");
	assert(unescapeString!(InvalidEscapeAction.skip)(`"\x"`) == "");

	assertThrown!ConvException(unescapeString(`"\u0"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`"\u0"`) == "\\u0");
	assert(unescapeString!(InvalidEscapeAction.skip)(`"\u0"`) == "");

	assertThrown!ConvException(unescapeString(`"\U0000000"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`"\U0000000"`) == "\\U0000000");
	assert(unescapeString!(InvalidEscapeAction.skip)(`"\U0000000"`) == "");

	// \x and \u escapes containing a character that is not a hex digit
	assertThrown!ConvException(unescapeString(`"\xAG"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`"\xAG"`) == "\\xAG");
	assert(unescapeString!(InvalidEscapeAction.skip)(`"\xAG"`) == "");

	assertThrown!ConvException(unescapeString(`"\u00AG"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`"\u00AG"`) == "\\u00AG");
	assert(unescapeString!(InvalidEscapeAction.skip)(`"\u00AG"`) == "");

	// lone backslash at the very end of the content
	assertThrown!ConvException(unescapeDoubleQuotedContent(`a\`));
	assert(unescapeDoubleQuotedContent!(InvalidEscapeAction.keep)(`a\`) == "a\\");
	assert(unescapeDoubleQuotedContent!(InvalidEscapeAction.skip)(`a\`) == "a");

	// unknown escape character; skip only drops the backslash
	assertThrown!ConvException(unescapeString(`"\z"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`"\z"`) == "\\z");
	assert(unescapeString!(InvalidEscapeAction.skip)(`"\z"`) == "z");

	// empty hex content is returned unchanged
	assert(parseHexStringContent("") == "");

	// hex string containing a character that is not a hex digit
	assertThrown!ConvException(unescapeString(`x"AG"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`x"AG"`) == "AG");
	assert(unescapeString!(InvalidEscapeAction.skip)(`x"AG"`) == "");

	// hex string with an odd number of digits
	assertThrown!ConvException(unescapeString(`x"A"`));
	assert(unescapeString!(InvalidEscapeAction.keep)(`x"A"`) == "A");
	assert(unescapeString!(InvalidEscapeAction.skip)(`x"A"`) == "");
}
403 
/**
 * Interprets the backslash escape sequences inside the content of a double
 * quoted string.
 *
 * Params:
 *   invalidEscapeAction = what to do with escape sequences that are
 *     incomplete, contain invalid digits, encode a value that can't be
 *     represented or start with an unknown character
 *   input = the text between the quotes of a double quoted string literal
 *
 * Returns: input itself if it contains no backslash, otherwise a newly built
 * string in which the escape sequences are replaced by their values. Named
 * character entities (`\&name;`) are copied unchanged.
 *
 * Throws: ConvException on invalid escape sequences if invalidEscapeAction is
 * InvalidEscapeAction.error.
 */
private string unescapeDoubleQuotedContent(
	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error
)(
	string input
)
{
	auto escape = input.indexOf('\\');
	if (escape == -1)
		return input;

	auto ret = appender!string;
	ret.reserve(input.length);
	// index of the first input character that has not been handled yet
	size_t start = 0;

	// Checks that at least `length` characters follow the backslash at
	// `escape`. If not, the rest of the input is consumed according to
	// invalidEscapeAction and false is returned.
	bool requireMinLength(size_t length)
	{
		if (escape + length >= input.length)
		{
			final switch (invalidEscapeAction)
			{
			case InvalidEscapeAction.keep:
				ret ~= input[start .. $];
				start = input.length;
				return false;
			case InvalidEscapeAction.skip:
				start = input.length;
				return false;
			case InvalidEscapeAction.error:
				throw new ConvException("Unfinished escape at end of string");
			}
		}
		else
		{
			return true;
		}
	}

	// Handles the invalid escape spanning `start .. continueAt` according to
	// invalidEscapeAction and continues processing at `continueAt`.
	void errorInvalidCharacter(size_t continueAt)
	{
		final switch (invalidEscapeAction)
		{
		case InvalidEscapeAction.keep:
			ret ~= input[start .. continueAt];
			start = continueAt;
			break;
		case InvalidEscapeAction.skip:
			start = continueAt;
			break;
		case InvalidEscapeAction.error:
			throw new ConvException("Invalid escape character before index "
					~ continueAt.to!string);
		}
	}

	// Decodes the `length` hex digits of a \u or \U escape and appends the
	// code point as UTF-8.
	void parseUnicode(size_t length)
	{
		immutable size_t end = escape + 2 + length;
		auto digits = input[escape + 2 .. end];
		if (!digits.all!isHexDigit)
		{
			errorInvalidCharacter(end);
			return;
		}
		immutable dchar ch = cast(dchar) digits.to!uint(16);
		// Surrogates and values above U+10FFFF are not encodable; report them
		// like any other invalid escape instead of letting encode throw.
		if (!isValidDchar(ch))
		{
			errorInvalidCharacter(end);
			return;
		}
		char[4] buf;
		auto size = encode(buf, ch);
		ret ~= buf[0 .. size];
		start = end;
	}

	Loop: while (escape != -1)
	{
		// copy the literal text in front of the backslash
		ret ~= input[start .. escape];
		start = escape;

		if (!requireMinLength(1))
			break;

		switch (input[escape + 1])
		{
		case '\'':
		case '"':
		case '?':
		case '\\':
			ret ~= input[escape + 1];
			start = escape + 2;
			break;

		case 'a': ret ~= '\a'; start = escape + 2; break;
		case 'b': ret ~= '\b'; start = escape + 2; break;
		case 'f': ret ~= '\f'; start = escape + 2; break;
		case 'n': ret ~= '\n'; start = escape + 2; break;
		case 'r': ret ~= '\r'; start = escape + 2; break;
		case 't': ret ~= '\t'; start = escape + 2; break;
		case 'v': ret ~= '\v'; start = escape + 2; break;

		case 'x':
			if (!requireMinLength(3))
				break Loop;
			char a = input[escape + 2];
			char b = input[escape + 3];
			if (!a.isHexDigit || !b.isHexDigit)
			{
				errorInvalidCharacter(escape + 4);
				break;
			}
			ret ~= cast(char)(a.parseHexChar << 4 | b.parseHexChar);
			start = escape + 4;
			break;
		case 'u':
			if (!requireMinLength(1 + 4))
				break Loop;
			parseUnicode(4);
			break;
		case 'U':
			if (!requireMinLength(1 + 8))
				break Loop;
			parseUnicode(8);
			break;
		case '0': .. case '7':
			// one to three octal digits
			int length = 1;
			foreach (n; 2 .. 4)
			{
				if (escape + 1 + n > input.length)
					break;
				char digit = input[escape + n];
				if (digit >= '0' && digit <= '7')
					length = n;
				else
					break;
			}
			int value = input[escape + 1 .. escape + 1 + length].to!int(8);
			if (value > 0xFF)
			{
				// three digit escapes above \377 don't fit into a code unit
				errorInvalidCharacter(escape + 1 + length);
				break;
			}
			ret ~= cast(char) value;
			start = escape + 1 + length;
			break;
		case '&':
			// named character entity: not interpreted, copied as is
			auto end = input.indexOf(';', escape + 2);
			if (end == -1)
			{
				errorInvalidCharacter(input.length);
			}
			else
			{
				ret ~= input[escape .. end + 1];
				start = end + 1;
			}
			break;
		default:
			// only the backslash is invalid, the next character is kept
			errorInvalidCharacter(escape + 1);
			break;
		}

		escape = input.indexOf('\\', start);
	}
	ret ~= input[start .. $];
	return ret.data;
}
562 
unittest
{
	// compares the unescaped form of `raw` with the expected value
	static void check(string raw, string expected)
	{
		assert(unescapeDoubleQuotedContent(raw) == expected);
	}

	// no escapes at all
	check(`hello world`, "hello world");
	// simple and unicode escapes
	check(`hello\nworld`, "hello\nworld");
	check(`hello\tworld`, "hello\tworld");
	check(`hello\u200bworld`, "hello\u200bworld");
	// escaped quote and backslash, also next to non-ASCII text
	check(`hello \"\\ok`, "hello \"\\ok");
	check(`こんにちは \"\\ñ`, "こんにちは \"\\ñ");
}
572 
/**
 * Converts the content of a hex string literal into the bytes it describes.
 *
 * Two hex digits form one output character, ASCII whitespace is ignored.
 * Characters that are not hex digits and a single left over digit at the end
 * are handled according to invalidEscapeAction.
 *
 * Params:
 *   invalidEscapeAction = how to treat invalid characters and a dangling digit
 *   input = the text between the quotes of a hex string literal
 *
 * Returns: input itself if it is empty, otherwise the decoded string.
 *
 * Throws: ConvException on invalid content if invalidEscapeAction is
 * InvalidEscapeAction.error.
 */
private string parseHexStringContent(
	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error
)(
	string input
)
{
	if (input.length == 0)
		return input;

	// char.init is no hex digit, so it can mark "no digit waiting for its
	// partner"
	enum char noDigit = char.init;

	auto decoded = appender!string;
	decoded.reserve(input.length / 3);
	char highDigit = noDigit;

	foreach (index, char current; input)
	{
		if (current.isWhite)
			continue;

		if (current.isHexDigit)
		{
			if (highDigit == noDigit)
			{
				// first digit of a pair, wait for the second one
				highDigit = current;
			}
			else
			{
				decoded ~= cast(char)(highDigit.parseHexChar << 4 | current.parseHexChar);
				highDigit = noDigit;
			}
			continue;
		}

		// not a hex digit
		static if (invalidEscapeAction == InvalidEscapeAction.keep)
		{
			// emit a waiting digit unchanged, then the invalid character
			if (highDigit != noDigit)
			{
				decoded ~= highDigit;
				highDigit = noDigit;
			}
			decoded ~= current;
		}
		else static if (invalidEscapeAction == InvalidEscapeAction.error)
		{
			throw new ConvException("Invalid hex character at index "
					~ index.to!string);
		}
	}

	// a digit without a partner is left over
	if (highDigit != noDigit)
	{
		static if (invalidEscapeAction == InvalidEscapeAction.keep)
			decoded ~= highDigit;
		else static if (invalidEscapeAction == InvalidEscapeAction.error)
			throw new ConvException("Unterminated hex character at end of string");
	}

	return decoded.data;
}
639 
/// Returns the numeric value (0 to 15) of the hex digit `c`, which may be
/// upper or lower case. `c` must be a hex digit.
private int parseHexChar(char c)
in
{
	assert(c.isHexDigit);
	assert('a' > 'A' && 'A' > '0'); // just checking that ASCII doesn't suddenly change
}
do
{
	// The precondition guarantees a hex digit, so checking the lower end of
	// each range is enough to tell the three ranges apart.
	immutable int offset = c >= 'a' ? 'a' - 10 : (c >= 'A' ? 'A' - 10 : '0');
	return c - offset;
}
656 
/// Tells whether `c` is an underscore or satisfies std.ascii.isAlphaNum.
private bool isIdentifierChar(char c)
{
	if (c == '_')
		return true;
	return isAlphaNum(c);
}
661 
/// Replaces every `\r\n`, lone `\r`, U+2028 and U+2029 in `text` with a single
/// `\n`, as parsed in D strings. Returns `text` itself when it contains none
/// of these.
private string normalizeNewLines(string text)
{
	import std.utf : codeLength;

	// U+2028 and U+2029 occupy the same number of UTF-8 code units
	enum separatorLength = codeLength!char('\u2028');
	static immutable dchar[] lineBreaks = ['\r', '\u2028', '\u2029'];

	auto hit = text.indexOfAny(lineBreaks);
	if (hit == -1)
		return text;

	auto normalized = appender!string;
	normalized.reserve(text.length);
	size_t copied = 0;
	do
	{
		normalized ~= text[copied .. hit];
		normalized ~= '\n';

		// number of code units occupied by the line break found at `hit`
		size_t width;
		if (text[hit] != '\r')
			width = separatorLength;
		else if (hit + 1 < text.length && text[hit + 1] == '\n')
			width = 2;
		else
			width = 1;

		copied = hit + width;
		hit = text.indexOfAny(lineBreaks, copied);
	}
	while (hit != -1);

	normalized ~= text[copied .. $];
	return normalized.data;
}
690 
691 ///
unittest
{
	// text that only contains \n is returned as the very same slice
	string testNoChange = "hello\nworld!";
	assert(normalizeNewLines(testNoChange).ptr is testNoChange.ptr);

	// \r and \r\n each become a single \n, an existing \n is left alone
	assert(normalizeNewLines("hello\rworld") == "hello\nworld");
	assert(normalizeNewLines("hello\r\nworld") == "hello\nworld");
	assert(normalizeNewLines("hello\r\n\nworld") == "hello\n\nworld");
	// the unicode line and paragraph separators are replaced as well
	assert(normalizeNewLines("hello\u2028\nworld") == "hello\n\nworld");
	assert(normalizeNewLines("hello\u2029\nworld") == "hello\n\nworld");
	// line break at the very end of the text
	assert(normalizeNewLines("hello\r") == "hello\n");
}