dparse.strings source code

1 /// Utility for unescaping D string literals of any kind
2 module dparse.strings;
3 
4 import std.algorithm;
5 import std.array;
6 import std.ascii : isAlphaNum, isHexDigit, isWhite;
7 import std.conv;
8 import std.range;
9 import std.string;
10 import std.utf;
11 
/**
 * Tests whether `literal` is framed by the opening and closing sequences
 * (prefix, quotes, optional postfix) of any kind of D string literal.
 *
 * Params:
 *   literal = the text to inspect, including quotes, prefix and postfix
 *   stringCloseChar = receives the character expected to close the literal
 *   hasPostfix = set to true when a trailing `c`, `w` or `d` was found
 *   parseEscapes = set to true for double quoted strings, the only kind in
 *     which backslash escapes are interpreted
 *   prefixLength = receives the number of characters before the content
 *
 * Returns: true if the start and end of `literal` form a string literal.
 *
 * Bugs: doesn't check for validity of token strings.
 *
 * Standards: $(LINK https://dlang.org/spec/lex.html#string_literals)
 */
bool isStringLiteral(const(char)[] literal, out char stringCloseChar,
		out bool hasPostfix, out bool parseEscapes, out int prefixLength)
{
	// the shortest possible literal is a pair of quotes
	if (literal.length < 2)
		return false;

	immutable char first = literal[0];
	immutable char second = literal[1];
	bool postfixPermitted;

	if (first == 'r' || first == 'x')
	{
		// WysiwygString or HexString: the prefix must be followed by a quote
		if (second != '"')
			return false;
		stringCloseChar = '"';
		postfixPermitted = true;
		prefixLength = 2;
	}
	else if (first == 'q')
	{
		// token string (q{...}) or delimited string (q"...")
		if (second == '{')
			stringCloseChar = '}';
		else if (second == '"')
			stringCloseChar = '"';
		else
			return false;

		postfixPermitted = false;
		prefixLength = 2;
	}
	else if (first == '`' || first == '"')
	{
		// the literal is closed by the same character that opened it
		stringCloseChar = first;
		postfixPermitted = true;
		parseEscapes = first == '"';
		prefixLength = 1;
	}
	else
	{
		return false;
	}

	// strip the optional character width postfix before checking the end
	if (postfixPermitted && literal[$ - 1].among!('c', 'w', 'd'))
	{
		hasPostfix = true;
		literal = literal[0 .. $ - 1];
	}

	if (literal.length <= prefixLength)
		return false;
	if (literal[$ - 1] != stringCloseChar)
		return false;

	if (!parseEscapes)
		return true;

	// A closing quote preceded by an odd number of backslashes is escaped
	// and therefore does not terminate the string.
	auto lastNonBackslash = literal[0 .. $ - 1].lastIndexOfNeither("\\");
	if (lastNonBackslash == -1)
		return true;

	// The distance counts the backslashes plus the two surrounding
	// characters, so its parity equals the parity of the backslash count.
	return (literal.length - lastNonBackslash) % 2 == 0;
}
86 
/// ditto
bool isStringLiteral(const(char)[] literal)
{
	// Only the verdict matters here; the detailed results are discarded.
	char ignoredCloseChar;
	bool ignoredPostfix;
	bool ignoredEscapes;
	int ignoredPrefixLength;

	immutable bool valid = isStringLiteral(literal, ignoredCloseChar,
			ignoredPostfix, ignoredEscapes, ignoredPrefixLength);
	return valid;
}
96 
///
unittest
{
	// inputs with a valid start and end sequence
	static immutable string[] accepted = [
		`"hello"`,
		`"hello world!"`,
		`r"hello world!"c`,
		`r"hello world!"d`,
		`q{cool}`,
		`q{cool\}`,
		`"\\"`,
		`"\\\\"`,
		`"a\\\\"`,
		`""`,
		`q""`,
		`x""`,
	];
	foreach (literal; accepted)
		assert(isStringLiteral(literal), literal);

	// inputs that are not framed like a string literal
	static immutable string[] rejected = [
		`"\\\"`,
		``,
		`"`,
		`w""`,
		`hello"`,
		`"hello`,
		`"hello world`,
		`hello world`,
		`r"`,
		`rr"ok"`,
		`x"`,
		`x" `,
		`qqqq`,
	];
	foreach (literal; rejected)
		assert(!isStringLiteral(literal), literal);
}
126 
/**
 * Selects what $(LREF unescapeString) does when it encounters an escape
 * sequence it cannot interpret.
 */
enum InvalidEscapeAction
{
	/// Copy the offending sequence, backslash included, into the output
	/// exactly as it appears in the input.
	keep,
	/// Drop the offending characters from the output. Named character
	/// entities are not implemented yet and are therefore still passed
	/// through as with $(LREF keep).
	skip,
	/// Raise a ConvException for invalid escape sequences. Unknown named
	/// character entities do not raise, because they are not implemented
	/// yet; they are treated as with $(LREF keep).
	error
}
143 
/**
 * Unescapes a D string, effectively being the same as mixing in the string into
 * some function call, but only for single string literals.
 *
 * Strips quotes, prefixes and suffixes, interprets escape sequences in normal
 * double quoted strings and interprets hex strings. Returns simple slices for
 * non-escaped strings.
 *
 * It's undefined how invalid/malformed strings are evaluated.
 *
 * Params:
 *   invalidEscapeAction = how malformed content is handled, see
 *     $(LREF InvalidEscapeAction)
 *   input = the complete literal including quotes, prefix and postfix; must
 *     satisfy $(LREF isStringLiteral)
 *
 * Returns: the value the literal evaluates to. Line endings inside the
 * literal are normalized to `\n`.
 *
 * Throws: ConvException when `invalidEscapeAction` is
 * `InvalidEscapeAction.error` and a malformed delimited string, hex string or
 * escape sequence is found.
 *
 * Bugs: doesn't check for validity of token strings, doesn't interpret named
 * character entity escape sequences, (HTML-kind escape sequences) doesn't check
 * nesting level of delimited strings.
 *
 * Standards: $(LINK https://dlang.org/spec/lex.html#string_literals)
 */
string unescapeString(
	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error
)(
	string input
)
in
{
	assert(isStringLiteral(input));
}
do
{
	// called again only to obtain the layout information of the literal
	char stringCloseChar;
	bool hasPostfix, parseEscapes;
	int prefixLength;
	isStringLiteral(input, stringCloseChar, hasPostfix, parseEscapes,
		prefixLength);

	if (hasPostfix)
		input = input[0 .. $ - 1];

	// everything between the prefix and the closing character
	auto content = input[prefixLength .. $ - 1];

	if (!content.length)
		return content;

	if (input[0] == 'x')
	{
		// hex string, obsolete but still implemented
		return parseHexStringContent!invalidEscapeAction(content);
	}
	else if (input[0] == 'q' && input[1] == '"')
	{
		content = content.normalizeNewLines;
		if (isIdentifierChar(content[0]))
		{
			// heredoc: the first line is the delimiter identifier
			auto ln = content.indexOf('\n');
			if (ln == -1)
			{
				final switch (invalidEscapeAction)
				{
				case InvalidEscapeAction.keep:
					return content;
				case InvalidEscapeAction.skip:
					return null;
				case InvalidEscapeAction.error:
					throw new ConvException("Invalid delimited escape string");
				}
			}
			auto delimiter = content[0 .. ln];
			content = content[ln + 1 .. $];

			// An empty heredoc: the closing delimiter directly follows the
			// opening line, so there is no "\n" in front of it to match.
			if (content == delimiter)
				return content[0 .. 0];

			if (!content.endsWith(chain("\n", delimiter)))
			{
				final switch (invalidEscapeAction)
				{
				case InvalidEscapeAction.keep:
					return content;
				case InvalidEscapeAction.skip:
					// drop the last line, which should have been the delimiter
					auto lastNl = content.lastIndexOf('\n');
					if (lastNl == -1)
						return content;
					else
						return content[0 .. lastNl];
				case InvalidEscapeAction.error:
					throw new ConvException("Delimited escape string not ending correctly");
				}
			}
			// the newline in front of the closing delimiter is part of the value
			return content[0 .. $ - delimiter.length];
		}
		else
		{
			// single character delimiter; brackets close with their counterpart
			char delimiterChar = content[0];
			char endChar;
			switch (delimiterChar)
			{
			case '[': endChar = ']'; break;
			case '(': endChar = ')'; break;
			case '<': endChar = '>'; break;
			case '{': endChar = '}'; break;
			default: endChar = delimiterChar; break;
			}

			if (content[1 .. $].endsWith(endChar))
				return content[1 .. $ - 1];
			else
			{
				final switch (invalidEscapeAction)
				{
				case InvalidEscapeAction.keep:
					return content;
				case InvalidEscapeAction.skip:
					return content[1 .. $];
				case InvalidEscapeAction.error:
					throw new ConvException("Invalid delimited escape string");
				}
			}
		}
	}
	else
	{
		// wysiwyg, backtick, token and double quoted strings
		if (!parseEscapes)
			return content.normalizeNewLines;
		else
			return unescapeDoubleQuotedContent!invalidEscapeAction(
					content.normalizeNewLines);
	}
}
266 
///
unittest
{
	// checks that a literal evaluates to the expected value
	static void expect(string literal, string expected)
	{
		assert(unescapeString(literal) == expected, literal);
	}

	// wysiwyg strings with r prefix
	expect(q{r"I am Oz"}, r"I am Oz");
	expect(q{r"c:\games\Sudoku.exe"}, r"c:\games\Sudoku.exe");
	expect(q{r"ab\n"}, r"ab\n");

	// wysiwyg strings in backticks
	expect(q{`the Great and Powerful.`}, `the Great and Powerful.`);
	expect(q{`c:\games\Empire.exe`}, `c:\games\Empire.exe`);
	expect(q{`The "lazy" dog`}, `The "lazy" dog`);
	expect(q{`a"b\n`}, `a"b\n`);

	// double quoted strings
	expect(q{"Who are you?"}, "Who are you?");
	expect(q{"c:\\games\\Doom.exe"}, "c:\\games\\Doom.exe");
	expect(q{"ab\n"}, "ab\n");

	// hex strings
	expect(`x"0A"`, hexString!"0A");
	expect(`x"00 FBCD 32FD 0A"`, hexString!"00 FBCD 32FD 0A");

	// delimited strings
	expect(`q"(foo(xxx))"`, q"(foo(xxx))");
	expect(`q"[foo{]"`, q"[foo{]");
	expect(`q"<foo{>"`, q"<foo{>");
	expect(`q"{foo(}"`, q"{foo(}");
	expect(`q"EOS
This
is a multi-line
heredoc string
EOS"`, q"EOS
This
is a multi-line
heredoc string
EOS");
	expect(`q"/foo]/"`, `foo]`);

	// token strings
	expect(`q{this is the voice of}`, q{this is the voice of});
	expect(`q{/*}*/ }`, q{/*}*/ });
	expect(`q{ world(q{control}); }`, q{ world(q{control}); });
	expect(`q{ __TIME__ }`, q{ __TIME__ });

	// postfixes are stripped
	expect(q{"hello"c}, "hello");
	expect(q{"hello"w}, "hello");
	expect(q{"hello"d}, "hello");

	// escape sequences
	expect(`""`, "");
	expect(`"hello\'world\"cool\""`, "hello\'world\"cool\"");
	expect(`"\x0A"`, "\x0A");
	expect(`"\u200b"`, "\u200b");
	expect(`"\U0001F4A9"`, "\U0001F4A9");
	expect(`"\0"`, "\0");
	expect(`"\1"`, "\1");
	expect(`"\12"`, "\12");
	expect(`"\127"`, "\127");
	expect(`"\1278"`, "\1278");
	expect(`"\12a8"`, "\12a8");
	expect(`"\1a28"`, "\1a28");
	expect(`x"afDE"`, "\xaf\xDE");

	// line endings are normalized
	expect("\"hello\nworld\rfoo\r\nbar\u2028ok\u2029\"",
			"hello\nworld\nfoo\nbar\nok\n");
}
326 
unittest
{
	import std.exception : assertThrown;

	alias keepInvalid = unescapeString!(InvalidEscapeAction.keep);
	alias skipInvalid = unescapeString!(InvalidEscapeAction.skip);

	// unimplemented named characters are passed through unchanged
	assert(unescapeString(`"\&foo;"`) == "\\&foo;");

	// a named character without the terminating semicolon is invalid
	enum unterminated = `"\&foo"`;
	assertThrown!ConvException(unescapeString(unterminated));
	assert(keepInvalid(unterminated) == "\\&foo");
	assert(skipInvalid(unterminated) == "");
}
338 
unittest
{
	import std.exception : assertThrown;

	alias keepInvalid = unescapeString!(InvalidEscapeAction.keep);
	alias skipInvalid = unescapeString!(InvalidEscapeAction.skip);

	// checks all three actions against one malformed literal
	void expectInvalid(string literal, string kept, string skipped)
	{
		assertThrown!ConvException(unescapeString(literal));
		assert(keepInvalid(literal) == kept, literal);
		assert(skipInvalid(literal) == skipped, literal);
	}

	// heredoc without any content line
	expectInvalid(`q"EOS"`, "EOS", "");

	// heredoc without closing delimiter
	expectInvalid(`q"EOS
hello"`, "hello", "hello");
	assert(skipInvalid(`q"EOS
hello
world"`) == "hello");

	// delimited string without closing delimiter character
	expectInvalid(`q"/xd"`, "/xd", "xd");

	// truncated escape sequences
	expectInvalid(`"\x"`, "\\x", "");
	expectInvalid(`"\u0"`, "\\u0", "");
	expectInvalid(`"\U0000000"`, "\\U0000000", "");

	// escape sequences containing non-hex characters
	expectInvalid(`"\xAG"`, "\\xAG", "");
	expectInvalid(`"\u00AG"`, "\\u00AG", "");

	// lone backslash at the end of the content
	assertThrown!ConvException(unescapeDoubleQuotedContent(`a\`));
	assert(unescapeDoubleQuotedContent!(InvalidEscapeAction.keep)(`a\`) == "a\\");
	assert(unescapeDoubleQuotedContent!(InvalidEscapeAction.skip)(`a\`) == "a");

	// unknown escape character
	expectInvalid(`"\z"`, "\\z", "z");

	// hex strings
	assert(parseHexStringContent("") == "");
	expectInvalid(`x"AG"`, "AG", "");
	expectInvalid(`x"A"`, "A", "");
}
399 
/**
 * Interprets the backslash escape sequences inside the content of a double
 * quoted string (the text between the quotes).
 *
 * Params:
 *   invalidEscapeAction = what to do with escape sequences that cannot be
 *     interpreted, see $(LREF InvalidEscapeAction)
 *   input = the string content without the surrounding quotes
 *
 * Returns: `input` itself if it contains no backslash, otherwise a newly built
 * string with all escape sequences replaced. Named character entities
 * (`\&name;`) are copied to the output unchanged.
 *
 * Throws: ConvException for `InvalidEscapeAction.error` on truncated or
 * unknown escape sequences, non-hex digits in `\x`, `\u` and `\U` escapes,
 * `\u`/`\U` escapes that are not valid Unicode code points and octal escapes
 * larger than `\377`.
 */
private string unescapeDoubleQuotedContent(
	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error
)(
	string input
)
{
	auto escape = input.indexOf('\\');
	if (escape == -1)
		return input;

	auto ret = appender!string;
	ret.reserve(input.length);
	// index of the first input character not yet handled
	size_t start = 0;

	// Checks that `length` characters follow the backslash at `escape`.
	// Otherwise the rest of the input is consumed according to the action
	// and false is returned.
	bool requireMinLength(size_t length)
	{
		if (escape + length >= input.length)
		{
			final switch (invalidEscapeAction)
			{
			case InvalidEscapeAction.keep:
				ret ~= input[start .. $];
				start = input.length;
				return false;
			case InvalidEscapeAction.skip:
				start = input.length;
				return false;
			case InvalidEscapeAction.error:
				throw new ConvException("Unfinished escape at end of string");
			}
		}
		else
		{
			return true;
		}
	}

	// Handles the invalid sequence `input[start .. continueAt]` according to
	// the action and resumes processing at `continueAt`.
	void errorInvalidCharacter(size_t continueAt)
	{
		final switch (invalidEscapeAction)
		{
		case InvalidEscapeAction.keep:
			ret ~= input[start .. continueAt];
			start = continueAt;
			break;
		case InvalidEscapeAction.skip:
			start = continueAt;
			break;
		case InvalidEscapeAction.error:
			throw new ConvException("Invalid escape character before index "
					~ continueAt.to!string);
		}
	}

	// Decodes the `length` hex digits after `\u` or `\U` and appends the
	// code point UTF-8 encoded. Returns false for an invalid sequence.
	bool parseUnicode(size_t length)
	{
		auto c = input[escape + 2 .. escape + 2 + length];
		if (!c.all!isHexDigit)
		{
			errorInvalidCharacter(escape + 2 + length);
			return false;
		}
		dchar ch = cast(dchar) c.to!uint(16);
		// Surrogates and values above U+10FFFF would make encode throw a
		// UTFException, bypassing the configured action; treat them like
		// any other invalid escape sequence instead.
		if (!isValidDchar(ch))
		{
			errorInvalidCharacter(escape + 2 + length);
			return false;
		}
		char[4] buf;
		auto size = encode(buf, ch);
		ret ~= buf[0 .. size];
		start = escape + 2 + length;
		return true;
	}

	Loop: while (escape != -1)
	{
		// copy the plain text in front of the backslash
		ret ~= input[start .. escape];
		start = escape;

		if (!requireMinLength(1))
			break;

		switch (input[escape + 1])
		{
		case '\'':
		case '"':
		case '?':
		case '\\':
			ret ~= input[escape + 1];
			start = escape + 2;
			break;

		case 'a': ret ~= '\a'; start = escape + 2; break;
		case 'b': ret ~= '\b'; start = escape + 2; break;
		case 'f': ret ~= '\f'; start = escape + 2; break;
		case 'n': ret ~= '\n'; start = escape + 2; break;
		case 'r': ret ~= '\r'; start = escape + 2; break;
		case 't': ret ~= '\t'; start = escape + 2; break;
		case 'v': ret ~= '\v'; start = escape + 2; break;

		case 'x':
			if (!requireMinLength(3))
				break Loop;
			char a = input[escape + 2];
			char b = input[escape + 3];
			if (!a.isHexDigit || !b.isHexDigit)
			{
				errorInvalidCharacter(escape + 4);
				break;
			}
			ret ~= cast(char)(a.parseHexChar << 4 | b.parseHexChar);
			start = escape + 4;
			break;
		case 'u':
			if (!requireMinLength(1 + 4))
				break Loop;
			parseUnicode(4);
			break;
		case 'U':
			if (!requireMinLength(1 + 8))
				break Loop;
			parseUnicode(8);
			break;
		case '0': .. case '7':
			// one to three octal digits; stop at the first non-octal character
			int length = 1;
			foreach (n; 2 .. 4)
			{
				if (escape + 1 + n > input.length)
					break;
				char c = input[escape + n];
				if (c >= '0' && c <= '7')
					length = n;
				else
					break;
			}
			int value = input[escape + 1 .. escape + 1 + length].to!int(8);
			// three octal digits can encode up to 511, which does not fit
			// into one char; reject instead of silently truncating
			if (value > 0xFF)
			{
				errorInvalidCharacter(escape + 1 + length);
				break;
			}
			ret ~= cast(char) value;
			start = escape + 1 + length;
			break;
		case '&':
			// named character entities are not interpreted, only copied
			auto end = input.indexOf(';', escape + 2);
			if (end == -1)
			{
				errorInvalidCharacter(input.length);
			}
			else
			{
				ret ~= input[escape .. end + 1];
				start = end + 1;
			}
			break;
		default:
			// unknown escape: only the backslash itself is the invalid part
			errorInvalidCharacter(escape + 1);
			break;
		}

		escape = input.indexOf('\\', start);
	}
	ret ~= input[start .. $];
	return ret.data;
}
558 
unittest
{
	// checks that raw string content unescapes to the expected value
	static void expect(string raw, string expected)
	{
		assert(unescapeDoubleQuotedContent(raw) == expected, raw);
	}

	expect(`hello world`, "hello world");
	expect(`hello\nworld`, "hello\nworld");
	expect(`hello\tworld`, "hello\tworld");
	expect(`hello\u200bworld`, "hello\u200bworld");
	expect(`hello \"\\ok`, "hello \"\\ok");
}
567 
/**
 * Converts the content of a hex string (the text between `x"` and `"`) into
 * the bytes it describes. Whitespace between the hex digits is ignored and
 * every pair of hex digits yields one output character.
 *
 * Params:
 *   invalidEscapeAction = what to do with non-hex characters and with a
 *     trailing unpaired hex digit, see $(LREF InvalidEscapeAction)
 *   input = the hex string content without prefix and quotes
 *
 * Returns: `input` itself if it is empty, otherwise the decoded string.
 *
 * Throws: ConvException for `InvalidEscapeAction.error` on a non-hex
 * character or an unpaired hex digit at the end.
 */
private string parseHexStringContent(
	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error
)(
	string input
)
{
	if (input.length == 0)
		return input;

	// char.init is never a hex digit, so it marks "no digit pending"
	enum char none = char.init;

	auto output = appender!string;
	output.reserve(input.length / 3);
	char pendingDigit = none;

	foreach (index, char current; input)
	{
		if (current.isWhite)
			continue;

		if (current.isHexDigit)
		{
			if (pendingDigit == none)
			{
				// first digit of a pair, wait for the second one
				pendingDigit = current;
			}
			else
			{
				output ~= cast(char)(pendingDigit.parseHexChar << 4 | current.parseHexChar);
				pendingDigit = none;
			}
			continue;
		}

		// neither whitespace nor a hex digit
		final switch (invalidEscapeAction)
		{
		case InvalidEscapeAction.keep:
			// flush an unpaired digit first so the input order is preserved
			if (pendingDigit != none)
			{
				output ~= pendingDigit;
				pendingDigit = none;
			}
			output ~= current;
			break;
		case InvalidEscapeAction.skip:
			break;
		case InvalidEscapeAction.error:
			throw new ConvException("Invalid hex character at index "
					~ index.to!string);
		}
	}

	// a single digit is left over when the digit count is odd
	if (pendingDigit != none)
	{
		final switch (invalidEscapeAction)
		{
		case InvalidEscapeAction.keep:
			output ~= pendingDigit;
			break;
		case InvalidEscapeAction.skip:
			break;
		case InvalidEscapeAction.error:
			throw new ConvException("Unterminated hex character at end of string");
		}
	}

	return output.data;
}
634 
/// Returns the numeric value (0 to 15) of the hex digit `c`, which may be
/// upper or lower case. `c` must be a hex digit.
private int parseHexChar(char c)
in
{
	assert(c.isHexDigit);
	assert('a' > 'A' && 'A' > '0'); // the branch order below relies on this ASCII ordering
}
do
{
	// The precondition guarantees a hex digit, so checking only the lower
	// bound of each range is sufficient.
	if (c < 'A')
		return c - '0';

	immutable char base = c < 'a' ? 'A' : 'a';
	return (c - base) + 10;
}
651 
/// Returns true if `c` is an underscore, an ASCII letter or an ASCII digit.
private bool isIdentifierChar(char c)
{
	if (c == '_')
		return true;
	return isAlphaNum(c);
}
656 
/// Replaces every `\r\n`, lone `\r`, U+2028 and U+2029 in `text` with a single
/// `\n`, as done when D strings are parsed. Returns `text` itself when it
/// contains none of these.
private string normalizeNewLines(string text)
{
	import std.utf : codeLength;

	static immutable dchar[] lineBreaks = ['\r', '\u2028', '\u2029'];
	// number of UTF-8 code units used by U+2028
	enum size_t unicodeBreakSize = codeLength!char('\u2028');

	auto hit = text.indexOfAny(lineBreaks);
	if (hit == -1)
		return text;

	auto result = appender!string;
	result.reserve(text.length);
	size_t copied = 0;
	do
	{
		// text in front of the line break, then the normalized line break
		result ~= text[copied .. hit];
		result ~= '\n';

		// number of code units the original line break occupies
		size_t consumed;
		if (text[hit] != '\r')
			consumed = unicodeBreakSize;
		else if (hit + 1 < text.length && text[hit + 1] == '\n')
			consumed = 2;
		else
			consumed = 1;

		copied = hit + consumed;
		hit = text.indexOfAny(lineBreaks, copied);
	}
	while (hit != -1);

	result ~= text[copied .. $];
	return result.data;
}
685 
///
unittest
{
	// text without any exotic line break is returned as the same slice
	string testNoChange = "hello\nworld!";
	assert(normalizeNewLines(testNoChange).ptr is testNoChange.ptr);

	// checks that the input normalizes to the expected text
	static void expect(string text, string expected)
	{
		assert(normalizeNewLines(text) == expected, expected);
	}

	expect("hello\rworld", "hello\nworld");
	expect("hello\r\nworld", "hello\nworld");
	expect("hello\r\n\nworld", "hello\n\nworld");
	expect("hello\u2028\nworld", "hello\n\nworld");
	expect("hello\u2029\nworld", "hello\n\nworld");
	expect("hello\r", "hello\n");
}
697 	assert(normalizeNewLines("hello\r") == "hello\n");
698 }