Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 22 additions & 13 deletions src/parser/scanner.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { AiScriptSyntaxError, AiScriptUnexpectedEOFError } from '../error.js';
import { decodeUnicodeEscapeSequence } from '../utils/characters.js';
import { decodeUnicodeEscapeSequence, tryDecodeSingleEscapeCharacter } from '../utils/characters.js';
import { CharStream } from './streams/char-stream.js';
import { TOKEN, TokenKind } from './token.js';
import { unexpectedTokenError } from './utils.js';
Expand Down Expand Up @@ -463,6 +463,25 @@ export class Scanner implements ITokenStream {
return;
}

/**
 * Decodes one escape sequence starting at the stream's current character
 * (presumably the character right after the backslash — confirm against the
 * callers' 'escape' state).
 *
 * - `u…`: delegates to `readUnicodeEscapeSequence()` and converts the text
 *   after the leading `u`, parsed as base-16, into a single UTF-16 code unit.
 * - Any character accepted by `tryDecodeSingleEscapeCharacter`: advances the
 *   stream by one character and returns the decoded value.
 *
 * @returns The decoded string for the escape sequence.
 * @throws AiScriptUnexpectedEOFError If the stream is already at EOF.
 * @throws AiScriptSyntaxError If the current character does not start a supported escape sequence.
 */
private decodeEscapeSequence(): string {
	if (this.stream.eof) {
		throw new AiScriptUnexpectedEOFError(this.stream.getPos());
	}

	// The stream is not advanced before any of the uses below, so one read is enough.
	const escapeChar = this.stream.char;

	if (escapeChar === 'u') {
		// Drop the leading `u`; the remainder is interpreted as a hexadecimal code unit.
		const hexDigits = this.readUnicodeEscapeSequence().slice(1);
		const codeUnit = Number.parseInt(hexDigits, 16);
		return String.fromCharCode(codeUnit);
	}

	const decoded = tryDecodeSingleEscapeCharacter(escapeChar);
	if (decoded == null) {
		throw new AiScriptSyntaxError(`invalid escape character: "${escapeChar}"`, this.stream.getPos());
	}

	// Consume the escape character only once it is known to be valid.
	this.stream.next();
	return decoded;
}

private readUnicodeEscapeSequence(): `u${string}` {
if (this.stream.eof || (this.stream.char as string) !== 'u') {
throw new AiScriptSyntaxError('character "u" expected', this.stream.getPos());
Expand Down Expand Up @@ -569,11 +588,7 @@ export class Scanner implements ITokenStream {
break;
}
case 'escape': {
if (this.stream.eof) {
throw new AiScriptUnexpectedEOFError(pos);
}
value += this.stream.char;
this.stream.next();
value += this.decodeEscapeSequence();
state = 'string';
break;
}
Expand Down Expand Up @@ -632,13 +647,7 @@ export class Scanner implements ITokenStream {
break;
}
case 'escape': {
// エスケープ対象の文字が無いままEOFに達した
if (this.stream.eof) {
throw new AiScriptUnexpectedEOFError(pos);
}
// 普通の文字として取り込み
buf += this.stream.char;
this.stream.next();
buf += this.decodeEscapeSequence();
// 通常の文字列に戻る
state = 'string';
break;
Expand Down
18 changes: 18 additions & 0 deletions src/utils/characters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,21 @@ export function decodeUnicodeEscapeSequence(string: string): string {

return result;
}

/**
 * Lookup table from the character following a backslash to the character it
 * denotes. `b`, `v` and `f` are intentionally not listed, so those escapes
 * are treated as unsupported.
 */
const SINGLE_ESCAPE_CHARACTERS: ReadonlyMap<string, string> = new Map([
	['t', '\t'],
	['n', '\n'],
	['r', '\r'],
	['"', '"'],
	['\'', '\''],
	['\\', '\\'],
	['`', '`'],
	['{', '{'],
	['}', '}'],
]);

/**
 * Resolves a single-character escape.
 *
 * @param s The character that follows the backslash.
 * @returns The decoded character, or `null` when `s` is not a supported single escape character.
 */
export function tryDecodeSingleEscapeCharacter(s: string): string | null {
	return SINGLE_ESCAPE_CHARACTERS.get(s) ?? null;
}
80 changes: 80 additions & 0 deletions test/literals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -341,3 +341,83 @@ describe('Template syntax', () => {
});
});

describe('Escape sequence', () => {
	// Every literal syntax under test, as [suite name, delimiter].
	const literalKinds: [string, string][] = [
		['double quote', '"'],
		['single quote', '\''],
		['template', '`'],
	];

	// Builds a script that prints a literal containing only the given escape sequence.
	const buildScript = (delimiter: string, sequence: string): string =>
		`\n<: ${delimiter}${sequence}${delimiter}\n`;

	describe('valid', () => {
		// [escape sequence as written in the script, expected resulting string]
		const cases: [string, string][] = [
			['\\t', '\t'], // horizontal tab
			['\\n', '\n'], // line feed
			['\\r', '\r'], // carriage return
			['\\"', '"'],
			['\\\'', '\''],
			['\\\\', '\\'],
			['\\`', '`'],
			['\\{', '{'],
			['\\}', '}'],
			['\\u0041', 'A'],
			['\\u85cd', '藍'],
			['\\u85CD', '藍'],
			['\\ud842\\udfb7', '𠮷'], // surrogate pair written as two escapes
			['\\uD842\\uDFB7', '𠮷'],
		];

		for (const [suiteName, delimiter] of literalKinds) {
			describe(suiteName, () => {
				test.each(cases)('value of escape sequence "%s" must be "%s"', async (sequence, expected) => {
					const res = await exe(buildScript(delimiter, sequence));
					eq(res, STR(expected));
				});
			});
		}
	});

	describe('invalid', () => {
		// Escape sequences that must be rejected with a syntax error.
		const cases: [string][] = [
			['\\x'],
			['\\b'],
			['\\v'],
			['\\f'],
		];

		for (const [suiteName, delimiter] of literalKinds) {
			describe(suiteName, () => {
				test.each(cases)('value of escape sequence "%s" must not be allowed', async (sequence) => {
					await expect(async () => await exe(buildScript(delimiter, sequence))).rejects.toThrow(AiScriptSyntaxError);
				});
			});
		}
	});
});
8 changes: 8 additions & 0 deletions unreleased/str_escape_sequnece.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
- **Breaking change** 文字列リテラルやテンプレートリテラルにおけるエスケープシーケンスの仕様を変更しました。
- 以下のエスケープシーケンスが追加されました。
- `\t` - 水平タブ (U+0009)
- `\n` - 改行 (U+000A)
- `\r` - 復帰 (U+000D)
- `\u`とそれに続く4桁の16進数の英数字 - 与えられた値を持つUTF-16コード単位として解釈されます。
- `\"`, `\'`, `\\`, `` \` ``, `\{`, `\}` - それぞれ、`\`の直後の文字そのものとなります。
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

\\\ になることも書いてあったほうがいいと思います

- `\`とそれに続く文字列が上記のいずれにも一致しない場合、文法エラーが発生するようになりました。
Loading