Skip to content

Commit

Permalink
Add support for raw strings
Browse files Browse the repository at this point in the history
  • Loading branch information
irh committed Jan 16, 2024
1 parent 10d620f commit bf64ac5
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 31 deletions.
48 changes: 24 additions & 24 deletions grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ module.exports = grammar({
$.comment,
$._string_start,
$._string_end,
$._raw_string_start,
$._raw_string_end,
$._interpolation_start,
$._interpolation_end,
$.error_sentinel,
Expand Down Expand Up @@ -388,8 +390,28 @@ module.exports = grammar({
),

string: $ => choice(
string($, '\''),
string($, '\"'),
seq(
$._string_start,
repeat(choice(
$.escape,
seq('$', $.identifier),
seq(
'${',
$._interpolation_start,
$._expressions,
$._interpolation_end,
'}',
),
/./,
/\s/
)),
$._string_end,
),
seq(
$._raw_string_start,
repeat(/./),
$._raw_string_end,
),
),

if: $ => choice(
Expand Down Expand Up @@ -623,28 +645,6 @@ module.exports = grammar({
}
});

// Builds the rule for a string delimited by `quote`.
//
// The resulting sequence is: the external `_string_start` token, the literal
// quote, any number of content items, the external `_string_end` token, and
// the literal quote again.
//
// $     - the grammar's rule-reference object.
// quote - the literal quote text that opens and closes the string.
function string($, quote) {
  const opening = $._string_start;

  // Content items, tried as alternatives inside the repeat:
  const escape = $.escape;
  // `$name` - an identifier directly after a dollar sign.
  const shortInterpolation = seq('$', $.identifier);
  // `${ ... }` - expressions bracketed by the external interpolation tokens.
  const bracedInterpolation = seq(
    '${',
    $._interpolation_start,
    $._expressions,
    $._interpolation_end,
    '}',
  );
  // `/./` and `/\s/` are kept as two separate alternatives, as in the original.
  const content = repeat(choice(
    escape,
    shortInterpolation,
    bracedInterpolation,
    /./,
    /\s/
  ));

  const closing = $._string_end;

  return seq(opening, quote, content, closing, quote);
}

// Wraps the given rules in a sequence and allows that sequence to repeat:
// equivalent to `repeat(seq(rule1, rule2, ...))`.
function any_amount_of(...rules) {
  const sequence = seq(...rules);
  return repeat(sequence);
}
Expand Down
4 changes: 2 additions & 2 deletions src/grammar.json
Git LFS file not shown
4 changes: 2 additions & 2 deletions src/node-types.json
Git LFS file not shown
4 changes: 2 additions & 2 deletions src/parser.c
Git LFS file not shown
61 changes: 60 additions & 1 deletion src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ enum TokenType {
COMMENT,
STRING_START,
STRING_END,
RAW_STRING_START,
RAW_STRING_END,
INTERPOLATION_START,
INTERPOLATION_END,
ERROR_SENTINEL,
Expand Down Expand Up @@ -91,13 +93,15 @@ typedef struct {
// is empty; during an interpolated expression we're within quotes, while parsing
// non-string expressions.
bool in_string;
uint8_t raw_string_hash_count;
} Scanner;

// Puts every field of the scanner that this function knows about back into
// its starting state: both vectors are cleared, the two flags are lowered and
// the raw string hash count is zeroed.
static void initialize_scanner(Scanner* scanner) {
    // VEC_CLEAR is defined elsewhere in the file; the two calls are kept in
    // their original order.
    VEC_CLEAR(scanner->indents);
    VEC_CLEAR(scanner->quotes);

    // Plain scalar fields; these assignments are independent of one another.
    scanner->raw_string_hash_count = 0;
    scanner->in_string = false;
    scanner->block_level_just_changed = false;
}

static void skip_whitespace(TSLexer* lexer) {
Expand Down Expand Up @@ -229,9 +233,60 @@ bool tree_sitter_koto_external_scanner_scan(
}

// String start/end detection
if (valid_symbols[STRING_START] && !scanner->in_string
if (valid_symbols[RAW_STRING_START] && !scanner->in_string && next == 'r') {
advance(lexer);
uint8_t hash_count = 0;
while (lexer->lookahead == '#') {
if (hash_count == 255) {
printf("scanner.scan: reached raw string hash limit\n");
return false;
}
hash_count++;
advance(lexer);
}
next = lexer->lookahead;
if (next == '"' || next == '\'') {
printf(">>>> raw string start\n");
advance(lexer);
VEC_PUSH(scanner->quotes, next);
scanner->in_string = true;
scanner->raw_string_hash_count = hash_count;
lexer->mark_end(lexer);
lexer->result_symbol = RAW_STRING_START;
return true;
} else {
printf("scanner.scan: rejected raw string start\n");
return false;
}
} else if (
valid_symbols[RAW_STRING_END] && scanner->in_string
&& next == VEC_BACK(scanner->quotes)) {
printf(">>>> raw string end\n");
advance(lexer);
uint8_t hash_count = 0;
while (lexer->lookahead == '#') {
if (hash_count == 255) {
break;
}
hash_count++;
advance(lexer);
}
if (hash_count != scanner->raw_string_hash_count) {
printf("scanner.scan: rejected raw string end\n");
return false;
}
VEC_POP(scanner->quotes);
scanner->in_string = false;
scanner->raw_string_hash_count = 0;
lexer->mark_end(lexer);
lexer->result_symbol = RAW_STRING_END;
return true;
} else if (
valid_symbols[STRING_START] && !scanner->in_string
&& (next == '"' || next == '\'')) {
printf(">>>> string start\n");
advance(lexer);
lexer->mark_end(lexer);
scanner->in_string = true;
VEC_PUSH(scanner->quotes, next);
lexer->result_symbol = STRING_START;
Expand All @@ -240,6 +295,8 @@ bool tree_sitter_koto_external_scanner_scan(
valid_symbols[STRING_END] && scanner->in_string
&& next == VEC_BACK(scanner->quotes)) {
printf(">>>> string end\n");
advance(lexer);
lexer->mark_end(lexer);
VEC_POP(scanner->quotes);
scanner->in_string = false;
lexer->result_symbol = STRING_END;
Expand Down Expand Up @@ -386,6 +443,7 @@ unsigned tree_sitter_koto_external_scanner_serialize(void* payload, char* buffer

*write_ptr++ = scanner->block_level_just_changed;
*write_ptr++ = scanner->in_string;
*write_ptr++ = scanner->raw_string_hash_count;

return write_ptr - buffer;
}
Expand Down Expand Up @@ -423,6 +481,7 @@ void tree_sitter_koto_external_scanner_deserialize(

scanner->block_level_just_changed = *read_ptr++;
scanner->in_string = *read_ptr++;
scanner->raw_string_hash_count = *read_ptr++;

printf("scanner.deserialize: in_string %i\n", scanner->in_string);
}
Expand Down
18 changes: 18 additions & 0 deletions test/corpus/strings.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,21 @@ escapes
(escape)
)
)

===========================================================================================
raw_strings
===========================================================================================

r'$foo\r\n'
r"'\#"
r#""bar""#
r##"#"hi"#"##

---

(module
(string)
(string)
(string)
(string)
)

0 comments on commit bf64ac5

Please sign in to comment.