Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/bin/syntax.js
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,10 @@ function getGrammar(grammarFile, mode) {
return null;
}

const grammarData = Grammar.dataFromGrammarFile(grammarFile, 'bnf');
const grammarData = Grammar.dataFromGrammarFile(grammarFile, {
Comment thread
misdocumeno marked this conversation as resolved.
grammarType: 'bnf',
useLocation: options.loc,
});

// If explicit lexical grammar file was passed, use it.
const lexGrammarData = getLexGrammarData(options);
Expand Down
32 changes: 27 additions & 5 deletions src/grammar/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,33 @@ export default class Grammar {
* Reads grammar file data. Supports reading `bnf`,
* and `lex` grammars based on mode.
*/
static dataFromGrammarFile(grammarFile, grammarType = 'bnf') {
return Grammar.dataFromString(
fs.readFileSync(grammarFile, 'utf-8'),
grammarType
);
static dataFromGrammarFile(grammarFile, { grammarType = 'bnf', useLocation = false }) {
const grammar = fs.readFileSync(grammarFile, 'utf8');

// check if the bnf grammar contains location capture characters
if (grammarType === 'bnf' && !useLocation) {
const bnf = grammar
// lexer rules
.replace(/%lex[\n\s\S]*?\/lex/g, '')
// comments
.replace(/\/\*[\n\s\S]*?\*\//g, '')
.replace(/\/\/.*?\n/g, '')
//strings
.replace(/'(\\.|[^'\\])*'/g, '')
.replace(/"(\\.|[^"\\])*"/g, '')
.replace(/`(\\.|[^`\\])*`/g, '')
// module include
.replace(/%{[\n\s\S]*?%}/g, '');

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we need to use all these replace calls, probably just can just do a simple regexp test for:

if (/@\d+/.test(grammar) {
  // Warning
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

without those replaces, the warning would be displayed even when finding @ inside strings, comments, lex rules, etc, which isn't good, for example, css uses that character a lot, for media queries and stuff, so it could be part of a lexer rule if someone tries to make a parser for that language. im sure there are more languages that use that character.
also just using \d in the regex would not show the error when using named tokens (which i do a lot)

let me know if you want me to make that change anyway tho, after all its just a warning, not an error

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, sounds good - the only thing we need to make sure those replaces don't slow down much the handling.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i tested it and it doesn't take even 0.015ms in a file of 10 millon characters, at least on my pc. also thats executed only once, and when generating the parser, it won't affect the parser itself at all, so i don't think it's a problem


if (/@\w+/.test(bnf)) {
console.info(colors.red(
'The grammar file contains location capture characters (@), which require the ' +
'"--loc" option, but it has not been provided. The generated parser will throw an error.'
));
}
}

return Grammar.dataFromString(grammar, grammarType);
}

/**
Expand Down