Doesn't handle embedded comments #1

chocolateboy · 2015-03-21T23:39:28Z

There are several issues with the regex:

It doesn't handle embedded comments
It considers functionfoo () { } to be a valid function declaration
~~Nit: \{([\w\W\s\S]*)\} is a roundabout way of saying \{(.*)\} (though I'd go with \{(.*?)\})~~

test

var re = require('function-regex')();

function /* 1 */ test /* 2 */ (/* 3 */ foo /* 4 */ ) /* 5 */ {
    return foo;
}

var match1 = re.exec(test.toString());
var match2 = re.exec('functionfoo(){}');

console.log("should match:", match1); // should match but doesn't
console.log("shouldn't match:", match2); // shouldn't match but does

output:

should match: null
shouldn't match: [ 'functionfoo(){}',
  'foo',
  '',
  '',
  index: 0,
  input: 'functionfoo(){}' ]

The text was updated successfully, but these errors were encountered:

jonschlinkert · 2015-03-22T00:38:45Z

hmm, seems like a reasonable expectation. also seems like comment matching or stripping would be beyond the scope of this though. not sure what others think. perhaps we can add something to the readme that suggests that users strip comments first?

tunnckoCore · 2015-03-22T02:46:28Z

also seems like comment matching or stripping would be beyond the scope of this though ... perhaps we can add something to the readme that suggests that users strip comments first?

👍

tunnckoCore · 2015-03-22T02:57:59Z

I think the only thing that we should change is first \s* to \s+ because the

var match2 = re.exec('functionfoo(){}');

use case

tunnckoCore · 2015-03-22T03:08:05Z

I think the only thing that we should change is first \s* to \s+

haha, but it's intentional.. lol because the function() {} case.

blakeembrey · 2015-03-22T03:22:43Z

@tunnckoCore Off the top of my head, use: (?:\s+[\w$][\w\d$]*)?\s*

E.g. /^function(?:\s+[\w$][\w\d$]*)?\s*$([\w\s,$]*)$\s*\{([\w\W\s\S]*)\}$/

tunnckoCore · 2015-03-22T04:09:24Z

@blakeembrey Hmm.. nope exactly.
@chocolateboy http://regexr.com/3albo what you think?

More use cases?

tunnckoCore · 2015-03-22T04:12:00Z

Always.. regexes are not enough.

chocolateboy · 2015-03-22T04:44:58Z

Always.. regexes not enough.

I can't think of any reason why a regex shouldn't work (and correctly handle comments) for ES5 functions, but it won't work for ES6 functions:

A regex will never be able to handle destructured parameters in the general case, since regular languages don't support balanced delimiters.

chocolateboy · 2015-03-22T06:22:58Z

I was wrong about the "nit". I'd forgotten that JavaScript doesn't provide a way to make . match newline. Still, [\s\S] matches everything and [\w\W] matches everything, so only one is needed.

chocolateboy · 2015-03-22T06:23:24Z

Something like this should work (though note the parameter list includes comments):

^ function (?: space+ (ident) )? space* \( space* (params)? space* \) space* \{ (body) \} $

e.g.:

generate

function /* 1 */ test /* 2 */ ( /* 3 */ foo /* 4 */ , /* 5 */ bar /* 6 */ ) /* 7 */ {
    return foo;
}

function str (re) {
    return re.toString().slice(1, -1);
}

var fn     = str(/^ function (?: space+ (ident) )? space* \( space* (params)? space* \) space* \{ (body) \} $ /);
var params = str(/ ident (?: space* , space* ident )* /);
var space  = str(/ (?: (?: \s+ ) | (?: \/ \* [\s\S]*? \* \/ ) | (?: \/\/ [^\r\n]* ) ) /);
var ident  = str(/ (?: [a-zA-Z_$] [\w$]* ) /);
var body   = str(/ (?: [\s\S]*? ) /);

fn = fn.replace(/params/g, params)
    .replace(/space/g, space)
    .replace(/ident/g, ident)
    .replace(/body/g, body)
    .replace(/\s+/g, '');

var re = new RegExp(fn);

console.log("test: %j", test.toString());
console.log(re);
console.log(re.exec(test.toString()))

function

test: "function test( /* 3 */ foo /* 4 */ , /* 5 */ bar /* 6 */ ) /* 7 */ {\n    return foo;\n}"

regex

/^function(?:(?:(?:\s+)|(?:\/\*[\s\S]*?\*\/)|(?:\/\/[^\r\n]*))+((?:[a-zA-Z_$][\w$]*)))?(?:(?:\s+)|(?:\/\*[\s\S]*?\*\/)|(?:\/\/[^\r\n]*))*\((?:(?:\s+)|(?:\/\*[\s\S]*?\*\/)|(?:\/\/[^\r\n]*))*((?:[a-zA-Z_$][\w$]*)(?:(?:(?:\s+)|(?:\/\*[\s\S]*?\*\/)|(?:\/\/[^\r\n]*))*,(?:(?:\s+)|(?:\/\*[\s\S]*?\*\/)|(?:\/\/[^\r\n]*))*(?:[a-zA-Z_$][\w$]*))*)?(?:(?:\s+)|(?:\/\*[\s\S]*?\*\/)|(?:\/\/[^\r\n]*))*\)(?:(?:\s+)|(?:\/\*[\s\S]*?\*\/)|(?:\/\/[^\r\n]*))*\{((?:[\s\S]*?))\}$/

match

[ 'function test( /* 3 */ foo /* 4 */ , /* 5 */ bar /* 6 */ ) /* 7 */ {\n    return foo;\n}',
  'test',
  'foo /* 4 */ , /* 5 */ bar',
  '\n    return foo;\n',
  index: 0,
  input: 'function test( /* 3 */ foo /* 4 */ , /* 5 */ bar /* 6 */ ) /* 7 */ {\n    return foo;\n}' ]

tunnckoCore · 2015-03-22T10:30:06Z

Hm. Looks good, but what about /* 3 */ and /* 6 */ comments? If we will accept comments and include them in response why only these in the middle?

chocolateboy · 2015-03-22T12:22:32Z

what about /* 3 / and / 6 */ comments?

The regex originally included them. Put them back in if you want them :-)

var fn     = str(/^ function (?: space+ (ident) )? space* \( (params)? \) space* \{ (body) \} $ /);
var params = str(/ space* ident (?: space* , space* ident )* space* /);

[ 'function test( /* 3 */ foo /* 4 */ , /* 5 */ bar /* 6 */ ) /* 7 */ {\n    return foo;\n}',
  'test',
  ' /* 3 */ foo /* 4 */ , /* 5 */ bar /* 6 */ ',
  '\n    return foo;\n',
  index: 0,
  input: 'function test( /* 3 */ foo /* 4 */ , /* 5 */ bar /* 6 */ ) /* 7 */ {\n    return foo;\n}' ]

tunnckoCore · 2015-03-22T13:12:32Z

Hm, good but I keep thinking of that

also seems like comment matching or stripping would be beyond the scope of this though

we already have libs for stripping comments, so its normal first to strip them after that using this regex.

I dont know, others @regexhq/owners ?

tunnckoCore · 2015-03-22T13:13:02Z

but we should only handle this case 'functionfoo(){}'

chocolateboy · 2015-03-22T14:30:16Z

but we should only handle this case 'functionfoo(){}'

That's already handled by this regex, as well as other fixes e.g.:

current regex

// invalid identifiers: shouldn't match, but does
require('function-regex')().exec("function 1 (2, 3, 4) { }")

[ 'function 1 (2, 3, 4) { }',
  '1',
  '2, 3, 4',
  ' ',
  index: 0,
  input: 'function 1 (2, 3, 4) { }' ]

this regex

// invalid identifiers: doesn't match
re.exec("function 1 (2, 3, 4) { }")

null

At any rate, this approach is designed to be easy to read, understand and modify, which is more than can be said for hacking on the regex directly. If you're happy for this regex to break on comments, you can just remove them from the space pattern:

var space = str(/ (?: \s+ ) /);

jonschlinkert · 2015-03-22T15:52:16Z

Still, [\s\S] matches everything and [\w\W] matches everything, so only one is needed.

I noticed that as well.

Before we go to far down this path, what are the use cases we're discussing? IMHO the regex should do what's necessary to meet the requirements of the lib. is this going to be used in esprima or acorn? is the penalty of stripping comments first too much for this too be useful? or should it be used for fast parsing for code context?

I'm not really nitpicking on this, I think the dialog is great. The bigger point is that the lib should have a well-defined purpose. that will answer your questions

chocolateboy · 2015-03-22T16:00:20Z

is this going to be used in esprima or acorn?

? Why would it be used in a parser? Parsers are tools for doing this correctly in all cases. This is a tool for doing it quickly in cases where a) it's sufficient (i.e. ES5 functions) and b) the documented exceptions (e.g. not supporting comments) don't matter.

jonschlinkert · 2015-03-22T16:03:18Z

right, that's my point

tunnckoCore · 2015-03-22T16:15:47Z

I use it in parse-function and I think we should not handle cases with comments and add notice in the readme to strip them before use the regex.
Because if we handle these cases.. the regex comes too big for me.

tunnckoCore · 2015-03-22T16:17:31Z

btw function-to-string is awesome name.. when it transform string (function as string) to object LOL!

chocolateboy · 2015-03-22T16:27:13Z

I use it in parse-function

There's no harm in the name, but that's not the kind of parser I was referring to. I wouldn't expect Acorn or Esprima to silently choke on such a fundamental feature of valid/real-world JavaScript as comments :-)

btw function-to-string is awesome name.. when it transform string (function as string) to object LOL!

My thoughts exactly :-)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Doesn't handle embedded comments #1

Doesn't handle embedded comments #1

chocolateboy commented Mar 21, 2015

jonschlinkert commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

blakeembrey commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015 •

edited

Loading

tunnckoCore commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

jonschlinkert commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

jonschlinkert commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

Doesn't handle embedded comments #1

Doesn't handle embedded comments #1

Comments

chocolateboy commented Mar 21, 2015

test

output:

jonschlinkert commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

blakeembrey commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

generate

function

regex

match

tunnckoCore commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015 • edited Loading

tunnckoCore commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

current regex

this regex

jonschlinkert commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

jonschlinkert commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015

chocolateboy commented Mar 22, 2015

tunnckoCore commented Mar 22, 2015 •

edited

Loading