Create new parse-js-unicode.js with latest changes and fixes from HEAD (including unicode support), and backport other fixes to smaller parse-js.js (except space-consuming unicode support).

This commit is contained in:
Jürg Lehni 2011-07-01 11:58:43 +02:00
parent 609f23c64d
commit 230e42ee50
3 changed files with 2116 additions and 112 deletions

2
lib/parse-js-min.js vendored

File diff suppressed because one or more lines are too long

1969
lib/parse-js-unicode.js Normal file

File diff suppressed because it is too large Load diff

View file

@ -140,7 +140,6 @@ var OPERATORS = array_to_hash([
">>=", ">>=",
"<<=", "<<=",
">>>=", ">>>=",
"%=",
"|=", "|=",
"^=", "^=",
"&=", "&=",
@ -158,22 +157,29 @@ var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
/* -----[ Tokenizer ]----- */ /* -----[ Tokenizer ]----- */
function is_alphanumeric_char(ch) { function is_letter(ch) {
ch = ch.charCodeAt(0); ch = ch.charCodeAt(0);
return (ch >= 48 && ch <= 57) || return (ch >= 65 && ch <= 90) ||
(ch >= 65 && ch <= 90) ||
(ch >= 97 && ch <= 122); (ch >= 97 && ch <= 122);
}; };
function is_identifier_char(ch) {
return is_alphanumeric_char(ch) || ch == "$" || ch == "_";
};
function is_digit(ch) { function is_digit(ch) {
ch = ch.charCodeAt(0); ch = ch.charCodeAt(0);
return ch >= 48 && ch <= 57; return ch >= 48 && ch <= 57;
}; };
function is_alphanumeric_char(ch) {
return is_digit(ch) || is_letter(ch);
};
function is_identifier_start(ch) {
return ch == "$" || ch == "_" || is_letter(ch);
};
function is_identifier_char(ch) {
return is_identifier_start(ch) || is_digit(ch);
};
function parse_js_number(num) { function parse_js_number(num) {
if (RE_HEX_NUMBER.test(num)) { if (RE_HEX_NUMBER.test(num)) {
return parseInt(num.substr(2), 16); return parseInt(num.substr(2), 16);
@ -308,7 +314,7 @@ function tokenizer($TEXT) {
if (ch == "+") return after_e; if (ch == "+") return after_e;
after_e = false; after_e = false;
if (ch == ".") { if (ch == ".") {
if (!has_dot) if (!has_dot && !has_x)
return has_dot = true; return has_dot = true;
return false; return false;
} }
@ -486,7 +492,7 @@ function tokenizer($TEXT) {
if (ch == ".") return handle_dot(); if (ch == ".") return handle_dot();
if (ch == "/") return handle_slash(); if (ch == "/") return handle_slash();
if (HOP(OPERATOR_CHARS, ch)) return read_operator(); if (HOP(OPERATOR_CHARS, ch)) return read_operator();
if (is_identifier_char(ch)) return read_word(); if (ch == "\\" || is_identifier_start(ch)) return read_word();
parse_error("Unexpected character '" + ch + "'"); parse_error("Unexpected character '" + ch + "'");
}; };
@ -565,7 +571,7 @@ function NodeWithToken(str, start, end) {
NodeWithToken.prototype.toString = function() { return this.name; }; NodeWithToken.prototype.toString = function() { return this.name; };
function parse($TEXT, strict_mode, embed_tokens) { function parse($TEXT, exigent_mode, embed_tokens) {
var S = { var S = {
input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT, input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
@ -628,7 +634,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
function expect(punc) { return expect_token("punc", punc); }; function expect(punc) { return expect_token("punc", punc); };
function can_insert_semicolon() { function can_insert_semicolon() {
return !strict_mode && ( return !exigent_mode && (
S.token.nlb || is("eof") || is("punc", "}") S.token.nlb || is("eof") || is("punc", "}")
); );
}; };
@ -653,14 +659,17 @@ function parse($TEXT, strict_mode, embed_tokens) {
return str instanceof NodeWithToken ? str : new NodeWithToken(str, start, end); return str instanceof NodeWithToken ? str : new NodeWithToken(str, start, end);
}; };
var statement = embed_tokens ? function() { function maybe_embed_tokens(parser) {
var start = S.token; if (embed_tokens) return function() {
var ast = $statement.apply(this, arguments); var start = S.token;
ast[0] = add_tokens(ast[0], start, prev()); var ast = parser.apply(this, arguments);
return ast; ast[0] = add_tokens(ast[0], start, prev());
} : $statement; return ast;
};
else return parser;
};
function $statement() { var statement = maybe_embed_tokens(function() {
if (is("operator", "/")) { if (is("operator", "/")) {
S.peeked = null; S.peeked = null;
S.token = S.input(true); // force regexp S.token = S.input(true); // force regexp
@ -754,12 +763,12 @@ function parse($TEXT, strict_mode, embed_tokens) {
unexpected(); unexpected();
} }
} }
}; });
function labeled_statement(label) { function labeled_statement(label) {
S.labels.push(label); S.labels.push(label);
var start = S.token, stat = statement(); var start = S.token, stat = statement();
if (strict_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0])) if (exigent_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
unexpected(start); unexpected(start);
S.labels.pop(); S.labels.pop();
return as("label", label, stat); return as("label", label, stat);
@ -770,7 +779,10 @@ function parse($TEXT, strict_mode, embed_tokens) {
}; };
function break_cont(type) { function break_cont(type) {
var name = is("name") ? S.token.value : null; var name;
if (!can_insert_semicolon()) {
name = is("name") ? S.token.value : null;
}
if (name != null) { if (name != null) {
next(); next();
if (!member(name, S.labels)) if (!member(name, S.labels))
@ -784,36 +796,35 @@ function parse($TEXT, strict_mode, embed_tokens) {
function for_() { function for_() {
expect("("); expect("(");
var has_var = is("keyword", "var"); var init = null;
if (has_var) if (!is("punc", ";")) {
next(); init = is("keyword", "var")
if (is("name") && is_token(peek(), "operator", "in")) { ? (next(), var_(true))
// for (i in foo) : expression(true, true);
var name = S.token.value; if (is("operator", "in"))
next(); next(); return for_in(init);
var obj = expression();
expect(")");
return as("for-in", has_var, name, obj, in_loop(statement));
} else {
// classic for
var init = is("punc", ";") ? null : has_var ? var_() : expression();
expect(";");
var test = is("punc", ";") ? null : expression();
expect(";");
var step = is("punc", ")") ? null : expression();
expect(")");
return as("for", init, test, step, in_loop(statement));
} }
return regular_for(init);
}; };
var function_ = embed_tokens ? function() { function regular_for(init) {
var start = prev(); expect(";");
var ast = $function_.apply(this, arguments); var test = is("punc", ";") ? null : expression();
ast[0] = add_tokens(ast[0], start, prev()); expect(";");
return ast; var step = is("punc", ")") ? null : expression();
} : $function_; expect(")");
return as("for", init, test, step, in_loop(statement));
};
function $function_(in_statement) { function for_in(init) {
var lhs = init[0] == "var" ? as("name", init[1][0]) : init;
next();
var obj = expression();
expect(")");
return as("for-in", init, lhs, obj, in_loop(statement));
};
var function_ = maybe_embed_tokens(function(in_statement) {
var name = is("name") ? prog1(S.token.value, next) : null; var name = is("name") ? prog1(S.token.value, next) : null;
if (in_statement && !name) if (in_statement && !name)
unexpected(); unexpected();
@ -841,7 +852,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
S.in_loop = loop; S.in_loop = loop;
return a; return a;
})()); })());
}; });
function if_() { function if_() {
var cond = parenthesised(), body = statement(), belse; var cond = parenthesised(), body = statement(), belse;
@ -910,7 +921,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
return as("try", body, bcatch, bfinally); return as("try", body, bcatch, bfinally);
}; };
function vardefs() { function vardefs(no_in) {
var a = []; var a = [];
for (;;) { for (;;) {
if (!is("name")) if (!is("name"))
@ -919,7 +930,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
next(); next();
if (is("operator", "=")) { if (is("operator", "=")) {
next(); next();
a.push([ name, expression(false) ]); a.push([ name, expression(false, no_in) ]);
} else { } else {
a.push([ name ]); a.push([ name ]);
} }
@ -930,8 +941,8 @@ function parse($TEXT, strict_mode, embed_tokens) {
return a; return a;
}; };
function var_() { function var_(no_in) {
return as("var", vardefs()); return as("var", vardefs(no_in));
}; };
function const_() { function const_() {
@ -949,7 +960,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
return subscripts(as("new", newexp, args), true); return subscripts(as("new", newexp, args), true);
}; };
function expr_atom(allow_calls) { var expr_atom = maybe_embed_tokens(function(allow_calls) {
if (is("operator", "new")) { if (is("operator", "new")) {
next(); next();
return new_(); return new_();
@ -984,7 +995,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
return subscripts(prog1(atom, next), allow_calls); return subscripts(prog1(atom, next), allow_calls);
} }
unexpected(); unexpected();
}; });
function expr_list(closing, allow_trailing_comma, allow_empty) { function expr_list(closing, allow_trailing_comma, allow_empty) {
var first = true, a = []; var first = true, a = [];
@ -1002,14 +1013,14 @@ function parse($TEXT, strict_mode, embed_tokens) {
}; };
function array_() { function array_() {
return as("array", expr_list("]", !strict_mode, true)); return as("array", expr_list("]", !exigent_mode, true));
}; };
function object_() { function object_() {
var first = true, a = []; var first = true, a = [];
while (!is("punc", "}")) { while (!is("punc", "}")) {
if (first) first = false; else expect(","); if (first) first = false; else expect(",");
if (!strict_mode && is("punc", "}")) if (!exigent_mode && is("punc", "}"))
// allow trailing comma // allow trailing comma
break; break;
var type = S.token.type; var type = S.token.type;
@ -1072,64 +1083,68 @@ function parse($TEXT, strict_mode, embed_tokens) {
return as(tag, op, expr); return as(tag, op, expr);
}; };
function expr_op(left, min_prec) { function expr_op(left, min_prec, no_in) {
var op = is("operator") ? S.token.value : null; var op = is("operator") ? S.token.value : null;
if (op && op == "in" && no_in) op = null;
var prec = op != null ? PRECEDENCE[op] : null; var prec = op != null ? PRECEDENCE[op] : null;
if (prec != null && prec > min_prec) { if (prec != null && prec > min_prec) {
next(); next();
var right = expr_op(expr_atom(true), prec); var right = expr_op(expr_atom(true), prec, no_in);
return expr_op(as("binary", op, left, right), min_prec); return expr_op(as("binary", op, left, right), min_prec, no_in);
} }
return left; return left;
}; };
function expr_ops() { function expr_ops(no_in) {
return expr_op(expr_atom(true), 0); return expr_op(expr_atom(true), 0, no_in);
}; };
function maybe_conditional() { function maybe_conditional(no_in) {
var expr = expr_ops(); var expr = expr_ops(no_in);
if (is("operator", "?")) { if (is("operator", "?")) {
next(); next();
var yes = expression(false); var yes = expression(false);
expect(":"); expect(":");
return as("conditional", expr, yes, expression(false)); return as("conditional", expr, yes, expression(false, no_in));
} }
return expr; return expr;
}; };
function is_assignable(expr) { function is_assignable(expr) {
if (!exigent_mode) return true;
switch (expr[0]) { switch (expr[0]) {
case "dot": case "dot":
case "sub": case "sub":
case "new":
case "call":
return true; return true;
case "name": case "name":
return expr[1] != "this"; return expr[1] != "this";
} }
}; };
function maybe_assign() { function maybe_assign(no_in) {
var left = maybe_conditional(), val = S.token.value; var left = maybe_conditional(no_in), val = S.token.value;
if (is("operator") && HOP(ASSIGNMENT, val)) { if (is("operator") && HOP(ASSIGNMENT, val)) {
if (is_assignable(left)) { if (is_assignable(left)) {
next(); next();
return as("assign", ASSIGNMENT[val], left, maybe_assign()); return as("assign", ASSIGNMENT[val], left, maybe_assign(no_in));
} }
croak("Invalid assignment"); croak("Invalid assignment");
} }
return left; return left;
}; };
function expression(commas) { var expression = maybe_embed_tokens(function(commas, no_in) {
if (arguments.length == 0) if (arguments.length == 0)
commas = true; commas = true;
var expr = maybe_assign(); var expr = maybe_assign(no_in);
if (commas && is("punc", ",")) { if (commas && is("punc", ",")) {
next(); next();
return as("seq", expr, expression()); return as("seq", expr, expression(true, no_in));
} }
return expr; return expr;
}; });
function in_loop(cont) { function in_loop(cont) {
try { try {
@ -1159,6 +1174,12 @@ function ast_walker() {
return a; return a;
}) ]; }) ];
}; };
function _block(statements) {
var out = [ this[0] ];
if (statements != null)
out.push(MAP(statements, walk));
return out;
};
var walkers = { var walkers = {
"string": function(str) { "string": function(str) {
return [ this[0], str ]; return [ this[0], str ];
@ -1172,12 +1193,8 @@ function ast_walker() {
"toplevel": function(statements) { "toplevel": function(statements) {
return [ this[0], MAP(statements, walk) ]; return [ this[0], MAP(statements, walk) ];
}, },
"block": function(statements) { "block": _block,
var out = [ this[0] ]; "splice": _block,
if (statements != null)
out.push(MAP(statements, walk));
return out;
},
"var": _vardefs, "var": _vardefs,
"const": _vardefs, "const": _vardefs,
"try": function(t, c, f) { "try": function(t, c, f) {
@ -1230,8 +1247,8 @@ function ast_walker() {
"for": function(init, cond, step, block) { "for": function(init, cond, step, block) {
return [ this[0], walk(init), walk(cond), walk(step), walk(block) ]; return [ this[0], walk(init), walk(cond), walk(step), walk(block) ];
}, },
"for-in": function(has_var, key, hash, block) { "for-in": function(vvar, key, hash, block) {
return [ this[0], has_var, key, walk(hash), walk(block) ]; return [ this[0], walk(vvar), walk(key), walk(hash), walk(block) ];
}, },
"while": function(cond, block) { "while": function(cond, block) {
return [ this[0], walk(cond), walk(block) ]; return [ this[0], walk(cond), walk(block) ];
@ -1340,6 +1357,7 @@ function empty(b) {
var DOT_CALL_NO_PARENS = array_to_hash([ var DOT_CALL_NO_PARENS = array_to_hash([
"name", "name",
"array", "array",
"object",
"string", "string",
"dot", "dot",
"sub", "sub",
@ -1362,29 +1380,34 @@ function make_string(str) {
} }
return s; return s;
}); });
if (dq > sq) { if (dq > sq) return "'" + str.replace(/\x27/g, "\\'") + "'";
return "'" + str.replace(/\x27/g, "\\'") + "'"; else return '"' + str.replace(/\x22/g, '\\"') + '"';
} else {
return '"' + str.replace(/\x22/g, '\\"') + '"';
}
}; };
function gen_code(ast, beautify) { var SPLICE_NEEDS_BRACKETS = array_to_hash([ "if", "while", "do", "for", "for-in", "with" ]);
if (beautify) beautify = defaults(beautify, {
function gen_code(ast, options) {
options = defaults(options, {
indent_start : 0, indent_start : 0,
indent_level : 4, indent_level : 4,
quote_keys : false, quote_keys : false,
space_colon : false space_colon : false,
beautify : false
}); });
var beautify = !!options.beautify;
var indentation = 0, var indentation = 0,
newline = beautify ? "\n" : "", newline = beautify ? "\n" : "",
space = beautify ? " " : ""; space = beautify ? " " : "";
function make_name(name) {
return name.toString();
};
function indent(line) { function indent(line) {
if (line == null) if (line == null)
line = ""; line = "";
if (beautify) if (beautify)
line = new Array(beautify.indent_start + indentation * beautify.indent_level).join(" ") + line; line = repeat_string(" ", options.indent_start + indentation * options.indent_level) + line;
return line; return line;
}; };
@ -1438,7 +1461,7 @@ function gen_code(ast, beautify) {
}; };
function needs_parens(expr) { function needs_parens(expr) {
if (expr[0] == "function") { if (expr[0] == "function" || expr[0] == "object") {
// dot/call on a literal function requires the // dot/call on a literal function requires the
// function literal itself to be parenthesized // function literal itself to be parenthesized
// only if it's the first "thing" in a // only if it's the first "thing" in a
@ -1450,9 +1473,8 @@ function gen_code(ast, beautify) {
var a = slice($stack), self = a.pop(), p = a.pop(); var a = slice($stack), self = a.pop(), p = a.pop();
while (p) { while (p) {
if (p[0] == "stat") return true; if (p[0] == "stat") return true;
if ((p[0] == "seq" && p[1] === self) || if (((p[0] == "seq" || p[0] == "call" || p[0] == "dot" || p[0] == "sub" || p[0] == "conditional") && p[1] === self) ||
(p[0] == "call" && p[1] === self) || ((p[0] == "binary" || p[0] == "assign" || p[0] == "unary-postfix") && p[2] === self)) {
(p[0] == "binary" && p[2] === self)) {
self = p; self = p;
p = a.pop(); p = a.pop();
} else { } else {
@ -1486,6 +1508,19 @@ function gen_code(ast, beautify) {
return make_block_statements(statements) return make_block_statements(statements)
.join(newline + newline); .join(newline + newline);
}, },
"splice": function(statements) {
var parent = $stack[$stack.length - 2][0];
if (HOP(SPLICE_NEEDS_BRACKETS, parent)) {
// we need block brackets in this case
return make_block.apply(this, arguments);
} else {
return MAP(make_block_statements(statements, true),
function(line, i) {
// the first line is already indented
return i > 0 ? indent(line) : line;
}).join(newline);
}
},
"block": make_block, "block": make_block,
"var": function(defs) { "var": function(defs) {
return "var " + add_commas(MAP(defs, make_1vardef)) + ";"; return "var " + add_commas(MAP(defs, make_1vardef)) + ";";
@ -1547,9 +1582,10 @@ function gen_code(ast, beautify) {
}, },
"dot": function(expr) { "dot": function(expr) {
var out = make(expr), i = 1; var out = make(expr), i = 1;
if (expr[0] == "num") if (expr[0] == "num") {
out += "."; if (!/\./.test(expr[1]))
else if (needs_parens(expr)) out += ".";
} else if (needs_parens(expr))
out = "(" + out + ")"; out = "(" + out + ")";
while (i < arguments.length) while (i < arguments.length)
out += "." + make_name(arguments[i++]); out += "." + make_name(arguments[i++]);
@ -1582,12 +1618,11 @@ function gen_code(ast, beautify) {
out.push("(" + args + ")", make(block)); out.push("(" + args + ")", make(block));
return add_spaces(out); return add_spaces(out);
}, },
"for-in": function(has_var, key, hash, block) { "for-in": function(vvar, key, hash, block) {
var out = add_spaces([ "for", "(" ]); return add_spaces([ "for", "(" +
if (has_var) (vvar ? make(vvar).replace(/;+$/, "") : make(key)),
out += "var "; "in",
out += add_spaces([ make_name(key) + " in " + make(hash) + ")", make(block) ]); make(hash) + ")", make(block) ]);
return out;
}, },
"while": function(condition, block) { "while": function(condition, block) {
return add_spaces([ "while", "(" + make(condition) + ")", make(block) ]); return add_spaces([ "while", "(" + make(condition) + ")", make(block) ]);
@ -1645,7 +1680,7 @@ function gen_code(ast, beautify) {
return indent(make_function(p[0], p[1][2], p[1][3], p[2])); return indent(make_function(p[0], p[1][2], p[1][3], p[2]));
} }
var key = p[0], val = make(p[1]); var key = p[0], val = make(p[1]);
if (beautify && beautify.quote_keys) { if (options.quote_keys) {
key = make_string(key); key = make_string(key);
} else if ((typeof key == "number" || !beautify && +key + "" == key) } else if ((typeof key == "number" || !beautify && +key + "" == key)
&& parseFloat(key) >= 0) { && parseFloat(key) >= 0) {
@ -1653,7 +1688,7 @@ function gen_code(ast, beautify) {
} else if (!is_identifier(key)) { } else if (!is_identifier(key)) {
key = make_string(key); key = make_string(key);
} }
return indent(add_spaces(beautify && beautify.space_colon return indent(add_spaces(beautify && options.space_colon
? [ key, ":", val ] ? [ key, ":", val ]
: [ key + ":", val ])); : [ key + ":", val ]));
}).join("," + newline); }).join("," + newline);
@ -1726,11 +1761,7 @@ function gen_code(ast, beautify) {
return add_spaces([ out, make_block(body) ]); return add_spaces([ out, make_block(body) ]);
}; };
function make_name(name) { function make_block_statements(statements, noindent) {
return name.toString();
};
function make_block_statements(statements) {
for (var a = [], last = statements.length - 1, i = 0; i <= last; ++i) { for (var a = [], last = statements.length - 1, i = 0; i <= last; ++i) {
var stat = statements[i]; var stat = statements[i];
var code = make(stat); var code = make(stat);
@ -1748,7 +1779,7 @@ function gen_code(ast, beautify) {
a.push(code); a.push(code);
} }
} }
return MAP(a, indent); return noindent ? a : MAP(a, indent);
}; };
function make_switch_block(body) { function make_switch_block(body) {
@ -1779,7 +1810,7 @@ function gen_code(ast, beautify) {
function make_1vardef(def) { function make_1vardef(def) {
var name = def[0], val = def[1]; var name = def[0], val = def[1];
if (val != null) if (val != null)
name = add_spaces([ name, "=", make(val) ]); name = add_spaces([ make_name(name), "=", parenthesize(val, "seq") ]);
return name; return name;
}; };
@ -1836,6 +1867,10 @@ function member(name, array) {
return false; return false;
}; };
function repeat_string(str, i) {
return i < 1 ? "" : new Array(i + 1).join(str);
};
function defaults(args, defs) { function defaults(args, defs) {
var ret = {}; var ret = {};
if (args === true) if (args === true)