Create new parse-js-unicode.js with the latest changes and fixes from HEAD (including Unicode support), and backport the other fixes to the smaller parse-js.js (except the space-consuming Unicode support).

This commit is contained in:
Jürg Lehni 2011-07-01 11:58:43 +02:00
parent 609f23c64d
commit 230e42ee50
3 changed files with 2116 additions and 112 deletions

2
lib/parse-js-min.js vendored

File diff suppressed because one or more lines are too long

1969
lib/parse-js-unicode.js Normal file

File diff suppressed because it is too large Load diff

View file

@ -140,7 +140,6 @@ var OPERATORS = array_to_hash([
">>=",
"<<=",
">>>=",
"%=",
"|=",
"^=",
"&=",
@ -158,22 +157,29 @@ var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
/* -----[ Tokenizer ]----- */
function is_alphanumeric_char(ch) {
function is_letter(ch) {
ch = ch.charCodeAt(0);
return (ch >= 48 && ch <= 57) ||
(ch >= 65 && ch <= 90) ||
return (ch >= 65 && ch <= 90) ||
(ch >= 97 && ch <= 122);
};
function is_identifier_char(ch) {
return is_alphanumeric_char(ch) || ch == "$" || ch == "_";
};
/**
 * Tell whether the first character of `ch` is an ASCII decimal digit.
 *
 * @param {string} ch - string whose first UTF-16 code unit is inspected
 * @returns {boolean} true when that code unit lies in "0".."9" (48..57)
 */
function is_digit(ch) {
    var code = ch.charCodeAt(0);
    // an empty string yields NaN here, which fails both comparisons
    return 48 <= code && code <= 57;
};
/**
 * Tell whether `ch` is a digit or a letter, as decided by is_digit() and
 * is_letter(). The digit test runs first; is_letter() is only consulted
 * when it fails.
 *
 * @param {string} ch - character to classify
 * @returns the first truthy result of is_digit(ch) / is_letter(ch),
 *          otherwise the (falsy) result of is_letter(ch)
 */
function is_alphanumeric_char(ch) {
    var result = is_digit(ch);
    if (!result) result = is_letter(ch);
    return result;
};
/**
 * Tell whether `ch` may begin an identifier: "$", "_" or anything
 * is_letter() accepts.
 *
 * @param {string} ch - character to classify
 * @returns {boolean} true for "$" and "_"; otherwise whatever
 *          is_letter(ch) returns
 */
function is_identifier_start(ch) {
    // loose comparison kept on purpose so coercible inputs behave as before
    if (ch == "$" || ch == "_") return true;
    return is_letter(ch);
};
/**
 * Tell whether `ch` may appear inside an identifier: anything accepted by
 * is_identifier_start(), or a digit. The digit test only runs when the
 * identifier-start test fails.
 *
 * @param {string} ch - character to classify
 * @returns the first truthy result of is_identifier_start(ch) /
 *          is_digit(ch), otherwise the (falsy) result of is_digit(ch)
 */
function is_identifier_char(ch) {
    var result = is_identifier_start(ch);
    if (!result) result = is_digit(ch);
    return result;
};
function parse_js_number(num) {
if (RE_HEX_NUMBER.test(num)) {
return parseInt(num.substr(2), 16);
@ -308,7 +314,7 @@ function tokenizer($TEXT) {
if (ch == "+") return after_e;
after_e = false;
if (ch == ".") {
if (!has_dot)
if (!has_dot && !has_x)
return has_dot = true;
return false;
}
@ -486,7 +492,7 @@ function tokenizer($TEXT) {
if (ch == ".") return handle_dot();
if (ch == "/") return handle_slash();
if (HOP(OPERATOR_CHARS, ch)) return read_operator();
if (is_identifier_char(ch)) return read_word();
if (ch == "\\" || is_identifier_start(ch)) return read_word();
parse_error("Unexpected character '" + ch + "'");
};
@ -565,7 +571,7 @@ function NodeWithToken(str, start, end) {
NodeWithToken.prototype.toString = function() { return this.name; };
function parse($TEXT, strict_mode, embed_tokens) {
function parse($TEXT, exigent_mode, embed_tokens) {
var S = {
input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
@ -628,7 +634,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
function expect(punc) { return expect_token("punc", punc); };
function can_insert_semicolon() {
return !strict_mode && (
return !exigent_mode && (
S.token.nlb || is("eof") || is("punc", "}")
);
};
@ -653,14 +659,17 @@ function parse($TEXT, strict_mode, embed_tokens) {
return str instanceof NodeWithToken ? str : new NodeWithToken(str, start, end);
};
var statement = embed_tokens ? function() {
var start = S.token;
var ast = $statement.apply(this, arguments);
ast[0] = add_tokens(ast[0], start, prev());
return ast;
} : $statement;
/**
 * Optionally wrap a parser function so the node it returns carries the
 * tokens it spans.
 *
 * When `embed_tokens` is falsy the parser is returned untouched. Otherwise
 * the returned wrapper records the current token (S.token) before calling
 * the parser with the same `this` and arguments, then replaces the node's
 * tag (element 0) with add_tokens(tag, startToken, prev()).
 *
 * @param {Function} parser - parser function returning an AST array
 * @returns {Function} `parser` itself, or the token-embedding wrapper
 */
function maybe_embed_tokens(parser) {
    if (!embed_tokens) return parser;
    return function() {
        // must be read before the parser runs and advances the token stream
        var first = S.token;
        var node = parser.apply(this, arguments);
        node[0] = add_tokens(node[0], first, prev());
        return node;
    };
};
function $statement() {
var statement = maybe_embed_tokens(function() {
if (is("operator", "/")) {
S.peeked = null;
S.token = S.input(true); // force regexp
@ -754,12 +763,12 @@ function parse($TEXT, strict_mode, embed_tokens) {
unexpected();
}
}
};
});
function labeled_statement(label) {
S.labels.push(label);
var start = S.token, stat = statement();
if (strict_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
if (exigent_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
unexpected(start);
S.labels.pop();
return as("label", label, stat);
@ -770,7 +779,10 @@ function parse($TEXT, strict_mode, embed_tokens) {
};
function break_cont(type) {
var name = is("name") ? S.token.value : null;
var name;
if (!can_insert_semicolon()) {
name = is("name") ? S.token.value : null;
}
if (name != null) {
next();
if (!member(name, S.labels))
@ -784,36 +796,35 @@ function parse($TEXT, strict_mode, embed_tokens) {
function for_() {
expect("(");
var has_var = is("keyword", "var");
if (has_var)
next();
if (is("name") && is_token(peek(), "operator", "in")) {
// for (i in foo)
var name = S.token.value;
next(); next();
var obj = expression();
expect(")");
return as("for-in", has_var, name, obj, in_loop(statement));
} else {
// classic for
var init = is("punc", ";") ? null : has_var ? var_() : expression();
expect(";");
var test = is("punc", ";") ? null : expression();
expect(";");
var step = is("punc", ")") ? null : expression();
expect(")");
return as("for", init, test, step, in_loop(statement));
var init = null;
if (!is("punc", ";")) {
init = is("keyword", "var")
? (next(), var_(true))
: expression(true, true);
if (is("operator", "in"))
return for_in(init);
}
return regular_for(init);
};
var function_ = embed_tokens ? function() {
var start = prev();
var ast = $function_.apply(this, arguments);
ast[0] = add_tokens(ast[0], start, prev());
return ast;
} : $function_;
/**
 * Parse the remainder of a classic `for (init; test; step) body` loop,
 * starting at the first ";" (the init clause has already been parsed).
 *
 * @param init - already-parsed init clause, or null when it was empty
 * @returns the node built by as("for", init, test, step, body); `test` and
 *          `step` are null when the corresponding clause is empty
 */
function regular_for(init) {
    var test = null, step = null;
    expect(";");
    if (!is("punc", ";")) test = expression();
    expect(";");
    if (!is("punc", ")")) step = expression();
    expect(")");
    return as("for", init, test, step, in_loop(statement));
};
function $function_(in_statement) {
/**
 * Parse the remainder of a `for (init in obj) body` loop; the current
 * token is the `in` operator and `init` has already been parsed.
 *
 * @param init - the parsed left-hand side: either a "var" node or an
 *               arbitrary expression node
 * @returns the node built by as("for-in", init, lhs, obj, body)
 */
function for_in(init) {
    var lhs = init;
    // loose == on purpose: the tag is compared through string coercion
    // NOTE(review): for a "var" init the name node wraps the first entry of
    // the definition list as-is (init[1][0]) — confirm this is intended.
    if (init[0] == "var") lhs = as("name", init[1][0]);
    next(); // skip the "in" operator
    var obj = expression();
    expect(")");
    var body = in_loop(statement);
    return as("for-in", init, lhs, obj, body);
};
var function_ = maybe_embed_tokens(function(in_statement) {
var name = is("name") ? prog1(S.token.value, next) : null;
if (in_statement && !name)
unexpected();
@ -841,7 +852,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
S.in_loop = loop;
return a;
})());
};
});
function if_() {
var cond = parenthesised(), body = statement(), belse;
@ -910,7 +921,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
return as("try", body, bcatch, bfinally);
};
function vardefs() {
function vardefs(no_in) {
var a = [];
for (;;) {
if (!is("name"))
@ -919,7 +930,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
next();
if (is("operator", "=")) {
next();
a.push([ name, expression(false) ]);
a.push([ name, expression(false, no_in) ]);
} else {
a.push([ name ]);
}
@ -930,8 +941,8 @@ function parse($TEXT, strict_mode, embed_tokens) {
return a;
};
function var_() {
return as("var", vardefs());
function var_(no_in) {
return as("var", vardefs(no_in));
};
function const_() {
@ -949,7 +960,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
return subscripts(as("new", newexp, args), true);
};
function expr_atom(allow_calls) {
var expr_atom = maybe_embed_tokens(function(allow_calls) {
if (is("operator", "new")) {
next();
return new_();
@ -984,7 +995,7 @@ function parse($TEXT, strict_mode, embed_tokens) {
return subscripts(prog1(atom, next), allow_calls);
}
unexpected();
};
});
function expr_list(closing, allow_trailing_comma, allow_empty) {
var first = true, a = [];
@ -1002,14 +1013,14 @@ function parse($TEXT, strict_mode, embed_tokens) {
};
function array_() {
return as("array", expr_list("]", !strict_mode, true));
return as("array", expr_list("]", !exigent_mode, true));
};
function object_() {
var first = true, a = [];
while (!is("punc", "}")) {
if (first) first = false; else expect(",");
if (!strict_mode && is("punc", "}"))
if (!exigent_mode && is("punc", "}"))
// allow trailing comma
break;
var type = S.token.type;
@ -1072,64 +1083,68 @@ function parse($TEXT, strict_mode, embed_tokens) {
return as(tag, op, expr);
};
function expr_op(left, min_prec) {
function expr_op(left, min_prec, no_in) {
var op = is("operator") ? S.token.value : null;
if (op && op == "in" && no_in) op = null;
var prec = op != null ? PRECEDENCE[op] : null;
if (prec != null && prec > min_prec) {
next();
var right = expr_op(expr_atom(true), prec);
return expr_op(as("binary", op, left, right), min_prec);
var right = expr_op(expr_atom(true), prec, no_in);
return expr_op(as("binary", op, left, right), min_prec, no_in);
}
return left;
};
function expr_ops() {
return expr_op(expr_atom(true), 0);
function expr_ops(no_in) {
return expr_op(expr_atom(true), 0, no_in);
};
function maybe_conditional() {
var expr = expr_ops();
function maybe_conditional(no_in) {
var expr = expr_ops(no_in);
if (is("operator", "?")) {
next();
var yes = expression(false);
expect(":");
return as("conditional", expr, yes, expression(false));
return as("conditional", expr, yes, expression(false, no_in));
}
return expr;
};
/**
 * Decide whether `expr` may appear on the left-hand side of an assignment.
 *
 * Outside exigent mode everything is accepted. In exigent mode only
 * "dot", "sub", "new" and "call" nodes, and "name" nodes other than
 * `this`, are assignable.
 *
 * @param {Array} expr - AST node; expr[0] is the node tag, which is either
 *                       a plain string or an object whose toString() yields
 *                       the tag name (when tokens are embedded)
 * @returns {boolean} true when the node is an acceptable assignment target
 */
function is_assignable(expr) {
    if (!exigent_mode) return true;
    // `switch` matches with strict equality, so a tag object would never
    // equal the string labels below; compare its string form instead.
    switch (String(expr[0])) {
        case "dot":
        case "sub":
        case "new":
        case "call":
            return true;
        case "name":
            return expr[1] != "this";
    }
    return false;
};
function maybe_assign() {
var left = maybe_conditional(), val = S.token.value;
function maybe_assign(no_in) {
var left = maybe_conditional(no_in), val = S.token.value;
if (is("operator") && HOP(ASSIGNMENT, val)) {
if (is_assignable(left)) {
next();
return as("assign", ASSIGNMENT[val], left, maybe_assign());
return as("assign", ASSIGNMENT[val], left, maybe_assign(no_in));
}
croak("Invalid assignment");
}
return left;
};
function expression(commas) {
var expression = maybe_embed_tokens(function(commas, no_in) {
if (arguments.length == 0)
commas = true;
var expr = maybe_assign();
var expr = maybe_assign(no_in);
if (commas && is("punc", ",")) {
next();
return as("seq", expr, expression());
return as("seq", expr, expression(true, no_in));
}
return expr;
};
});
function in_loop(cont) {
try {
@ -1159,6 +1174,12 @@ function ast_walker() {
return a;
}) ];
};
/**
 * Walker shared by block-like nodes. Called with the node as `this`:
 * produces a new node carrying the same tag (this[0]) and, when a
 * statement list is present, the list with every statement walked.
 *
 * @param {Array|null|undefined} statements - child statements, if any
 * @returns {Array} [tag] when `statements` is null/undefined,
 *                  otherwise [tag, walkedStatements]
 */
function _block(statements) {
    if (statements == null) return [ this[0] ];
    return [ this[0], MAP(statements, walk) ];
};
var walkers = {
"string": function(str) {
return [ this[0], str ];
@ -1172,12 +1193,8 @@ function ast_walker() {
// Walker for "toplevel" nodes: keeps the tag (this[0]) and rebuilds the
// statement list by passing every statement through walk().
"toplevel": function(statements) {
return [ this[0], MAP(statements, walk) ];
},
"block": function(statements) {
var out = [ this[0] ];
if (statements != null)
out.push(MAP(statements, walk));
return out;
},
"block": _block,
"splice": _block,
"var": _vardefs,
"const": _vardefs,
"try": function(t, c, f) {
@ -1230,8 +1247,8 @@ function ast_walker() {
// Walker for "for" nodes: keeps the tag (this[0]) and walks each of the
// four children (init, condition, step, body) in that order.
"for": function(init, cond, step, block) {
return [ this[0], walk(init), walk(cond), walk(step), walk(block) ];
},
"for-in": function(has_var, key, hash, block) {
return [ this[0], has_var, key, walk(hash), walk(block) ];
"for-in": function(vvar, key, hash, block) {
return [ this[0], walk(vvar), walk(key), walk(hash), walk(block) ];
},
"while": function(cond, block) {
return [ this[0], walk(cond), walk(block) ];
@ -1340,6 +1357,7 @@ function empty(b) {
var DOT_CALL_NO_PARENS = array_to_hash([
"name",
"array",
"object",
"string",
"dot",
"sub",
@ -1362,29 +1380,34 @@ function make_string(str) {
}
return s;
});
if (dq > sq) {
return "'" + str.replace(/\x27/g, "\\'") + "'";
} else {
return '"' + str.replace(/\x22/g, '\\"') + '"';
}
if (dq > sq) return "'" + str.replace(/\x27/g, "\\'") + "'";
else return '"' + str.replace(/\x22/g, '\\"') + '"';
};
function gen_code(ast, beautify) {
if (beautify) beautify = defaults(beautify, {
var SPLICE_NEEDS_BRACKETS = array_to_hash([ "if", "while", "do", "for", "for-in", "with" ]);
function gen_code(ast, options) {
options = defaults(options, {
indent_start : 0,
indent_level : 4,
quote_keys : false,
space_colon : false
space_colon : false,
beautify : false
});
var beautify = !!options.beautify;
var indentation = 0,
newline = beautify ? "\n" : "",
space = beautify ? " " : "";
// Render an identifier for output by calling its toString().
// NOTE(review): presumably needed because a name may be an object (e.g. a
// NodeWithToken, whose toString() returns the plain name) rather than a
// string — confirm against callers.
function make_name(name) {
return name.toString();
};
function indent(line) {
if (line == null)
line = "";
if (beautify)
line = new Array(beautify.indent_start + indentation * beautify.indent_level).join(" ") + line;
line = repeat_string(" ", options.indent_start + indentation * options.indent_level) + line;
return line;
};
@ -1438,7 +1461,7 @@ function gen_code(ast, beautify) {
};
function needs_parens(expr) {
if (expr[0] == "function") {
if (expr[0] == "function" || expr[0] == "object") {
// dot/call on a literal function requires the
// function literal itself to be parenthesized
// only if it's the first "thing" in a
@ -1450,9 +1473,8 @@ function gen_code(ast, beautify) {
var a = slice($stack), self = a.pop(), p = a.pop();
while (p) {
if (p[0] == "stat") return true;
if ((p[0] == "seq" && p[1] === self) ||
(p[0] == "call" && p[1] === self) ||
(p[0] == "binary" && p[2] === self)) {
if (((p[0] == "seq" || p[0] == "call" || p[0] == "dot" || p[0] == "sub" || p[0] == "conditional") && p[1] === self) ||
((p[0] == "binary" || p[0] == "assign" || p[0] == "unary-postfix") && p[2] === self)) {
self = p;
p = a.pop();
} else {
@ -1486,6 +1508,19 @@ function gen_code(ast, beautify) {
return make_block_statements(statements)
.join(newline + newline);
},
"splice": function(statements) {
var parent = $stack[$stack.length - 2][0];
if (HOP(SPLICE_NEEDS_BRACKETS, parent)) {
// we need block brackets in this case
return make_block.apply(this, arguments);
} else {
return MAP(make_block_statements(statements, true),
function(line, i) {
// the first line is already indented
return i > 0 ? indent(line) : line;
}).join(newline);
}
},
"block": make_block,
"var": function(defs) {
return "var " + add_commas(MAP(defs, make_1vardef)) + ";";
@ -1547,9 +1582,10 @@ function gen_code(ast, beautify) {
},
"dot": function(expr) {
var out = make(expr), i = 1;
if (expr[0] == "num")
out += ".";
else if (needs_parens(expr))
if (expr[0] == "num") {
if (!/\./.test(expr[1]))
out += ".";
} else if (needs_parens(expr))
out = "(" + out + ")";
while (i < arguments.length)
out += "." + make_name(arguments[i++]);
@ -1582,12 +1618,11 @@ function gen_code(ast, beautify) {
out.push("(" + args + ")", make(block));
return add_spaces(out);
},
"for-in": function(has_var, key, hash, block) {
var out = add_spaces([ "for", "(" ]);
if (has_var)
out += "var ";
out += add_spaces([ make_name(key) + " in " + make(hash) + ")", make(block) ]);
return out;
"for-in": function(vvar, key, hash, block) {
return add_spaces([ "for", "(" +
(vvar ? make(vvar).replace(/;+$/, "") : make(key)),
"in",
make(hash) + ")", make(block) ]);
},
"while": function(condition, block) {
return add_spaces([ "while", "(" + make(condition) + ")", make(block) ]);
@ -1645,7 +1680,7 @@ function gen_code(ast, beautify) {
return indent(make_function(p[0], p[1][2], p[1][3], p[2]));
}
var key = p[0], val = make(p[1]);
if (beautify && beautify.quote_keys) {
if (options.quote_keys) {
key = make_string(key);
} else if ((typeof key == "number" || !beautify && +key + "" == key)
&& parseFloat(key) >= 0) {
@ -1653,7 +1688,7 @@ function gen_code(ast, beautify) {
} else if (!is_identifier(key)) {
key = make_string(key);
}
return indent(add_spaces(beautify && beautify.space_colon
return indent(add_spaces(beautify && options.space_colon
? [ key, ":", val ]
: [ key + ":", val ]));
}).join("," + newline);
@ -1726,11 +1761,7 @@ function gen_code(ast, beautify) {
return add_spaces([ out, make_block(body) ]);
};
function make_name(name) {
return name.toString();
};
function make_block_statements(statements) {
function make_block_statements(statements, noindent) {
for (var a = [], last = statements.length - 1, i = 0; i <= last; ++i) {
var stat = statements[i];
var code = make(stat);
@ -1748,7 +1779,7 @@ function gen_code(ast, beautify) {
a.push(code);
}
}
return MAP(a, indent);
return noindent ? a : MAP(a, indent);
};
function make_switch_block(body) {
@ -1779,7 +1810,7 @@ function gen_code(ast, beautify) {
function make_1vardef(def) {
var name = def[0], val = def[1];
if (val != null)
name = add_spaces([ name, "=", make(val) ]);
name = add_spaces([ make_name(name), "=", parenthesize(val, "seq") ]);
return name;
};
@ -1836,6 +1867,10 @@ function member(name, array) {
return false;
};
/**
 * Build a string made of `i` copies of `str`.
 *
 * @param {string} str - piece to repeat
 * @param {number} i - number of copies; anything below 1 yields ""
 * @returns {string} the repeated string
 */
function repeat_string(str, i) {
    if (i < 1) return "";
    // an array of i + 1 empty slots has i gaps for join() to fill with `str`
    return new Array(i + 1).join(str);
};
function defaults(args, defs) {
var ret = {};
if (args === true)