mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-22 15:48:09 -05:00
improved reccomp reliability even further, added html summary generator
Will probably host the summary somewhere for easy access
This commit is contained in:
parent
ff85548c85
commit
66dd2cdeb9
2 changed files with 306 additions and 19 deletions
|
@ -10,11 +10,13 @@
|
|||
def print_usage():
    """Print the command-line help text to stdout and exit with status 1."""
    usage_lines = (
        # The trailing newline leaves a blank line between the synopsis and
        # the option list.
        'Usage: %s [options] <original-binary> <recompiled-binary> <recompiled-pdb> <decomp-dir>\n' % sys.argv[0],
        "\t-v, --verbose <offset>\t\t\tPrint assembly diff for specific function (original file's offset)",
        '\t-h, --html <output-file>\t\t\tGenerate searchable HTML summary of status and diffs',
    )
    for usage_line in usage_lines:
        print(usage_line)
    sys.exit(1)
|
||||
|
||||
positional_args = []
|
||||
verbose = None
|
||||
skip = False
|
||||
html = None
|
||||
|
||||
for i, arg in enumerate(sys.argv):
|
||||
if skip:
|
||||
|
@ -28,6 +30,9 @@ def print_usage():
|
|||
if flag == 'v' or flag == '-verbose':
|
||||
verbose = int(sys.argv[i + 1], 16)
|
||||
skip = True
|
||||
elif flag == 'h' or flag == '-html':
|
||||
html = sys.argv[i + 1]
|
||||
skip = True
|
||||
else:
|
||||
print('Unknown flag: %s' % arg)
|
||||
print_usage()
|
||||
|
@ -100,13 +105,16 @@ def get_wine_path(fn):
|
|||
def get_unix_path(fn):
    """Run the external ``winepath`` tool on *fn* and return its output.

    The tool's stdout is decoded as UTF-8 and stripped of surrounding
    whitespace. ``subprocess.CalledProcessError`` propagates if the tool
    exits with a non-zero status.
    """
    raw_output = subprocess.check_output(['winepath', fn])
    decoded = raw_output.decode('utf-8')
    return decoded.strip()
|
||||
|
||||
def get_file_in_script_dir(fn):
    """Return the path of *fn* inside the directory that holds the running script.

    The script location is taken from ``sys.argv[0]`` and made absolute
    before its directory component is extracted.
    """
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)
|
||||
|
||||
# Declare a class that parses the output of cvdump for fast access later
|
||||
class SymInfo:
|
||||
funcs = {}
|
||||
lines = {}
|
||||
|
||||
def __init__(self, pdb, file):
|
||||
call = [os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), 'cvdump.exe'), '-l', '-s']
|
||||
call = [get_file_in_script_dir('cvdump.exe'), '-l', '-s']
|
||||
|
||||
if os.name != 'nt':
|
||||
# Run cvdump through wine and convert path to Windows-friendly wine path
|
||||
|
@ -192,23 +200,31 @@ def get_recompiled_address(self, filename, line):
|
|||
md = Cs(CS_ARCH_X86, CS_MODE_32)
|
||||
|
||||
def sanitize(file, mnemonic, op_str):
|
||||
offsetplaceholder = '<OFFSET>'
|
||||
|
||||
if mnemonic == 'call' or mnemonic == 'jmp':
|
||||
# Filter out "calls" because we're not currently trying to
|
||||
# match offsets. As long as there's a call in the right place, it's
|
||||
# probably accurate.
|
||||
op_str = ''
|
||||
op_str = offsetplaceholder
|
||||
else:
|
||||
# Filter out dword ptrs where the pointer is to an offset
|
||||
def filter_out_ptr(ptype, op_str):
|
||||
try:
|
||||
start = op_str.index('dword ptr [') + 11
|
||||
ptrstr = ptype + ' ptr ['
|
||||
start = op_str.index(ptrstr) + len(ptrstr)
|
||||
end = op_str.index(']', start)
|
||||
|
||||
# This will throw ValueError if not hex
|
||||
inttest = int(op_str[start:end], 16)
|
||||
|
||||
op_str = op_str[0:start] + op_str[end:]
|
||||
return op_str[0:start] + offsetplaceholder + op_str[end:]
|
||||
except ValueError:
|
||||
pass
|
||||
return op_str
|
||||
|
||||
# Filter out dword ptrs where the pointer is to an offset
|
||||
op_str = filter_out_ptr('dword', op_str)
|
||||
op_str = filter_out_ptr('word', op_str)
|
||||
op_str = filter_out_ptr('byte', op_str)
|
||||
|
||||
# Use heuristics to filter out any args that look like offsets
|
||||
words = op_str.split(' ')
|
||||
|
@ -216,7 +232,7 @@ def sanitize(file, mnemonic, op_str):
|
|||
try:
|
||||
inttest = int(word, 16)
|
||||
if inttest >= file.imagebase + file.textvirt:
|
||||
words[i] = ''
|
||||
words[i] = offsetplaceholder
|
||||
except ValueError:
|
||||
pass
|
||||
op_str = ' '.join(words)
|
||||
|
@ -230,11 +246,15 @@ def parse_asm(file, addr, size):
|
|||
# Use heuristics to disregard some differences that aren't representative
|
||||
# of the accuracy of a function (e.g. global offsets)
|
||||
mnemonic, op_str = sanitize(file, i.mnemonic, i.op_str)
|
||||
if op_str is None:
|
||||
asm.append(mnemonic)
|
||||
else:
|
||||
asm.append("%s %s" % (mnemonic, op_str))
|
||||
return asm
|
||||
|
||||
function_count = 0
|
||||
total_accuracy = 0
|
||||
htmlinsert = []
|
||||
|
||||
for subdir, dirs, files in os.walk(source):
|
||||
for file in files:
|
||||
|
@ -274,15 +294,42 @@ def parse_asm(file, addr, size):
|
|||
function_count += 1
|
||||
total_accuracy += ratio
|
||||
|
||||
if verbose == addr:
|
||||
if verbose == addr or html:
|
||||
udiff = difflib.unified_diff(origasm, recompasm)
|
||||
|
||||
if verbose == addr:
|
||||
for line in udiff:
|
||||
print(line)
|
||||
print()
|
||||
print()
|
||||
|
||||
if html:
|
||||
htmlinsert.append('{address: "%s", name: "%s", matching: %s, diff: "%s"}' % (hex(addr), recinfo.name, str(ratio), '\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n')))
|
||||
|
||||
except UnicodeDecodeError:
|
||||
break
|
||||
|
||||
def gen_html(html, data):
    """Write the HTML summary to *html* by filling in ``template.html``.

    The template is read from the script's own directory, the first
    ``/* INSERT DATA HERE */`` marker is replaced with the comma-joined
    entries of *data*, and the result is written to *html*.

    Args:
        html: Path of the HTML file to create or overwrite.
        data: Iterable of strings, each one a JavaScript object literal
            describing a function; they are joined with commas.

    Returns:
        None. If the template cannot be read or the output cannot be
        written, a message is printed and the function returns early.
    """
    template_path = get_file_in_script_dir('template.html')

    # open() signals failure by raising OSError; it never returns a falsy
    # value, so the failure has to be caught rather than tested for.
    try:
        # The template contains non-ASCII characters (the sort arrows), so
        # the encoding is pinned instead of relying on the platform default.
        with open(template_path, 'r', encoding='utf-8') as templatefile:
            templatedata = templatefile.read()
    except OSError:
        print('Failed to find HTML template file, can\'t generate HTML summary')
        return

    templatedata = templatedata.replace('/* INSERT DATA HERE */', ','.join(data), 1)

    try:
        with open(html, 'w', encoding='utf-8') as htmlfile:
            htmlfile.write(templatedata)
    except OSError:
        print('Failed to write to HTML file %s' % html)
|
||||
|
||||
if html:
|
||||
gen_html(html, htmlinsert)
|
||||
|
||||
if function_count > 0:
|
||||
print('\nTotal accuracy %.2f%% across %i functions' % (total_accuracy / function_count * 100, function_count))
|
||||
|
|
240
tools/reccomp/template.html
Normal file
240
tools/reccomp/template.html
Normal file
|
@ -0,0 +1,240 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Decompilation Status</title>
|
||||
<style>
|
||||
body {
|
||||
background: #202020;
|
||||
color: #f0f0f0;
|
||||
font-family: sans-serif;
|
||||
}
|
||||
|
||||
h1 {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.main {
|
||||
width: 800px;
|
||||
max-width: 100%;
|
||||
margin: auto;
|
||||
}
|
||||
|
||||
#search {
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
background: #303030;
|
||||
color: #f0f0f0;
|
||||
border: 1px #f0f0f0 solid;
|
||||
padding: 0.5em;
|
||||
border-radius: 0.5em;
|
||||
}
|
||||
|
||||
#search::placeholder {
|
||||
color: #b0b0b0;
|
||||
}
|
||||
|
||||
#listing {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
.funcrow:hover {
|
||||
background: #404040 !important;
|
||||
}
|
||||
|
||||
.funcrow:nth-child(odd), #listing th {
|
||||
background: #282828;
|
||||
}
|
||||
|
||||
.funcrow:nth-child(even) {
|
||||
background: #383838;
|
||||
}
|
||||
|
||||
#listing td, #listing th {
|
||||
border: 1px #f0f0f0 solid;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
.diffneg {
|
||||
color: #FF8080;
|
||||
}
|
||||
|
||||
.diffpos {
|
||||
color: #80FF80;
|
||||
}
|
||||
|
||||
#sortind {
|
||||
margin: 0 0.5em;
|
||||
}
|
||||
</style>
|
||||
<script>
|
||||
var data = [/* INSERT DATA HERE */];
|
||||
|
||||
// Convert a unified-diff string into HTML for display in a diff row.
//
// Each line is HTML-escaped before any markup is added: the result is
// assigned to innerHTML, and assembly text such as the "<OFFSET>" placeholder
// would otherwise be parsed as an (invisible) unknown tag instead of being
// shown. Lines starting with '-' or '+' are wrapped in a span carrying the
// "diffneg" / "diffpos" class; all lines are joined with <br>.
function formatAsm(asm) {
  var entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;'};

  return asm.split('\n').map(function (line) {
    var safe = line.replace(/[&<>]/g, function (ch) {
      return entities[ch];
    });

    // line[0] is undefined for an empty line, which matches neither marker.
    if (line[0] == '-') {
      return '<span class="diffneg">' + safe + '</span>';
    }
    if (line[0] == '+') {
      return '<span class="diffpos">' + safe + '</span>';
    }
    return safe;
  }).join('<br>');
}
|
||||
|
||||
// Click handler for a function row: toggles the diff row shown beneath it.
//
// `this` is the clicked <tr>. Its data-index selects the entry in `data`, and
// data-expanded records whether a diff row is currently shown.
function rowClick() {
  var next = this.nextSibling;

  // The flag alone is not trusted: if the diff row was removed by other code
  // while the flag stayed 'true', removing nextSibling would delete the next
  // function row. Only collapse when the following row really is a diff row.
  var diffShown = this.dataset.expanded === 'true'
    && next && next.classList && next.classList.contains('diff');

  if (diffShown) {
    next.remove();
    this.dataset.expanded = false;
    return;
  }

  // Insert a new row directly below this one, spanning all three columns.
  var row = this.parentNode.insertBefore(document.createElement('tr'), this.nextSibling);
  row.classList.add('diff');

  var decCel = row.appendChild(document.createElement('td'));
  decCel.colSpan = 3;

  var diff = data[this.dataset.index].diff;
  if (diff == '') {
    diff = '<center><i>Identical function - no diff</i></center>';
  } else {
    diff = formatAsm(diff);
  }
  decCel.innerHTML = diff;

  this.dataset.expanded = true;
}
|
||||
|
||||
// Remove every expanded diff row from the document and mark the function row
// above each one as collapsed.
function closeAllDiffs() {
  // getElementsByClassName returns a live collection that shrinks as rows are
  // removed; iterating it directly skips entries, so take a snapshot first.
  var openDiffs = Array.from(document.getElementsByClassName('diff'));

  for (var diffRow of openDiffs) {
    // Reset the owner's flag so its next click expands instead of trying to
    // collapse a diff row that no longer exists.
    var owner = diffRow.previousElementSibling;
    if (owner && owner.classList.contains('funcrow')) {
      owner.dataset.expanded = false;
    }
    diffRow.remove();
  }
}
|
||||
|
||||
// Show only the function rows whose address or name contains `text`
// (case-insensitive). An empty `text` shows every row. Any open diff rows are
// closed first.
function filter(text) {
  closeAllDiffs();

  var needle = text.toLowerCase();

  for (var row of document.getElementsByClassName('funcrow')) {
    var entry = data[row.dataset.index];
    var visible = text == ''
      || entry.address.toLowerCase().includes(needle)
      || entry.name.toLowerCase().includes(needle);

    // An empty display value restores the row's default display.
    row.style.display = visible ? '' : 'none';
  }
}
|
||||
|
||||
// Sort state shared between calls: the column sorted last (-1 before the
// first sort) and the current direction.
var lastSortedCol = -1;
var ascending = true;

// Sort the function rows by the data-value of the cell in `column` and move
// the sort indicator to that column's header. Sorting the same column again
// flips the direction. Any open diff rows are closed first.
//
// `column` may be a number or a numeric string (header clicks pass the
// data-column attribute), hence the loose equality below.
function sortByColumn(column) {
  closeAllDiffs();

  if (column == lastSortedCol) {
    ascending = !ascending;
  }
  lastSortedCol = column;

  var rows = Array.from(document.getElementsByClassName('funcrow'));

  var keyOf = function (row) {
    return row.childNodes[column].dataset.value;
  };

  // data-* values are always strings. Compare them as numbers when every
  // value in the column parses as one (Number() also accepts "0x..." hex),
  // so that e.g. "0x9000" sorts before "0x10000"; otherwise compare as text.
  var numeric = rows.every(function (row) {
    var value = keyOf(row);
    return value !== '' && !isNaN(Number(value));
  });

  var direction = ascending ? 1 : -1;
  rows.sort(function (a, b) {
    var ka = keyOf(a);
    var kb = keyOf(b);
    if (numeric) {
      ka = Number(ka);
      kb = Number(kb);
    }
    if (ka < kb) {
      return -direction;
    }
    if (ka > kb) {
      return direction;
    }
    return 0;
  });

  // Re-chain the rows in sorted order; the first row stays where it is and
  // every other row is moved to follow its predecessor.
  for (var i = 1; i < rows.length; i++) {
    rows[i - 1].after(rows[i]);
  }

  // Reuse the single indicator element if it exists; appendChild moves it.
  var sortIndicator = document.getElementById('sortind');
  if (!sortIndicator) {
    sortIndicator = document.createElement('span');
    sortIndicator.id = 'sortind';
  }
  sortIndicator.innerHTML = ascending ? '▲' : '▼';

  var th = document.getElementById('listingheader').childNodes[column];
  th.appendChild(sortIndicator);
}
|
||||
|
||||
// Page setup: wire up the sortable headers, build one table row per entry in
// `data`, hook up the search box, and apply the initial sort by address.
document.addEventListener("DOMContentLoaded", () => {
  var listing = document.getElementById('listing');

  // Number the header cells and make each one sort by its own column.
  Array.from(listing.getElementsByTagName('th')).forEach((header, columnIndex) => {
    header.dataset.column = columnIndex;
    header.addEventListener('click', function () {
      sortByColumn(this.dataset.column);
    });
  });

  data.forEach((element, index) => {
    var row = listing.appendChild(document.createElement('tr'));
    var addrCel = row.appendChild(document.createElement('td'));
    var nameCel = row.appendChild(document.createElement('td'));
    var matchCel = row.appendChild(document.createElement('td'));

    // textContent rather than innerHTML: these values are plain text, and a
    // name containing '<' or '&' must be displayed, not parsed as markup.
    addrCel.textContent = addrCel.dataset.value = element.address;
    nameCel.textContent = nameCel.dataset.value = element.name;
    matchCel.textContent = (element.matching * 100).toFixed(2) + '%';
    matchCel.dataset.value = element.matching;

    row.classList.add('funcrow');
    row.addEventListener('click', rowClick);
    row.dataset.index = index;
    row.dataset.expanded = false;
  });

  var search = document.getElementById('search');
  search.addEventListener('input', function (evt) {
    filter(search.value);
  });

  sortByColumn(0);
});
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<div class="main">
|
||||
<h1>Decompilation Status</h1>
|
||||
<input id="search" type="search" placeholder="Search for offset or function name...">
|
||||
<br>
|
||||
<br>
|
||||
<table id="listing">
|
||||
<tr id='listingheader'><th style='width: 20%'>Address</th><th style="width:60%">Name</th><th style='width: 20%'>Matching</th></tr>
|
||||
</table>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in a new issue