extract-partitions.py: fix reading 512 bytes too short

/proc/partitions reports sizes in 1 KiB blocks, but a partition can be
aligned at 512 bytes (as observed with the "grow" partition): a partition
spanning an odd number of 512-byte sectors has its block count rounded
down, so blocks * 1024 falls 512 bytes short of the real size. Re-use the
partitions.py code for dumping partitions.
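
For illustration, the shortfall in a quick sketch (hypothetical numbers,
not taken from this commit):

    SECTOR = 512
    sectors = 4105                   # a partition with an odd sector count
    real_size = sectors * SECTOR     # 2101760 bytes
    blocks = real_size // 1024       # /proc/partitions shows 2052 (rounded down)
    assert blocks * 1024 == real_size - 512  # the last sector would be skipped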

Previously every partition was opened separately; now the partition
offset and size are requested for each partition, but the disk itself is
opened only once.
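
Condensed, the new flow looks like this (a sketch built from the helpers
the new code imports from partitions.py; size checks and resume handling
omitted):

    with partitions.laf_open_disk(comm) as disk_fd:   # one OPEN for the disk
        for part_num in read_partition_numbers(comm):
            offset, size = partitions.partition_info(comm, part_num)
            partitions.dump_partition(comm, disk_fd,
                    "mmcblk0p%d.bin" % part_num, offset, size)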

This takes about 32 seconds for 356 MiB.
Peter Wu, 2015-12-26 23:18:35 +01:00
parent 8f9c53b9eb
commit a27775129d

@@ -5,108 +5,57 @@
 # Copyright (C) 2015 Peter Wu <peter@lekensteyn.nl>
 # Licensed under the MIT license <http://opensource.org/licenses/MIT>.
 
-from collections import OrderedDict
-from contextlib import closing, contextmanager
+from contextlib import closing
 import argparse, logging, os, struct
-import lglaf
+import lglaf, partitions
 
-_logger = logging.getLogger(__name__)
+_logger = logging.getLogger("extract-partitions")
 
-def read_uint32(data, offset):
-    return struct.unpack_from('<I', data, 4)[0]
-
-def read_partitions(comm):
+def read_partition_numbers(comm):
     output = comm.call(lglaf.make_exec_request('cat /proc/partitions'))[1]
-    partitions = OrderedDict()
+    partitions = []
     for line in output.decode('ascii').split('\n'):
-        if not 'mmcblk0p' in line:
+        if not line:
             continue
-        major, minor, blocks, name = line.split()
-        partitions[name] = int(blocks)
+        name = line.split()[-1]
+        if not name.startswith('mmcblk0p'):
+            continue
+        part_num = int(name[len('mmcblk0p'):])
+        partitions.append(part_num)
     return partitions
 
-@contextmanager
-def laf_open_ro(comm, path):
-    # Avoid opening the whole partition in read/write mode.
-    assert path, "Path must not be empty"
-    path_bin = path.encode('ascii') + b'\0'
-    open_cmd = lglaf.make_request(b'OPEN', body=path_bin)
-    open_header = comm.call(open_cmd)[0]
-    fd_num = read_uint32(open_header, 4)
-    try:
-        yield fd_num
-    finally:
-        close_cmd = lglaf.make_request(b'CLSE', args=[fd_num])
-        comm.call(close_cmd)
-
-def laf_read(comm, fd_num, offset, size):
-    """Read size bytes at the given block offset."""
-    read_cmd = lglaf.make_request(b'READ', args=[fd_num, offset, size])
-    header, response = comm.call(read_cmd)
-    # Ensure that response fd, offset and length are sane (match the request)
-    assert read_cmd[4:4+12] == header[4:4+12], "Unexpected read response"
-    assert len(response) == size
-    return response
-
 parser = argparse.ArgumentParser()
 parser.add_argument("-d", "--outdir", default=".",
     help="Output directory for disk images.")
 # Do not dump partitions larger than this size
 # (userdata 11728 MiB, system 2064 MiB, cache 608 MiB, cust 256 MiB)
-parser.add_argument("--max-size", type=int, default=65536,
+parser.add_argument("--max-size", metavar="kbytes", type=int, default=65536,
     help="Maximum partition size to dump (in KiB)")
 parser.add_argument("--debug", action='store_true', help="Enable debug messages")
 
-# On Linux, one bulk read returns at most 16 KiB. 32 bytes are part of the first
-# header, so remove one block size (512 bytes) to stay within that margin.
-# This ensures that whenever the USB communication gets out of sync, it will
-# always start with a message header, making recovery easier.
-MAX_BLOCK_SIZE = (16 * 1024 - 512) // 512
-
-def dump_file(comm, remote_path, local_path, size):
-    try:
-        offset = os.path.getsize(local_path)
-    except OSError:
-        offset = 0
-    if offset >= size:
-        if offset > size:
-            _logger.warn("%s: unexpected size %dK > %dK",
-                    local_path, offset, size)
-        else:
-            _logger.info("%s: already retrieved %dK",
-                    local_path, size)
-        return
-
-    # Read offsets must be a multiple of 512 bytes, enforce this
-    BLOCK_SIZE = 512
-    unaligned_bytes = offset % BLOCK_SIZE
-    offset = BLOCK_SIZE * (offset // BLOCK_SIZE)
-
-    with laf_open_ro(comm, remote_path) as fd_num:
-        _logger.debug("Opened fd %d for %s (final size %.2fK, offset %.2fK)",
-                fd_num, remote_path, size / 1024, offset / 1024)
-        with open(local_path, 'ab') as f:
-            # Offset should be aligned to block size. If not, read at most a
-            # whole block and drop the leading bytes.
-            if unaligned_bytes:
-                chunksize = min(size - offset, BLOCK_SIZE)
-                data = laf_read(comm, fd_num, offset // BLOCK_SIZE, chunksize)
-                f.write(data[unaligned_bytes:])
-                offset += BLOCK_SIZE
-            while offset < size:
-                chunksize = min(size - offset, BLOCK_SIZE * MAX_BLOCK_SIZE)
-                data = laf_read(comm, fd_num, offset // BLOCK_SIZE, chunksize)
-                f.write(data)
-                offset += chunksize
-
-def dump_partitions(comm, outdir, max_size):
-    parts = read_partitions(comm)
-    for name, size in parts.items():
-        if size > max_size:
-            _logger.info("Ignoring large partition %s of size %dK" % (name, size))
+def dump_partitions(comm, disk_fd, outdir, max_size):
+    part_nums = read_partition_numbers(comm)
+    for part_num in part_nums:
+        part_offset, part_size = partitions.partition_info(comm, part_num)
+        if part_size > max_size:
+            _logger.info("Ignoring large partition %s of size %dK" % (part_num,
+                part_size / 1024))
             continue
-        out_path = os.path.join(outdir, "%s.bin" % name)
-        dump_file(comm, "/dev/block/%s" % name, out_path, 1024 * size)
+        out_path = os.path.join(outdir, "mmcblk0p%d.bin" % part_num)
+        try:
+            current_size = os.path.getsize(out_path)
+            if current_size > part_size:
+                _logger.warn("%s: unexpected size %dK, larger than %dK",
+                        out_path, current_size / 1024, part_size / 1024)
+                continue
+            elif current_size == part_size:
+                _logger.info("%s: already retrieved %dK",
+                        out_path, part_size / 1024)
+                continue
+        except OSError: pass
+        _logger.info("Dumping partition %d to %s (%d bytes)",
+                part_num, out_path, part_size)
+        partitions.dump_partition(comm, disk_fd, out_path, part_offset, part_size)
 
 def main():
     args = parser.parse_args()
@@ -119,7 +68,9 @@ def main():
     comm = lglaf.autodetect_device()
     with closing(comm):
         lglaf.try_hello(comm)
-        dump_partitions(comm, args.outdir, args.max_size)
+        with partitions.laf_open_disk(comm) as disk_fd:
+            _logger.debug("Opened fd %d for disk", disk_fd)
+            dump_partitions(comm, disk_fd, args.outdir, args.max_size * 1024)
 
 if __name__ == '__main__':
     main()
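
A typical invocation (assuming a device in LAF mode is attached) dumps
every partition up to the default 65536 KiB limit into ./dump as
mmcblk0pN.bin files:

    python extract-partitions.py -d dump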