diff --git a/package.json b/package.json
index b39779db8..ca861f9c8 100644
--- a/package.json
+++ b/package.json
@@ -20,9 +20,10 @@
     "lint": "eslint . && format-message lint src/**/*.js",
     "prepublish": "in-publish && npm run build || not-in-publish",
     "start": "webpack-dev-server",
-    "tap": "tap ./test/{unit,integration}/*.js",
+    "tap": "tap ./test/{unit,integration,execute}/*.js",
     "tap:unit": "tap ./test/unit/*.js",
-    "tap:integration": "tap ./test/integration/*.js",
+    "tap:execute": "tap ./test/execute/*.js",
+    "tap:integration": "tap ./test/{integration,execute}/*.js",
     "test": "npm run lint && npm run tap",
     "watch": "webpack --progress --colors --watch",
     "version": "json -f package.json -I -e \"this.repository.sha = '$(git log -n1 --pretty=format:%H)'\""
diff --git a/test/execute/index.js b/test/execute/index.js
new file mode 100644
index 000000000..3fcc650f6
--- /dev/null
+++ b/test/execute/index.js
@@ -0,0 +1,136 @@
+const fs = require('fs');
+const path = require('path');
+
+const test = require('tap').test;
+
+const log = require('../../src/util/log');
+const makeTestStorage = require('../fixtures/make-test-storage');
+const readFileToBuffer = require('../fixtures/readProjectFile').readFileToBuffer;
+const VirtualMachine = require('../../src/index');
+
+/**
+ * @fileoverview Transform each sb2 in this directory into a test.
+ *
+ * Test execution of a group of scratch blocks by SAYing if a test did "pass",
+ * or did "fail". Four keywords can be set at the beginning of a SAY message
+ * to indicate a test primitive.
+ *
+ * - "pass MESSAGE" will t.pass(MESSAGE).
+ * - "fail MESSAGE" will t.fail(MESSAGE).
+ * - "plan NUMBER_OF_TESTS" will t.plan(Number(NUMBER_OF_TESTS)).
+ * - "end" will t.end().
+ *
+ * A good strategy to follow is to SAY "plan NUMBER_OF_TESTS" first. Then
+ * "pass" and "fail" depending on expected scratch results in conditions, event
+ * scripts, or what is best for testing the target block or group of blocks.
+ * When it's done you must SAY "end" so the test and tap know that the end has
+ * been reached.
+ */
+
+const whenThreadsComplete = (t, vm, timeLimit = 2000) => (
+    // When the number of threads reaches 0 the test is expected to be complete.
+    new Promise((resolve, reject) => {
+        const intervalId = setInterval(() => {
+            if (vm.runtime.threads.length === 0) {
+                resolve();
+            }
+        }, 50);
+
+        const timeoutId = setTimeout(() => {
+            reject(new Error('time limit reached'));
+        }, timeLimit);
+
+        // Clear the interval to allow the process to exit
+        // naturally.
+        t.tearDown(() => {
+            clearInterval(intervalId);
+            clearTimeout(timeoutId);
+        });
+    })
+);
+
+fs.readdirSync(__dirname)
+    .filter(uri => uri.endsWith('.sb2'))
+    .forEach(uri => {
+        test(uri, t => {
+            // Disable logging during this test.
+            log.suggest.deny('vm', 'error');
+            t.tearDown(() => log.suggest.clear());
+
+            // Map string messages to tap reporting methods. This will be used
+            // with events from scratch's runtime emitted on block instructions.
+            let didPlan;
+            let didEnd;
+            const reporters = {
+                comment (message) {
+                    t.comment(message);
+                },
+                pass (reason) {
+                    t.pass(reason);
+                },
+                fail (reason) {
+                    t.fail(reason);
+                },
+                plan (count) {
+                    didPlan = true;
+                    t.plan(Number(count));
+                },
+                end () {
+                    didEnd = true;
+                    t.end();
+                }
+            };
+            const reportVmResult = text => {
+                const command = text.split(/\s+/, 1)[0].toLowerCase();
+                if (reporters[command]) {
+                    return reporters[command](text.substring(command.length).trim());
+                }
+
+                // Default to a comment with the full text if we didn't match
+                // any command prefix
+                return reporters.comment(text);
+            };
+
+            const vm = new VirtualMachine();
+            vm.attachStorage(makeTestStorage());
+
+            // Start the VM and initialize some vm properties.
+            // complete.
+            vm.start();
+            vm.clear();
+            vm.setCompatibilityMode(false);
+            vm.setTurboMode(false);
+
+            // Stop the runtime interval once the test is complete so the test
+            // process may naturally exit.
+            t.tearDown(() => {
+                clearInterval(vm.runtime._steppingInterval);
+            });
+
+            // Report the text of SAY events as testing instructions.
+            vm.runtime.on('SAY', (target, type, text) => reportVmResult(text));
+
+            const project = readFileToBuffer(path.resolve(__dirname, uri));
+
+            // Load the project and once all threads are complete ensure that
+            // the scratch project sent us a "end" message.
+            return vm.loadProject(project)
+                .then(() => vm.greenFlag())
+                .then(() => whenThreadsComplete(t, vm))
+                .then(() => {
+                    // Setting a plan is not required but is a good idea.
+                    if (!didPlan) {
+                        t.comment('did not say "plan NUMBER_OF_TESTS"');
+                    }
+
+                    // End must be called so that tap knows the test is done. If
+                    // the test has an SAY "end" block but that block did not
+                    // execute, this explicit failure will raise that issue so
+                    // it can be resolved.
+                    if (!didEnd) {
+                        t.fail('did not say "end"');
+                        t.end();
+                    }
+                });
+        });
+    });