mu

2 years ago · 59dbb8262b
parent 600f60ab8e
commit 59dbb8262b
9 changed files with 1032 additions and 1012 deletions
--- a/lib/minitest/thesis.rb
+++ b/lib/minitest/thesis.rb
@ -2,10 +2,16 @@ require "digest"

 require "minitest"

+require_relative "thesis/directory_db"
+require_relative "thesis/error"
+require_relative "thesis/possibility"
+require_relative "thesis/status"
+require_relative "thesis/test_case"
+require_relative "thesis/testing_state"
+require_relative "thesis/version"
+
 module Minitest
  module Thesis
-    VERSION = "0.1.0"
-
    class Test < Minitest::Test

      # Runs a test. Usage is:
@ -72,161 +78,6 @@ module Minitest
        end
      end

-      # Represents a single generated test case, which consists of an underlying
-      # set of choices that produce possibilities.
-      class TestCase
-
-        # Returns a test case that makes this series of choices.
-        def self.for_choices(choices, print_results: false)
-          self.new(prefix: choices, random: nil, max_size: choices.length, print_results:)
-        end
-
-        attr_accessor :status
-        attr_reader :choices, :targeting_score
-
-        def initialize(prefix:, random:, max_size: Float::INFINITY, print_results: false)
-          @prefix, @random, @max_size, @print_results = prefix, random, max_size, print_results
-          @choices = []
-          @status = nil
-          @depth = 0
-          @targeting_score = nil
-        end
-
-        # Returns a number in the range [0, n]
-        def choice(n)
-          result = make_choice(n) { @random.rand(n) }
-
-          puts "choice(#{n}): #{result}" if should_print?
-
-          result
-        end
-
-        # Return True with probability `p`.
-        def weighted(p)
-          result = if    p <= 0 then forced_choice(0)
-                   elsif p >= 1 then forced_choice(1)
-                   else              make_choice(1) { (@random.rand <= p) ? 1 : 0 }
-                   end
-
-          puts "weighted(#{p}): #{result}" if should_print?
-
-          result
-        end
-
-        # Inserts a fake choice into the choice sequence, as if some call to
-        # choice() had returned `n`. You almost never need this, but sometimes it
-        # can be a useful hint to the shrinker.
-        def forced_choice(n)
-          raise RangeError.new("Invalid choice #{n}") if n.bit_length > 64 || n.negative?
-          raise Frozen unless @status.nil?
-
-          mark_status(Status::OVERRUN) if @choices.length >= @max_size
-
-          choices << n
-          n
-        end
-
-        # Mark this test case as invalid.
-        def reject = mark_status(Status::INVALID)
-
-        # If this precondition is not met, abort the test and mark this test case as invalid.
-        def assume(precondition)
-          return if precondition
-          reject
-        end
-
-        # Set a score to maximize. Multiple calls to this function will override previous ones.
-        #
-        # The name and idea come from Löscher, Andreas, and Konstantinos Sagonas.
-        # "Targeted property-based testing." ISSTA. 2017, but the implementation
-        # is based on that found in Hypothesis, which is not that similar to
-        # anything described in the paper.
-        def target(score) = @targeting_score = score
-
-        # Return a possible value from `possibility`.
-        def any(possibility)
-          begin
-            @depth += 1
-            result = possibility.produce.(self)
-          ensure
-            @depth -= 1
-          end
-
-          puts "any(#{possibility}): #{result}" if should_print?
-
-          result
-        end
-
-        # Set the status and raise StopTest.
-        def mark_status(status)
-          raise Frozen unless self.status.nil?
-
-          @status = status
-          raise StopTest
-        end
-
-        private
-
-        def should_print? = @print_results && @depth.zero?
-
-        # Make a choice in [0, n], by calling rnd_method if randomness is needed.
-        def make_choice(n, &rnd_method)
-          raise RangeError.new("Invalid choice #{n}") if n.bit_length > 64 || n.negative?
-          raise Frozen unless @status.nil?
-
-          mark_status(Status::OVERRUN) if @choices.length >= @max_size
-
-          result = if @choices.length < @prefix.length
-                     @prefix[@choices.length]
-                   else
-                     rnd_method.()
-                   end
-          @choices << result
-
-          mark_status(Status::INVALID) if result > n
-
-          result
-        end
-      end
-
-      # Represents some range of values that might be used in a test, that can be
-      # requested from a `TestCase`. Pass one of these to TestCase.any to get a
-      # concrete value.
-      class Possibility
-        attr_reader :produce, :name
-
-        def initialize(name = "TODO", &produce)
-          @name = name
-          @produce = produce
-        end
-
-        def inspect = name
-        def to_s = name
-
-        # "Returns a `Possibility` where values come from applying `f` to some possible value for `self`."
-        def map(&f)
-          self.class.new("#{name}.map(TODO)") {|tc| f.call(tc.any(self)) }
-        end
-
-        # Returns a `Possibility` where values come from applying `f` (which
-        # should return a new `Possibility` to some possible value for `self`
-        # then returning a possible value from that.
-        def bind(&f)
-          self.class.new("#{name}.bind(TODO)") {|tc| tc.any(f.(tc.any(self))) }
-        end
-
-        # Returns a `Possibility` whose values are any possible value of `self`
-        # for which `f` returns True.
-        def satisfying(&f)
-          self.class.new("#{name}.select(TODO)") {|test_case|
-            3.times.first {
-              candidate = test_case.any(self)
-              candidate if f.(candidate)
-            } || test_case.reject
-          }
-        end
-      end
-
      # Any integer in the range [m, n] is possible
      def integers(m, n) = Possibility.new("integers(#{m}, #{n})") {|tc| m + tc.choice(n - m) }

@ -271,480 +122,6 @@ module Minitest
          possibilities.map {|p| tc.any(p) }
        }
      end
-
-      # We cap the maximum amount of entropy a test case can use.
-      # This prevents cases where the generated test case size explodes
-      # by effectively rejection
-      BUFFER_SIZE = 8 * 1024
-
-      # Returns a cached version of a function that maps a choice sequence to the
-      # status of calling a test function on a test case populated with it. Is
-      # able to take advantage of the structure of the test function to predict
-      # the result even if exact sequence of choices has not been seen
-      # previously.
-      #
-      # You can safely omit implementing this at the cost of somewhat increased
-      # shrinking time.
-      class CachedTestFunction
-        def initialize(&test_function)
-          @test_function = test_function
-
-          # Tree nodes are either a point at which a choice occurs
-          # in which case they map the result of the choice to the
-          # tree node we are in after, or a Status object indicating
-          # mark_status was called at this point and all future
-          # choices are irrelevant.
-          #
-          # Note that a better implementation of this would use
-          # a Patricia trie, which implements long non-branching
-          # paths as an array inline. For simplicity we don't
-          # do that here.
-          @tree = {}
-        end
-
-        def call(choices)
-          node = @tree
-          begin
-            choices.each do |c|
-              node = node.fetch(c)
-              # mark_status was called, thus future choices
-              # will be ignored.
-              if node.is_a?(Status)
-                fail if node == Status::OVERRUN
-                return node
-              end
-            end
-            # If we never entered an unknown region of the tree
-            # or hit a Status value, then we know that another
-            # choice will be made next and the result will overrun.
-            return Status::OVERRUN
-          rescue KeyError
-          end
-
-          # We now have to actually call the test function to find out what
-          # happens.
-          test_case = TestCase.for_choices(choices)
-          @test_function.(test_case)
-          fail if test_case.status.nil?
-
-          # We enter the choices made in a tree.
-          node = @tree
-          *rest, last = test_case.choices
-          rest.each do |c|
-            node = if node.has_key?(c)
-                     node[c]
-                   else
-                     node[c] = {}
-                   end
-          end
-          unless last.nil?
-            node[last] = test_case.status == Status::OVERRUN ? {} : test_case.status
-          end
-
-          test_case.status
-        end
-      end
-
-      class TestingState
-        attr_reader :result, :valid_test_cases, :calls
-
-        def initialize(random:, test_function:, max_examples:)
-          @random, @_test_function, @max_examples = random, test_function, max_examples
-          @valid_test_cases = 0
-          @calls = 0
-          @test_is_trivial = false
-        end
-
-        def test_function(test_case)
-          begin
-            @_test_function.(test_case)
-          rescue StopTest
-          end
-
-          if test_case.status.nil?
-            test_case.status = Status::VALID
-          end
-
-          @calls += 1
-
-          if test_case.status >= Status::INVALID && test_case.choices.length.zero?
-            @test_is_trivial = true
-          end
-
-          if test_case.status >= Status::VALID
-            @valid_test_cases += 1
-
-            unless test_case.targeting_score.nil?
-              relevant_info = [test_case.targeting_score, test_case.choices]
-              if @best_scoring.nil?
-                @best_scoring = relevant_info
-              else
-                best, _ = @best_scoring
-                if test_case.targeting_score > best
-                  @best_scoring = relevant_info
-                end
-              end
-            end
-          end
-
-          if test_case.status == Status::INTERESTING && (
-              @result.nil? || ((sort_key(test_case.choices) <=> sort_key(@result)) == -1)
-          )
-            @result = test_case.choices
-          end
-        end
-
-        # If any test cases have had `target()` called on them, do a simple
-        # hill climbing algorithm to attempt to optimise that target score.
-        def target
-          return if !@result.nil? || @best_scoring.nil?
-
-          # Can we improve the score by changing choices[i] by `step`?
-          adjust = ->(i, step) do
-            fail if @best_scoring.nil?
-
-            score, choices = @best_scoring
-            return false if choices[i] + step < 0 || choices[i].bit_length >= 64
-
-            attempt = choices.dup
-            attempt[i] += step
-            test_case = TestCase.new(
-              prefix: attempt, random: @random, max_size: BUFFER_SIZE
-            )
-            test_function(test_case)
-
-            fail if test_case.status.nil?
-
-            test_case.status >= Status::VALID &&
-              !test_case.targeting_score.nil? &&
-              test_case.targeting_score > score
-          end
-
-          while keep_generating?
-            i = @random.rand(@best_scoring[1].length)
-            sign = 0
-            [1, -1].each do |k|
-              return unless keep_generating?
-
-              if adjust.(i, k)
-                sign = k
-                break
-              end
-            end
-
-            next if sign.zero?
-
-            k = 1
-            k *= 2 while keep_generating? && adjust.(i, sign * k)
-
-            while k.positive?
-              while keep_generating? && adjust.(i, sign * k)
-              end
-              k /= 2
-            end
-          end
-        end
-
-        def run
-          generate
-          target
-          shrink
-        end
-
-        def keep_generating?
-          !@test_is_trivial &&
-            result.nil? &&
-            @valid_test_cases < @max_examples &&
-            # We impose a limit on the maximum number of calls as
-            # well as the maximum number of valid examples. This is
-            # to avoid taking a prohibitively long time on tests which
-            # have hard or impossible to satisfy preconditions.
-            @calls < @max_examples * 10
-        end
-
-        # Run random generation until either we have found an interesting test
-        # case or hit the limit of how many test cases we should evaluate.
-        def generate
-          while keep_generating? && (@best_scoring.nil? || @valid_test_cases < @max_examples / 2)
-            test_function(TestCase.new(prefix: [], random: @random, max_size: BUFFER_SIZE))
-          end
-        end
-
-        # If we have found an interesting example, try shrinking it so that the
-        # choice sequence leading to our best example is shortlex smaller than
-        # the one we originally found. This improves the quality of the generated
-        # test case, as per our paper.
-        #
-        # https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf
-        def shrink
-          # if not self.result:
-          #     return
-          return if @result.nil? || @result.empty?
-
-          # Shrinking will typically try the same choice sequences over and over
-          # again, so we cache the test function in order to not end up
-          # reevaluating it in those cases. This also allows us to catch cases
-          # where we try something that is e.g. a prefix of something we've
-          # previously tried, which is guaranteed not to work.
-          cached = CachedTestFunction.new {|tc| test_function(tc) }
-
-          consider = ->(choices) do
-            return true if choices == @result
-
-            cached.(choices) == Status::INTERESTING
-          end
-
-          fail unless consider.(@result)
-
-          # We are going to perform a number of transformations to the current
-          # result, iterating until none of them make any progress - i.e. until
-          # we make it through an entire iteration of the loop without changing
-          # the result.
-          prev = nil
-          while prev != @result
-            prev = @result
-
-            # A note on weird loop order: We iterate backwards through the choice
-            # sequence rather than forwards, because later bits tend to depend on
-            # earlier bits so it's easier to make changes near the end and
-            # deleting bits at the end may allow us to make changes earlier on
-            # that we we'd have missed.
-            #
-            # Note that we do not restart the loop at the end when we find a
-            # successful shrink. This is because things we've already tried are
-            # less likely to work.
-            #
-            # If this guess is wrong, that's OK, this isn't a correctness
-            # problem, because if we made a successful reduction then we are not
-            # at a fixed point and will restart the loop at the end the next time
-            # round. In some cases this can result in performance issues, but the
-            # end result should still be fine.
-
-            # First try deleting each choice we made in chunks. We try longer
-            # chunks because this allows us to delete whole composite elements:
-            # e.g. deleting an element from a generated list requires us to
-            # delete both the choice of whether to include it and also the
-            # element itself, which may involve more than one choice. Some things
-            # will take more than 8 choices in the sequence. That's too bad, we
-            # may not be able to delete those. In Hypothesis proper we record the
-            # boundaries corresponding to `any` calls so that we can try deleting
-            # those, but that's pretty high overhead and also a bunch of slightly
-            # annoying code that it's not worth porting.
-            #
-            # We could instead do a quadratic amount of work to try all
-            # boundaries, but in general we don't want to do that because even a
-            # shrunk test case can involve a relatively large number of choices.
-            k = 8
-            while k.positive?
-              i = @result.length - k - 1
-              until i.negative?
-                if i >= @result.length
-                  # Can happen if we successfully lowered the value at i - 1
-                  i -= 1
-                  next
-                end
-                attempt = @result[0...i] + (@result[i + k..] || [])
-
-                fail unless attempt.length < @result.length
-
-                unless consider.(attempt)
-                  # This fixes a common problem that occurs
-                  # when you have dependencies on some
-                  # length parameter. e.g. draw a number
-                  # between 0 and 10 and then draw that
-                  # many elements. This can't delete
-                  # everything that occurs that way, but
-                  # it can delete some things and often
-                  # will get us unstuck when nothing else
-                  # does.
-                  if i.positive? && attempt[i - 1].positive?
-                    attempt[i - 1] -= 1
-                    i += 1 if consider.(attempt)
-                  end
-
-                  i -= 1
-                end
-              end
-
-              k /= 2
-            end
-
-            # Attempts to replace some indices in the current result with new
-            # values. Useful for some purely lexicographic reductions that we are
-            # about to perform.
-            replace = ->(values) do
-              fail if @result.nil?
-              attempt = @result.dup
-              values.each do |i, v|
-                # The size of self.result can change during shrinking. If that
-                # happens, stop attempting to make use of these replacements
-                # because some other shrink pass is better to run now.
-                return false if i >= attempt.length
-                attempt[i] = v
-              end
-              consider.(attempt)
-            end
-
-            # Now we try replacing blocks of choices with zeroes. Note that
-            # unlike the above we skip k = 1 because we handle that in the next
-            # step. Often (but not always) a block of all zeroes is the shortlex
-            # smallest value that a region can be.
-            k = 8
-
-            while k > 1
-              i = @result.length - k
-              until i.negative?
-                if replace.((i...i+k).to_h {|i| [i, 0]})
-                  # If we've succeeded then all of [i, i + k] is zero so we
-                  # adjust i so that the next region does not overlap with this
-                  # at all.
-                  i -= k
-                else
-                  # Otherwise we might still be able to zero some of these values
-                  # but not the last one, so we just go back one.
-                  i -= 1
-                end
-              end
-              k /= 2
-            end
-
-            # Now try replacing each choice with a smaller value by doing a
-            # binary search. This will replace n with 0 or n - 1 if possible, but
-            # will also more efficiently replace it with a smaller number than
-            # doing multiple subtractions would.
-            i = @result.length - 1
-            until i.negative?
-              # Attempt to replace
-              bin_search_down(0, @result[i]) {|v| replace.({i => v}) }
-              i -= 1
-            end
-
-            # NB from here on this is just showing off cool shrinker tricks and
-            # you probably don't need to worry about it and can skip these bits
-            # unless they're easy and you want bragging rights for how much
-            # better you are at shrinking than the local QuickCheck equivalent.
-
-            # Try sorting out of order ranges of choices, as `sort(x) <= x`, so
-            # this is always a lexicographic reduction.
-            k = 8
-            # while k > 1:
-            while k > 1
-              (@result.length - k - 1).downto(0).each do |i|
-                consider.(@result[0...i] + @result[i...i+k].sort + @result[i+k..])
-              end
-                k /= 2
-            end
-
-            # Try adjusting nearby pairs of integers by redistributing value
-            # between them. This is useful for tests that depend on the sum of
-            # some generated values.
-            [2, 1].each do |k|
-              (@result.length - k - 1).downto(0).each do |i|
-                j = i + k
-                # This check is necessary because the previous changes might have
-                # shrunk the size of result, but also it's tedious to write tests
-                # for this so I didn't.
-                if j < @result.length
-                  # Try swapping out of order pairs
-                  if @result[i] > @result[j]
-                    replace.({j => @result[i], i => @result[j]})
-                  end
-                  # j could be out of range if the previous swap succeeded.
-                  if j < @result.length && @result[i].positive?
-                    prev_i = @result[i]
-                    prev_j = @result[j]
-                    bin_search_down(0, prev_i) {|v|
-                      replace.({i => v, j => prev_j + (prev_i - v)})
-                    }
-                  end
-                end
-              end
-            end
-          end
-        end
-
-        private
-
-        # Returns a key that can be used for the shrinking order of test cases.
-        def sort_key(choices) = [choices.length, choices]
-
-        # Returns n in [lo, hi] such that f(n) is True, where it is assumed and
-        # will not be checked that f(hi) is True.
-        #
-        # Will return `lo` if `f(lo)` is True, otherwise the only guarantee that is
-        # made is that `f(n - 1)` is False and `f(n)` is True. In particular this
-        # does *not* guarantee to find the smallest value, only a locally minimal
-        # one.
-        def bin_search_down(low, high, &f)
-          return low if f.(low)
-          while low + 1 < high
-            mid = low + (high - low) / 2
-            if f.(mid)
-              high = mid
-            else
-              low = mid
-            end
-          end
-          high
-        end
-      end
-
-      class DirectoryDb
-        def initialize(dir)
-          @dir = dir
-          Dir.mkdir(@dir)
-        rescue SystemCallError => e
-          raise unless e.errno == Errno::EEXIST::Errno
-        end
-
-        def [](key)
-          f = file(key)
-          return nil unless File.exist?(f)
-
-          File.read(f)
-        end
-
-        def []=(key, value)
-          File.write(file(key), value)
-        end
-
-        private
-
-        def file(key)
-          File.join(@dir, Digest::SHA1.hexdigest(key)[0...10])
-        end
-      end
-
-      class Error< StandardError; end
-
-      # Attempted to make choices on a test case that has been completed.
-      class Frozen < Error; end
-
-      # Raised when a test should stop executing early.
-      class StopTest < Error; end
-
-      # Raised when a test has no valid examples.
-      class Unsatisfiable < Error; end
-
-      class Status < Struct.new(:value)
-        # Test case didn't have enough data to complete
-        OVERRUN = self.new(0)
-
-        # Test case contained something that prevented completion
-        INVALID = self.new(1)
-
-        # Test case completed just fine but was boring
-        VALID = self.new(2)
-
-        # Test case completed and was interesting
-        INTERESTING = self.new(3)
-
-        include Comparable
-
-        def <=>(other)
-          value <=> other.value
-        end
-      end
    end
  end
 end
--- a/lib/minitest/thesis/directory_db.rb
+++ b/lib/minitest/thesis/directory_db.rb
@ -0,0 +1,27 @@
+module Minitest::Thesis
+  # Minimal key/value store that keeps every value in its own file inside a
+  # single directory. File names are derived from the key, so keys never
+  # appear on disk verbatim.
+  class DirectoryDb
+    # Remembers `dir` and creates it. A path that already exists is accepted
+    # silently; any other system error propagates to the caller.
+    def initialize(dir)
+      @dir = dir
+      begin
+        Dir.mkdir(@dir)
+      rescue Errno::EEXIST
+        # Already there: nothing to create.
+      end
+    end
+
+    # Returns the stored contents for `key`, or nil when no file exists for it.
+    def [](key)
+      path = file(key)
+      File.exist?(path) ? File.read(path) : nil
+    end
+
+    # Stores `value` as the contents of the file that belongs to `key`.
+    def []=(key, value)
+      path = file(key)
+      File.write(path, value)
+    end
+
+    private
+
+    # Path for `key`: the first ten hex digits of the key's SHA1 digest,
+    # placed under the database directory.
+    def file(key)
+      digest = Digest::SHA1.hexdigest(key)
+      File.join(@dir, digest[0, 10])
+    end
+  end
+end
--- a/lib/minitest/thesis/error.rb
+++ b/lib/minitest/thesis/error.rb
@ -0,0 +1,12 @@
+module Minitest::Thesis
+  # Common superclass of the error classes defined in this file.
+  class Error < StandardError; end
+
+  # Raised on an attempt to make choices on a test case that has been completed.
+  class Frozen < Error; end
+
+  # Raised when a test should stop executing early.
+  class StopTest < Error; end
+
+  # Raised when a test has no valid examples.
+  class Unsatisfiable < Error; end
+end
--- a/lib/minitest/thesis/possibility.rb
+++ b/lib/minitest/thesis/possibility.rb
@ -0,0 +1,39 @@
+module Minitest::Thesis
+  # Represents some range of values that might be used in a test, that can be
+  # requested from a `TestCase`. Pass one of these to `TestCase#any` to get a
+  # concrete value.
+  #
+  # `produce` is the block given to `new`; `TestCase#any` calls it with the
+  # test case and returns whatever it returns.
+  class Possibility
+    attr_reader :produce, :name
+
+    # `name` is only used for display (`inspect` / `to_s`); `produce` is the
+    # block that generates a value from a test case.
+    def initialize(name = "TODO", &produce)
+      @name = name
+      @produce = produce
+    end
+
+    def inspect = name
+    def to_s = name
+
+    # Returns a `Possibility` where values come from applying `f` to some
+    # possible value for `self`.
+    def map(&f)
+      self.class.new("#{name}.map(TODO)") {|tc| f.call(tc.any(self)) }
+    end
+
+    # Returns a `Possibility` where values come from applying `f` (which
+    # should return a new `Possibility`) to some possible value for `self`,
+    # then returning a possible value from that.
+    def bind(&f)
+      self.class.new("#{name}.bind(TODO)") {|tc| tc.any(f.(tc.any(self))) }
+    end
+
+    # Returns a `Possibility` whose values are any possible value of `self`
+    # for which `f` returns a truthy value.
+    #
+    # Up to three candidates are drawn; if none of them is accepted the test
+    # case is rejected via `test_case.reject`.
+    def satisfying(&f)
+      self.class.new("#{name}.select(TODO)") {|test_case|
+        candidate = nil
+        # `Enumerable#first` ignores a block, so `3.times.first { ... }` would
+        # return 0 without ever drawing a candidate. `any?` does run the block,
+        # stops at the first accepted candidate, and keeps `nil`/`false`
+        # candidates usable because acceptance is tracked separately.
+        accepted = 3.times.any? {
+          candidate = test_case.any(self)
+          f.(candidate)
+        }
+        accepted ? candidate : test_case.reject
+      }
+    end
+  end
+end
--- a/lib/minitest/thesis/status.rb
+++ b/lib/minitest/thesis/status.rb
@ -0,0 +1,21 @@
+module Minitest::Thesis
+  # Outcome of running a test case. The four constants are ordered by their
+  # `value` (OVERRUN < INVALID < VALID < INTERESTING), which lets callers write
+  # checks such as `status >= Status::VALID`.
+  class Status < Struct.new(:value)
+    # Test case didn't have enough data to complete
+    OVERRUN = self.new(0)
+
+    # Test case contained something that prevented completion
+    INVALID = self.new(1)
+
+    # Test case completed just fine but was boring
+    VALID = self.new(2)
+
+    # Test case completed and was interesting
+    INTERESTING = self.new(3)
+
+    include Comparable
+
+    # Orders statuses by `value`. Returns nil for anything that is not a
+    # Status, as the `<=>` protocol expects: `Comparable#==` (which takes
+    # precedence over `Struct#==` here) then answers false instead of raising
+    # NoMethodError when a status is compared with e.g. nil.
+    def <=>(other)
+      return nil unless other.is_a?(Status)
+
+      value <=> other.value
+    end
+  end
+end
--- a/lib/minitest/thesis/test_case.rb
+++ b/lib/minitest/thesis/test_case.rb
@ -0,0 +1,121 @@
+require_relative "error"
+require_relative "status"
+
+module Minitest::Thesis
+  # Represents a single generated test case, which consists of an underlying
+  # set of choices that produce possibilities.
+  #
+  # Choices are replayed from `prefix` while it lasts and drawn from `random`
+  # afterwards. Once `max_size` choices have been recorded, asking for another
+  # one marks the test case as Status::OVERRUN.
+  class TestCase
+
+    # Returns a test case that makes this series of choices. No random source
+    # is supplied and `max_size` is the number of choices given.
+    def self.for_choices(choices, print_results: false)
+      self.new(prefix: choices, random: nil, max_size: choices.length, print_results:)
+    end
+
+    attr_accessor :status
+    attr_reader :choices, :targeting_score
+
+    # prefix        - choices to replay before consulting `random`.
+    # random        - source of randomness; must respond to `rand`.
+    # max_size      - maximum number of choices that may be recorded.
+    # print_results - when true, top-level draws are echoed with `puts`.
+    def initialize(prefix:, random:, max_size: Float::INFINITY, print_results: false)
+      @prefix, @random, @max_size, @print_results = prefix, random, max_size, print_results
+      @choices = []
+      @status = nil
+      @depth = 0
+      @targeting_score = nil
+    end
+
+    # Returns a number in the range [0, n]
+    #
+    # The random draw uses the inclusive range form of `rand`: `rand(n)` never
+    # produces `n` itself, and Random#rand raises ArgumentError when `n` is 0.
+    def choice(n)
+      result = make_choice(n) { @random.rand(0..n) }
+
+      puts "choice(#{n}): #{result}" if should_print?
+
+      result
+    end
+
+    # Returns 1 with probability `p`, otherwise 0.
+    #
+    # NOTE: the result is an Integer, not a boolean. 0 is truthy in Ruby, so
+    # callers must compare against 1 rather than use the result as a condition.
+    def weighted(p)
+      result = if    p <= 0 then forced_choice(0)
+               elsif p >= 1 then forced_choice(1)
+               else              make_choice(1) { (@random.rand <= p) ? 1 : 0 }
+               end
+
+      puts "weighted(#{p}): #{result}" if should_print?
+
+      result
+    end
+
+    # Inserts a fake choice into the choice sequence, as if some call to
+    # choice() had returned `n`. You almost never need this, but sometimes it
+    # can be a useful hint to the shrinker.
+    #
+    # Raises RangeError for a negative `n` or one wider than 64 bits, and
+    # Frozen when the test case already has a status.
+    def forced_choice(n)
+      raise RangeError, "Invalid choice #{n}" if n.bit_length > 64 || n.negative?
+      raise Frozen unless @status.nil?
+
+      mark_status(Status::OVERRUN) if @choices.length >= @max_size
+
+      choices << n
+      n
+    end
+
+    # Mark this test case as invalid.
+    def reject = mark_status(Status::INVALID)
+
+    # If this precondition is not met, abort the test and mark this test case as invalid.
+    def assume(precondition)
+      return if precondition
+      reject
+    end
+
+    # Set a score to maximize. Multiple calls to this function will override previous ones.
+    #
+    # The name and idea come from Löscher, Andreas, and Konstantinos Sagonas.
+    # "Targeted property-based testing." ISSTA. 2017, but the implementation
+    # is based on that found in Hypothesis, which is not that similar to
+    # anything described in the paper.
+    def target(score) = @targeting_score = score
+
+    # Return a possible value from `possibility`.
+    #
+    # `@depth` counts nested `any` calls so that only the outermost draw is
+    # printed; `ensure` restores it even when the draw stops the test.
+    def any(possibility)
+      begin
+        @depth += 1
+        result = possibility.produce.(self)
+      ensure
+        @depth -= 1
+      end
+
+      puts "any(#{possibility}): #{result}" if should_print?
+
+      result
+    end
+
+    # Set the status and raise StopTest. Raises Frozen if a status was
+    # already set.
+    def mark_status(status)
+      raise Frozen unless self.status.nil?
+
+      @status = status
+      raise StopTest
+    end
+
+    private
+
+    # Printing is limited to draws made directly by the test, not nested ones.
+    def should_print? = @print_results && @depth.zero?
+
+    # Make a choice in [0, n], by calling rnd_method if randomness is needed.
+    #
+    # The value comes from the prefix while it lasts. It is recorded before
+    # the range check, so an out-of-range replayed value stays in `choices`
+    # when the test case is marked invalid.
+    def make_choice(n, &rnd_method)
+      raise RangeError, "Invalid choice #{n}" if n.bit_length > 64 || n.negative?
+      raise Frozen unless @status.nil?
+
+      mark_status(Status::OVERRUN) if @choices.length >= @max_size
+
+      result = if @choices.length < @prefix.length
+                 @prefix[@choices.length]
+               else
+                 rnd_method.()
+               end
+      @choices << result
+
+      mark_status(Status::INVALID) if result > n
+
+      result
+    end
+  end
+end
--- a/lib/minitest/thesis/testing_state.rb
+++ b/lib/minitest/thesis/testing_state.rb
@ -0,0 +1,418 @@
+module Minitest::Thesis
+  # We cap the maximum amount of entropy a test case can use.
+  # This prevents cases where the generated test case size explodes,
+  # by effectively rejecting test cases that grow beyond the cap.
+  BUFFER_SIZE = 8 * 1024
+
+  class TestingState
+    attr_reader :result, :valid_test_cases, :calls
+
+    # random        - source of randomness used for generation and targeting.
+    # test_function - callable invoked with each TestCase.
+    # max_examples  - budget of valid test cases; total calls are capped at
+    #                 ten times this number (see keep_generating?).
+    def initialize(random:, test_function:, max_examples:)
+      @random = random
+      @_test_function = test_function
+      @max_examples = max_examples
+
+      @calls = 0
+      @valid_test_cases = 0
+      @test_is_trivial = false
+    end
+
+    # Runs the user's test function against `test_case` and folds the outcome
+    # into the engine state: call/valid counters, the trivial-test flag, the
+    # best targeting score seen so far, and the smallest interesting result.
+    def test_function(test_case)
+      begin
+        @_test_function.(test_case)
+      rescue StopTest
+        # Expected: mark_status raises this to end the test early.
+      end
+
+      # A test that finished without marking a status counts as valid.
+      test_case.status = Status::VALID if test_case.status.nil?
+      @calls += 1
+
+      status = test_case.status
+      choices = test_case.choices
+      score = test_case.targeting_score
+
+      # A test that gets this far without making any choice cannot vary.
+      @test_is_trivial = true if status >= Status::INVALID && choices.empty?
+
+      if status >= Status::VALID
+        @valid_test_cases += 1
+
+        if !score.nil? && (@best_scoring.nil? || score > @best_scoring.first)
+          @best_scoring = [score, choices]
+        end
+      end
+
+      # Keep only the shortlex-smallest interesting choice sequence.
+      improves = status == Status::INTERESTING &&
+        (@result.nil? || (sort_key(choices) <=> sort_key(@result)) == -1)
+      @result = choices if improves
+    end
+
+    # If any test cases have had `target()` called on them, do a simple
+    # hill climbing algorithm to attempt to optimise that target score.
+    #
+    # Skipped entirely when an interesting result already exists or when no
+    # score has been recorded. Stops as soon as keep_generating? turns false.
+    def target
+      return unless @result.nil?
+      return if @best_scoring.nil?
+
+      # Does moving choice `index` of the best-scoring sequence by `delta`
+      # yield a strictly better score? Runs the test once to find out.
+      improves = lambda do |index, delta|
+        fail if @best_scoring.nil?
+
+        best_score, best_choices = @best_scoring
+        current = best_choices[index]
+        next false if current + delta < 0 || current.bit_length >= 64
+
+        attempt = best_choices.dup
+        attempt[index] = current + delta
+        test_case = TestCase.new(prefix: attempt, random: @random, max_size: BUFFER_SIZE)
+        test_function(test_case)
+
+        fail if test_case.status.nil?
+
+        candidate = test_case.targeting_score
+        test_case.status >= Status::VALID && !candidate.nil? && candidate > best_score
+      end
+
+      while keep_generating?
+        index = @random.rand(@best_scoring[1].length)
+
+        # Probe both directions and remember the first one that helps.
+        direction = 0
+        [1, -1].each do |candidate|
+          return unless keep_generating?
+          next unless improves.(index, candidate)
+
+          direction = candidate
+          break
+        end
+
+        next if direction.zero?
+
+        # Grow the step geometrically while it keeps paying off...
+        stride = 1
+        stride *= 2 while keep_generating? && improves.(index, direction * stride)
+
+        # ...then back off, exhausting each step size before halving it.
+        until stride.zero?
+          while keep_generating? && improves.(index, direction * stride)
+            # Nothing to do here: the call itself records the improvement.
+          end
+          stride /= 2
+        end
+      end
+    end
+
+    # Executes the three phases in order: random generation, hill climbing on
+    # any recorded target scores, and finally shrinking of the result.
+    def run
+      generate
+      target
+      shrink
+    end
+
+    # Whether the engine should run further test cases: false once the test
+    # is known to be trivial, an interesting result exists, or a budget is
+    # exhausted.
+    def keep_generating?
+      return false if @test_is_trivial
+      return false unless result.nil?
+      return false unless @valid_test_cases < @max_examples
+
+      # We impose a limit on the maximum number of calls as
+      # well as the maximum number of valid examples. This is
+      # to avoid taking a prohibitively long time on tests which
+      # have hard or impossible to satisfy preconditions.
+      @calls < @max_examples * 10
+    end
+
+    # Run random generation until either we have found an interesting test
+    # case or hit the limit of how many test cases we should evaluate. Once a
+    # targeting score has been recorded, generation stops at half of the
+    # valid-example budget.
+    def generate
+      while keep_generating?
+        break unless @best_scoring.nil? || @valid_test_cases < @max_examples / 2
+
+        fresh = TestCase.new(prefix: [], random: @random, max_size: BUFFER_SIZE)
+        test_function(fresh)
+      end
+    end
+
+    # If we have found an interesting example, try shrinking it so that the
+    # choice sequence leading to our best example is shortlex smaller than
+    # the one we originally found. This improves the quality of the generated
+    # test case, as per our paper.
+    #
+    # https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf
+    #
+    # Does nothing when there is no result or the result is already empty.
+    # @result is never assigned here directly: test_function replaces it
+    # whenever a smaller interesting choice sequence is run.
+    def shrink
+      return if @result.nil? || @result.empty?
+
+      # Shrinking will typically try the same choice sequences over and over
+      # again, so we cache the test function in order to not end up
+      # reevaluating it in those cases. This also allows us to catch cases
+      # where we try something that is e.g. a prefix of something we've
+      # previously tried, which is guaranteed not to work.
+      cached = CachedTestFunction.new {|tc| test_function(tc) }
+
+      # Is `choices` an interesting choice sequence?
+      consider = ->(choices) do
+        return true if choices == @result
+
+        cached.(choices) == Status::INTERESTING
+      end
+
+      fail unless consider.(@result)
+
+      # We are going to perform a number of transformations to the current
+      # result, iterating until none of them make any progress - i.e. until
+      # we make it through an entire iteration of the loop without changing
+      # the result.
+      prev = nil
+      while prev != @result
+        prev = @result
+
+        # A note on weird loop order: We iterate backwards through the choice
+        # sequence rather than forwards, because later bits tend to depend on
+        # earlier bits so it's easier to make changes near the end and
+        # deleting bits at the end may allow us to make changes earlier on
+        # that we'd have missed.
+        #
+        # Note that we do not restart the loop at the end when we find a
+        # successful shrink. This is because things we've already tried are
+        # less likely to work.
+        #
+        # If this guess is wrong, that's OK, this isn't a correctness
+        # problem, because if we made a successful reduction then we are not
+        # at a fixed point and will restart the loop at the end the next time
+        # round. In some cases this can result in performance issues, but the
+        # end result should still be fine.
+
+        # First try deleting each choice we made in chunks. We try longer
+        # chunks because this allows us to delete whole composite elements:
+        # e.g. deleting an element from a generated list requires us to
+        # delete both the choice of whether to include it and also the
+        # element itself, which may involve more than one choice. Some things
+        # will take more than 8 choices in the sequence. That's too bad, we
+        # may not be able to delete those. In Hypothesis proper we record the
+        # boundaries corresponding to `any` calls so that we can try deleting
+        # those, but that's pretty high overhead and also a bunch of slightly
+        # annoying code that it's not worth porting.
+        #
+        # We could instead do a quadratic amount of work to try all
+        # boundaries, but in general we don't want to do that because even a
+        # shrunk test case can involve a relatively large number of choices.
+        k = 8
+        while k.positive?
+          i = @result.length - k - 1
+          until i.negative?
+            if i >= @result.length
+              # Can happen if we successfully lowered the value at i - 1
+              i -= 1
+              next
+            end
+            attempt = @result[0...i] + (@result[i + k..] || [])
+
+            fail unless attempt.length < @result.length
+
+            unless consider.(attempt)
+              # This fixes a common problem that occurs
+              # when you have dependencies on some
+              # length parameter. e.g. draw a number
+              # between 0 and 10 and then draw that
+              # many elements. This can't delete
+              # everything that occurs that way, but
+              # it can delete some things and often
+              # will get us unstuck when nothing else
+              # does.
+              if i.positive? && attempt[i - 1].positive?
+                attempt[i - 1] -= 1
+                i += 1 if consider.(attempt)
+              end
+
+              i -= 1
+            end
+          end
+
+          k /= 2
+        end
+
+        # Attempts to replace some indices in the current result with new
+        # values. Useful for some purely lexicographic reductions that we are
+        # about to perform.
+        replace = ->(values) do
+          fail if @result.nil?
+          attempt = @result.dup
+          values.each do |i, v|
+            # The size of @result can change during shrinking. If that
+            # happens, stop attempting to make use of these replacements
+            # because some other shrink pass is better to run now.
+            return false if i >= attempt.length
+            attempt[i] = v
+          end
+          consider.(attempt)
+        end
+
+        # Now we try replacing blocks of choices with zeroes. Note that
+        # unlike the above we skip k = 1 because we handle that in the next
+        # step. Often (but not always) a block of all zeroes is the shortlex
+        # smallest value that a region can be.
+        k = 8
+
+        while k > 1
+          i = @result.length - k
+          until i.negative?
+            if replace.((i...i + k).to_h {|j| [j, 0] })
+              # If we've succeeded then all of [i, i + k] is zero so we
+              # adjust i so that the next region does not overlap with this
+              # at all.
+              i -= k
+            else
+              # Otherwise we might still be able to zero some of these values
+              # but not the last one, so we just go back one.
+              i -= 1
+            end
+          end
+          k /= 2
+        end
+
+        # Now try replacing each choice with a smaller value by doing a
+        # binary search. This will replace n with 0 or n - 1 if possible, but
+        # will also more efficiently replace it with a smaller number than
+        # doing multiple subtractions would.
+        i = @result.length - 1
+        until i.negative?
+          # Attempt to replace
+          bin_search_down(0, @result[i]) {|v| replace.({i => v}) }
+          i -= 1
+        end
+
+        # NB from here on this is just showing off cool shrinker tricks and
+        # you probably don't need to worry about it and can skip these bits
+        # unless they're easy and you want bragging rights for how much
+        # better you are at shrinking than the local QuickCheck equivalent.
+
+        # Try sorting out of order ranges of choices, as `sort(x) <= x`, so
+        # this is always a lexicographic reduction.
+        k = 8
+        while k > 1
+          (@result.length - k - 1).downto(0).each do |i|
+            # The index range was computed up front, but an earlier successful
+            # attempt may have shortened @result since then. Skip positions
+            # that no longer exist, and treat a slice that starts past the
+            # end (which Ruby returns as nil) as empty, as the deletion pass
+            # above does.
+            next if i >= @result.length
+
+            suffix = @result[i + k..] || []
+            consider.(@result[0...i] + @result[i...i + k].sort + suffix)
+          end
+          k /= 2
+        end
+
+        # Try adjusting nearby pairs of integers by redistributing value
+        # between them. This is useful for tests that depend on the sum of
+        # some generated values.
+        [2, 1].each do |gap|
+          (@result.length - gap - 1).downto(0).each do |i|
+            j = i + gap
+            # This check is necessary because the previous changes might have
+            # shrunk the size of result, but also it's tedious to write tests
+            # for this so I didn't.
+            if j < @result.length
+              # Try swapping out of order pairs
+              if @result[i] > @result[j]
+                replace.({j => @result[i], i => @result[j]})
+              end
+              # j could be out of range if the previous swap succeeded.
+              if j < @result.length && @result[i].positive?
+                prev_i = @result[i]
+                prev_j = @result[j]
+                bin_search_down(0, prev_i) {|v|
+                  replace.({i => v, j => prev_j + (prev_i - v)})
+                }
+              end
+            end
+          end
+        end
+      end
+    end
+
+    private
+
+    # Shrinking order key for a choice sequence: comparing these keys orders
+    # sequences by length first and by their elements second.
+    def sort_key(choices)
+      [choices.length, choices]
+    end
+
+    # Binary search for a small n in [low, high] with f(n) true. f(high) is
+    # assumed to be true and is never evaluated.
+    #
+    # Returns `low` immediately when f(low) holds. Otherwise the result n only
+    # satisfies "f(n - 1) is false and f(n) is true": it is a locally minimal
+    # value, not necessarily the smallest one for which f holds.
+    def bin_search_down(low, high, &f)
+      return low if f.call(low)
+
+      # Invariant: f(low) is false, f(high) is (assumed) true.
+      until low + 1 >= high
+        mid = low + (high - low) / 2
+        f.call(mid) ? high = mid : low = mid
+      end
+
+      high
+    end
+  end
+
+  # Memoizing wrapper around a test function: maps a choice sequence to the
+  # status produced by running a test case populated with it. Because the
+  # results are stored as a tree keyed by successive choices, the status can
+  # sometimes be predicted for sequences that were never run themselves.
+  #
+  # You can safely omit implementing this at the cost of somewhat increased
+  # shrinking time.
+  class CachedTestFunction
+    def initialize(&test_function)
+      @test_function = test_function
+
+      # Each tree node is either a Hash (a point where a choice is made,
+      # mapping the chosen value to the next node) or a Status (mark_status
+      # was called at this point, so any further choices are irrelevant).
+      #
+      # Note that a better implementation of this would use
+      # a Patricia trie, which implements long non-branching
+      # paths as an array inline. For simplicity we don't
+      # do that here.
+      @tree = {}
+    end
+
+    # Returns the status for `choices`, running the wrapped test function
+    # only when the tree cannot answer.
+    def call(choices)
+      known = lookup(choices)
+      return known unless known.nil?
+
+      # We now have to actually call the test function to find out what
+      # happens.
+      test_case = TestCase.for_choices(choices)
+      @test_function.(test_case)
+      fail if test_case.status.nil?
+
+      record(test_case)
+      test_case.status
+    end
+
+    private
+
+    # Walks the tree along `choices`. Returns the stored Status if one is
+    # reached, Status::OVERRUN if the whole sequence stays inside known
+    # choice nodes, or nil as soon as an unknown choice is encountered.
+    def lookup(choices)
+      node = @tree
+      choices.each do |c|
+        return nil unless node.key?(c)
+
+        node = node[c]
+        # mark_status was called, thus future choices
+        # will be ignored.
+        if node.is_a?(Status)
+          fail if node == Status::OVERRUN
+          return node
+        end
+      end
+
+      # If we never entered an unknown region of the tree
+      # or hit a Status value, then we know that another
+      # choice will be made next and the result will overrun.
+      Status::OVERRUN
+    end
+
+    # Enters the choices actually made by `test_case` into the tree. The final
+    # choice maps to the resulting status, except for an overrun, which is
+    # stored as a choice node because another choice would have followed.
+    def record(test_case)
+      path = test_case.choices
+      return if path.empty?
+
+      parent = path[0...-1].reduce(@tree) do |node, c|
+        node.key?(c) ? node[c] : (node[c] = {})
+      end
+      parent[path.last] = test_case.status == Status::OVERRUN ? {} : test_case.status
+    end
+  end
+end
--- a/lib/minitest/thesis/version.rb
+++ b/lib/minitest/thesis/version.rb
@ -0,0 +1,3 @@
+module Minitest::Thesis
+  VERSION = "0.1.0"
+end
--- a/test/minitest/thesis_test.rb
+++ b/test/minitest/thesis_test.rb
@ -1,442 +1,469 @@
 require "test_helper"

-class Minitest::ThesisTest < Minitest::Thesis::Test
-  class Failure < StandardError; end
-
-  def test_finds_small_list
-    (0...10).each do |seed|
-      out, _ = capture_io do
-        assert_raises(Minitest::Assertion) do
-          run_test("finds_small_list", database: {}, random: Random.new(seed)) do |test_case|
-            ls = test_case.any(lists(integers(0, 10_000)))
-            assert ls.sum <= 1_000
+module Minitest::Thesis
+  class ThesisTest < Minitest::Thesis::Test
+    class Failure < StandardError; end
+
+    def test_finds_small_list
+      (0...10).each do |seed|
+        out, _ = capture_io do
+          assert_raises(Minitest::Assertion) do
+            run_test("finds_small_list", database: {}, random: Random.new(seed)) do |test_case|
+              ls = test_case.any(lists(integers(0, 10_000)))
+              assert ls.sum <= 1_000
+            end
          end
        end
+
+        assert_equal <<~OUT, out
+          any(lists(integers(0, 10000))): [1001]
+        OUT
      end
+    end

-      assert_equal <<~OUT, out
-        any(lists(integers(0, 10000))): [1001]
-      OUT
+    # Minithesis can't really handle shrinking arbitrary monadic bind, but length
+    # parameters are a common case of monadic bind that it has a little bit of
+    # special casing for. This test ensures that that special casing works.
+    #
+    # The problem is that if you generate a list by drawing a length and then
+    # drawing that many elements, you can end up with something like ``[1001, 0,
+    # 0]`` then deleting those zeroes in the middle is a pain. minithesis will
+    # solve this by first sorting those elements, so that we have ``[0, 0,
+    # 1001]``, and then lowering the length by two, turning it into ``[1001]`` as
+    # desired.
+    def test_finds_small_list_even_with_bad_lists
+      bad_list = Possibility.new("bad_list") {|tc|
+        n = tc.choice(10)
+        Array.new(n) { tc.choice(10_000) }
+      }
+
+      (0...10).each do |seed|
+        out, _ = capture_io do
+          assert_raises(Minitest::Assertion) do
+            run_test("finds_small_list_even_with_bad_lists", database: {}, random: Random.new(seed)) do |test_case|
+              ls = test_case.any(bad_list)
+              assert ls.sum <= 1_000
+            end
+          end
+        end
+
+        assert_equal <<~OUT, out
+          any(bad_list): [1001]
+        OUT
+      end
    end
-  end

-  # Minithesis can't really handle shrinking arbitrary monadic bind, but length
-  # parameters are a common case of monadic bind that it has a little bit of
-  # special casing for. This test ensures that that special casing works.
-  #
-  # The problem is that if you generate a list by drawing a length and then
-  # drawing that many elements, you can end up with something like ``[1001, 0,
-  # 0]`` then deleting those zeroes in the middle is a pain. minithesis will
-  # solve this by first sorting those elements, so that we have ``[0, 0,
-  # 1001]``, and then lowering the length by two, turning it into ``[1001]`` as
-  # desired.
-  def test_finds_small_list_even_with_bad_lists
-    bad_list = Possibility.new("bad_list") {|tc|
-      n = tc.choice(10)
-      Array.new(n) { tc.choice(10_000) }
-    }
-
-    (0...10).each do |seed|
+    def test_reduces_additive_pairs
      out, _ = capture_io do
        assert_raises(Minitest::Assertion) do
-          run_test("finds_small_list_even_with_bad_lists", database: {}, random: Random.new(seed)) do |test_case|
-            ls = test_case.any(bad_list)
-            assert ls.sum <= 1_000
+          run_test("reduces_additive_pairs", database: {}, max_examples: 10_000) do |test_case|
+            m = test_case.choice(1000)
+            n = test_case.choice(1000)
+            assert m + n <= 1000
          end
        end
      end

      assert_equal <<~OUT, out
-        any(bad_list): [1001]
+        choice(1000): 1
+        choice(1000): 1000
      OUT
    end
-  end
-
-  def test_reduces_additive_pairs
-    out, _ = capture_io do
-      assert_raises(Minitest::Assertion) do
-        run_test("reduces_additive_pairs", database: {}, max_examples: 10_000) do |test_case|
-          m = test_case.choice(1000)
-          n = test_case.choice(1000)
-          assert m + n <= 1000
-        end
-      end
-    end
-
-    assert_equal <<~OUT, out
-      choice(1000): 1
-      choice(1000): 1000
-    OUT
-  end

-  def test_reuses_results_from_the_database
-    Dir.mktmpdir do |tmpdir|
-      db = DirectoryDb.new(tmpdir)
-      count = 0
+    def test_reuses_results_from_the_database
+      Dir.mktmpdir do |tmpdir|
+        db = DirectoryDb.new(tmpdir)
+        count = 0

-      run = -> {
-        assert_raises(Minitest::Assertion) do
-          run_test("reuses_results_from_the_database", database: db, quiet: true) do |test_case|
-            count += 1
-            assert test_case.choice(10_000) < 10
+        run = -> {
+          assert_raises(Minitest::Assertion) do
+            run_test("reuses_results_from_the_database", database: db, quiet: true) do |test_case|
+              count += 1
+              assert test_case.choice(10_000) < 10
+            end
          end
-        end
-      }
-
-      run.()
+        }

-      assert_equal 1, Dir.children(tmpdir).length
-      prev_count = count
+        run.()

-      run.()
+        assert_equal 1, Dir.children(tmpdir).length
+        prev_count = count

-      assert_equal 1, Dir.children(tmpdir).length
-      assert_equal prev_count + 2, count
-    end
-  end
+        run.()

-  def test_test_cases_satisfy_preconditions
-    run_test("test_cases_satisfy_preconditions", database: {}) do |test_case|
-      n = test_case.choice(10)
-      test_case.assume(n != 0)
-      refute_equal 0, n
+        assert_equal 1, Dir.children(tmpdir).length
+        assert_equal prev_count + 2, count
+      end
    end
-  end

-  def test_error_on_too_strict_precondition
-    assert_raises(Unsatisfiable) do
-      run_test("error_on_too_strict_precondition", database: {}) do |test_case|
+    def test_test_cases_satisfy_preconditions
+      run_test("test_cases_satisfy_preconditions", database: {}) do |test_case|
        n = test_case.choice(10)
-        test_case.reject
+        test_case.assume(n != 0)
+        refute_equal 0, n
      end
    end
-  end
-
-  def test_error_on_unbounded_test_function
-    orig_buffer_size = BUFFER_SIZE
-    suppress_warnings do
-      Minitest::Thesis.const_set(:BUFFER_SIZE, 10)
-    end

-    assert_raises(Unsatisfiable) do
-      run_test("error_on_unbounded_test_function", database: {}, max_examples: 5) do |test_case|
-        loop do
+    def test_error_on_too_strict_precondition
+      assert_raises(Unsatisfiable) do
+        run_test("error_on_too_strict_precondition", database: {}) do |test_case|
          test_case.choice(10)
+          test_case.reject
        end
      end
    end
-  ensure
-    suppress_warnings do
-      Minitest::Thesis.const_set(:BUFFER_SIZE, orig_buffer_size)
-    end
-  end

-  def test_function_cache
-    tf = ->(tc) do
-      tc.mark_status(Status::INTERESTING) if tc.choice(1_000) >= 200
-      tc.reject if tc.choice(1).zero?
-    end
-
-    state = TestingState.new(random: Random.new(0), test_function: tf, max_examples: 100)
-    cache = CachedTestFunction.new {|tc| state.test_function(tc) }
+    def test_error_on_unbounded_test_function
+      orig_buffer_size = BUFFER_SIZE
+      suppress_warnings do
+        Minitest::Thesis.const_set(:BUFFER_SIZE, 10)
+      end

-    assert_equal Status::VALID, cache.([1, 1])
-    assert_equal Status::OVERRUN, cache.([1])
-    assert_equal Status::INTERESTING, cache.([1_000])
-    assert_equal Status::INTERESTING, cache.([1_000])
-    assert_equal Status::INTERESTING, cache.([1_000, 1])
+      assert_raises(Unsatisfiable) do
+        run_test("error_on_unbounded_test_function", database: {}, max_examples: 5) do |test_case|
+          loop do
+            test_case.choice(10)
+          end
+        end
+      end
+    ensure
+      suppress_warnings do
+        Minitest::Thesis.const_set(:BUFFER_SIZE, orig_buffer_size)
+      end
+    end

-    assert_equal 2, state.calls
-  end
+    def test_function_cache
+      tf = ->(tc) do
+        tc.mark_status(Status::INTERESTING) if tc.choice(1_000) >= 200
+        tc.reject if tc.choice(1).zero?
+      end

-  # Targeting has a number of places it checks for whether we've exceeded the
-  # generation limits. This makes sure we've checked them all.
-  def test_max_examples_is_not_exceeded
-    (1...100).each do |max_examples|
-      calls = 0
+      state = TestingState.new(random: Random.new(0), test_function: tf, max_examples: 100)
+      cache = CachedTestFunction.new {|tc| state.test_function(tc) }

-      run_test(
-        "max_examples_is_not_exceeded",
-        database: {},
-        random: Random.new(0),
-        max_examples:,
-      ) do |tc|
-        m = 10000
-        n = tc.choice(m)
-        calls += 1
-        tc.target(n * (m - n))
-      end
+      assert_equal Status::VALID, cache.([1, 1])
+      assert_equal Status::OVERRUN, cache.([1])
+      assert_equal Status::INTERESTING, cache.([1_000])
+      assert_equal Status::INTERESTING, cache.([1_000])
+      assert_equal Status::INTERESTING, cache.([1_000, 1])

-      assert_equal max_examples, calls
+      assert_equal 2, state.calls
    end
-  end

+    # Targeting has a number of places it checks for whether we've exceeded the
+    # generation limits. This makes sure we've checked them all.
+    def test_max_examples_is_not_exceeded
+      (1...100).each do |max_examples|
+        calls = 0

-  # Targeting has a number of places it checks for whether we've exceeded the
-  # generation limits. This makes sure we've checked them all.
-  def test_finds_a_local_maximum
-    (0...100).each do |seed|
-      assert_raises(Minitest::Assertion) do
        run_test(
-          "finds_a_local_maximum",
+          "max_examples_is_not_exceeded",
          database: {},
-          random: Random.new(seed),
-          max_examples: 200,
-          quiet: true
+          random: Random.new(0),
+          max_examples:,
        ) do |tc|
-          m = tc.choice(1000)
-          n = tc.choice(1000)
-          score = -((m - 500) ** 2 + (n - 500) ** 2)
-          tc.target(score)
-          assert m != 500 || n != 500
+          m = 10000
+          n = tc.choice(m)
+          calls += 1
+          tc.target(n * (m - n))
        end
+
+        assert_equal max_examples, calls
      end
    end
-  end

-  def test_can_target_a_score_upwards_to_interesting
-    out, _ = capture_io do
-      assert_raises(Minitest::Assertion) do
-        run_test("can_target_a_score_upwards_to_interesting", database: {}, max_examples: 1000) do |test_case|
-          n = test_case.choice(1000)
-          m = test_case.choice(1000)
-          score = n + m
-          test_case.target(score)
-          assert score < 2000
+
+    # Targeting should hill-climb to the single best score at m == n == 500,
+    # where the assertion fails, regardless of the random seed.
+    def test_finds_a_local_maximum
+      (0...100).each do |seed|
+        assert_raises(Minitest::Assertion) do
+          run_test(
+            "finds_a_local_maximum",
+            database: {},
+            random: Random.new(seed),
+            max_examples: 200,
+            quiet: true
+          ) do |tc|
+            m = tc.choice(1000)
+            n = tc.choice(1000)
+            score = -((m - 500) ** 2 + (n - 500) ** 2)
+            tc.target(score)
+            assert m != 500 || n != 500
+          end
        end
      end
    end

-    assert_equal <<~OUT, out
-      choice(1000): 1000
-      choice(1000): 1000
-    OUT
-  end
-
-  def test_can_target_a_score_upwards_without_failing
-    max_score = 0
+    def test_can_target_a_score_upwards_to_interesting
+      out, _ = capture_io do
+        assert_raises(Minitest::Assertion) do
+          run_test("can_target_a_score_upwards_to_interesting", database: {}, max_examples: 1000) do |test_case|
+            n = test_case.choice(1000)
+            m = test_case.choice(1000)
+            score = n + m
+            test_case.target(score)
+            assert score < 2000
+          end
+        end
+      end

-    run_test("can_target_a_score_upwards_without_failing", database: {}, max_examples: 1000) do |test_case|
-      n = test_case.choice(1000)
-      m = test_case.choice(1000)
-      score = n + m
-      test_case.target(score)
-      max_score = [score, max_score].max
+      assert_equal <<~OUT, out
+        choice(1000): 1000
+        choice(1000): 1000
+      OUT
    end

-    assert_equal 2000, max_score
-  end
+    def test_can_target_a_score_upwards_without_failing
+      max_score = 0

-  def test_targeting_when_most_do_not_benefit
-    big = 10_000
-
-    out, _ = capture_io do
-      assert_raises(Minitest::Assertion) do
-        run_test("targeting_when_most_do_not_benefit", database: {}, max_examples: 1000) do |test_case|
-          test_case.choice(1000)
-          test_case.choice(1000)
-          score = test_case.choice(big)
-          test_case.target(score)
-          assert score < big
-        end
+      run_test("can_target_a_score_upwards_without_failing", database: {}, max_examples: 1000) do |test_case|
+        n = test_case.choice(1000)
+        m = test_case.choice(1000)
+        score = n + m
+        test_case.target(score)
+        max_score = [score, max_score].max
      end
+
+      assert_equal 2000, max_score
    end

-    assert_equal <<~OUT, out
-      choice(1000): 0
-      choice(1000): 0
-      choice(#{big}): #{big}
-    OUT
-  end
+    def test_targeting_when_most_do_not_benefit
+      big = 10_000

-  def test_can_target_a_score_downwards
-    out, _ = capture_io do
-      assert_raises(Minitest::Assertion) do
-        run_test("can_target_a_score_downwards", database: {}, max_examples: 1000) do |test_case|
-          n = test_case.choice(1000)
-          m = test_case.choice(1000)
-          score = n + m
-          test_case.target(-score)
-          assert score.positive?
+      out, _ = capture_io do
+        assert_raises(Minitest::Assertion) do
+          run_test("targeting_when_most_do_not_benefit", database: {}, max_examples: 1000) do |test_case|
+            test_case.choice(1000)
+            test_case.choice(1000)
+            score = test_case.choice(big)
+            test_case.target(score)
+            assert score < big
+          end
        end
      end
+
+      assert_equal <<~OUT, out
+        choice(1000): 0
+        choice(1000): 0
+        choice(#{big}): #{big}
+      OUT
    end

-    assert_equal <<~OUT, out
-      choice(1000): 0
-      choice(1000): 0
-    OUT
-  end
+    def test_can_target_a_score_downwards
+      out, _ = capture_io do
+        assert_raises(Minitest::Assertion) do
+          run_test("can_target_a_score_downwards", database: {}, max_examples: 1000) do |test_case|
+            n = test_case.choice(1000)
+            m = test_case.choice(1000)
+            score = n + m
+            test_case.target(-score)
+            assert score.positive?
+          end
+        end
+      end

-  def test_prints_a_top_level_weighted
-    out, _ = capture_io do
-      assert_raises(Minitest::Assertion) do
-        run_test("prints_a_top_level_weighted", database: {}, max_examples: 1000) do |test_case|
-          assert test_case.weighted(0.5).nonzero?
+      assert_equal <<~OUT, out
+        choice(1000): 0
+        choice(1000): 0
+      OUT
+    end
+
+    def test_prints_a_top_level_weighted
+      out, _ = capture_io do
+        assert_raises(Minitest::Assertion) do
+          run_test("prints_a_top_level_weighted", database: {}, max_examples: 1000) do |test_case|
+            assert test_case.weighted(0.5).nonzero?
+          end
        end
      end
+
+      assert_equal <<~OUT, out
+        weighted(0.5): 0
+      OUT
    end

-    assert_equal <<~OUT, out
-      weighted(0.5): 0
-    OUT
-  end
+    def test_errors_when_using_frozen
+      tc = TestCase.for_choices([0])
+      tc.status = Status::VALID

-  def test_errors_when_using_frozen
-    tc = TestCase.for_choices([0])
-    tc.status = Status::VALID
+      assert_raises(Frozen) do
+        tc.mark_status(Status::INTERESTING)
+      end

-    assert_raises(Frozen) do
-      tc.mark_status(Status::INTERESTING)
-    end
+      assert_raises(Frozen) do
+        tc.choice(10)
+      end

-    assert_raises(Frozen) do
-      tc.choice(10)
+      assert_raises(Frozen) do
+        tc.forced_choice(10)
+      end
    end

-    assert_raises(Frozen) do
-      tc.forced_choice(10)
+    def test_errors_on_too_large_choice
+      tc = TestCase.for_choices([0])
+      assert_raises(RangeError) do
+        tc.choice(2 ** 64)
+      end
    end
-  end

-  def test_errors_on_too_large_choice
-    tc = TestCase.for_choices([0])
-    assert_raises(RangeError) do
-      tc.choice(2 ** 64)
+    def test_can_choose_full_64_bits
+      run_test("can_choose_full_64_bits", database: {}) do |tc|
+        tc.choice(2 ** 64 - 1)
+      end
    end
-  end

-  def test_can_choose_full_64_bits
-    run_test("can_choose_full_64_bits", database: {}) do |tc|
-      tc.choice(2 ** 64 - 1)
+    def test_mapped_possibility
+      run_test("mapped_possibility", database: {}) do |tc|
+        n = tc.any(integers(0, 5).map {|n| n * 2 })
+        assert n.even?
+      end
    end
-  end

-  def test_mapped_possibility
-    run_test("mapped_possibility", database: {}) do |tc|
-      n = tc.any(integers(0, 5).map {|n| n * 2 })
-      assert n.even?
+    def test_selected_possibility
+      run_test("selected_possibility", database: {}) do |tc|
+        n = tc.any(
+          integers(0, 5)
+            .satisfying(&:even?)
+        )
+        assert n.even?
+      end
    end
-  end

-  def test_selected_possibility
-    run_test("selected_possibility", database: {}) do |tc|
-      n = tc.any(
-        integers(0, 5)
-          .satisfying(&:even?)
-      )
-      assert n.even?
+    def test_bound_possibility
+      run_test("bound_possibility", database: {}) do |tc|
+        m, n = tc.any(
+          integers(0, 5).bind {|m| tuples(just(m), integers(m, m + 10)) }
+        )
+        assert (m..m+10).cover?(n)
+      end
    end
-  end

-  def test_bound_possibility
-    run_test("bound_possibility", database: {}) do |tc|
-      m, n = tc.any(
-        integers(0, 5).bind {|m| tuples(just(m), integers(m, m + 10)) }
-      )
-      assert (m..m+10).cover?(n)
+    def test_cannot_witness_nothing
+      assert_raises(Unsatisfiable) do
+        run_test("cannot_witness_nothing", database: {}) do |tc|
+          tc.any(nothing)
+        end
+      end
    end
-  end

-  def test_cannot_witness_nothing
-    assert_raises(Unsatisfiable) do
-      run_test("cannot_witness_nothing", database: {}) do |tc|
-        tc.any(nothing)
+    def test_cannot_witness_empty_mix_of
+      assert_raises(Unsatisfiable) do
+        run_test("cannot_witness_empty_mix_of", database: {}) do |tc|
+          tc.any(mix_of)
+        end
      end
    end
-  end

-  def test_cannot_witness_empty_mix_of
-    assert_raises(Unsatisfiable) do
-      run_test("cannot_witness_empty_mix_of", database: {}) do |tc|
-        tc.any(mix_of)
+    def test_can_draw_mixture
+      run_test("can_draw_mixture", database: {}) do |tc|
+        m = tc.any(mix_of(integers(-5, 0), integers(2, 5)))
+        assert (-5..5).cover?(m)
+        refute_equal 1, m
      end
    end
-  end

-  def test_can_draw_mixture
-    run_test("can_draw_mixture", database: {}) do |tc|
-      m = tc.any(mix_of(integers(-5, 0), integers(2, 5)))
-      assert (-5..5).cover?(m)
-      refute_equal 1, m
+    # This test is very hard to trigger without targeting, and targeting will
+    # tend to overshoot the score, so we will see multiple interesting test cases
+    # before shrinking.
+    def test_target_and_reduce
+      out, _ = capture_io do
+        assert_raises(Minitest::Assertion) do
+          run_test("target_and_reduce", database: {}) do |tc|
+            m = tc.choice(100_000)
+            tc.target(m)
+            assert m <= 99_900
+          end
+        end
+      end
+
+      assert_equal <<~OUT, out
+        choice(100000): 99901
+      OUT
    end
-  end

-  # This test is very hard to trigger without targeting, and targeting will
-  # tend to overshoot the score, so we will see multiple interesting test cases
-  # before shrinking.
-  def test_target_and_reduce
-    out, _ = capture_io do
-      assert_raises(Minitest::Assertion) do
-        run_test("target_and_reduce", database: {}) do |tc|
-          m = tc.choice(100_000)
-          tc.target(m)
-          assert m <= 99_900
+    def test_impossible_weighted
+      assert_raises(Failure) do
+        run_test("impossible_weighted", database: {}, quiet: true) do |tc|
+          tc.choice(1)
+          10.times do
+            assert false unless tc.weighted(0.0).zero?
+          end
+          raise Failure if tc.choice(1).zero?
        end
      end
    end

-    assert_equal <<~OUT, out
-      choice(100000): 99901
-    OUT
-  end
-
-  def test_impossible_weighted
-    assert_raises(Failure) do
-      run_test("impossible_weighted", database: {}, quiet: true) do |tc|
-        tc.choice(1)
-        10.times do
-          assert false unless tc.weighted(0.0).zero?
+    def test_guaranteed_weighted
+      assert_raises(Failure) do
+        run_test("guaranteed_weighted", database: {}, quiet: true) do |tc|
+          if tc.weighted(1.0).nonzero?
+            tc.choice(1)
+            raise Failure
+          else
+            assert false
+          end
        end
-        raise Failure if tc.choice(1).zero?
      end
    end
-  end

-  def test_guaranteed_weighted
-    assert_raises(Failure) do
-      run_test("guaranteed_weighted", database: {}, quiet: true) do |tc|
-        if tc.weighted(1.0).nonzero?
-          tc.choice(1)
-          raise Failure
-        else
-          assert false
-        end
+    def test_size_bounds_on_list
+      run_test("size_bounds_on_list", database: {}) do |tc|
+        ls = tc.any(lists(integers(0, 10), min_size: 1, max_size: 3))
+        assert (1..3).cover?(ls.length)
      end
    end
-  end

-  def test_size_bounds_on_list
-    run_test("size_bounds_on_list", database: {}) do |tc|
-      ls = tc.any(lists(integers(0, 10), min_size: 1, max_size: 3))
-      assert (1..3).cover?(ls.length)
+    def test_forced_choice_bounds
+      assert_raises(RangeError) do
+        run_test("forced_choice_bounds", database: {}) do |tc|
+          tc.forced_choice(2 ** 64)
+        end
+      end
    end
-  end

-  def test_forced_choice_bounds
-    assert_raises(RangeError) do
-      run_test("forced_choice_bounds", database: {}) do |tc|
-        tc.forced_choice(2 ** 64)
+    def test_failure_from_hypothesis_1
+      assert_raises(Failure) do
+        run_test("failure_from_hypothesis_1", database: {}, random: Random.new(100), max_examples: 1000, quiet: true) do |tc|
+          n1 = tc.weighted(0.0)
+          if n1.zero?
+            n2 = tc.choice(511)
+            if n2 == 112
+              n3 = tc.choice(511)
+              if n3 == 124
+                raise Failure
+              elsif n3 == 93
+                raise Failure
+              else
+                tc.mark_status(Status::INVALID)
+              end
+            elsif n2 == 93
+              raise Failure
+            else
+              tc.mark_status(Status::INVALID)
+            end
+          end
+        end
      end
    end
-  end

-  def test_failure_from_hypothesis_1
-    assert_raises(Failure) do
-      run_test("failure_from_hypothesis_1", database: {}, random: Random.new(100), max_examples: 1000, quiet: true) do |tc|
-        n1 = tc.weighted(0.0)
-        if n1.zero?
-          n2 = tc.choice(511)
-          if n2 == 112
-            n3 = tc.choice(511)
-            if n3 == 124
+    def test_failure_from_hypothesis_2
+      assert_raises(Failure) do
+        run_test("failure_from_hypothesis_2", database: {}, random: Random.new(0), max_examples: 1000, quiet: true) do |tc|
+          n1 = tc.choice(6)
+          if n1 == 6
+            n2 = tc.weighted(0.0)
+            if n2.zero?
              raise Failure
-            elsif n3 == 93
+            end
+          elsif n1 == 4
+            n3 = tc.choice(0)
+            if n3 == 0
              raise Failure
            else
              tc.mark_status(Status::INVALID)
            end
-          elsif n2 == 93
+          elsif n1 == 2
            raise Failure
          else
            tc.mark_status(Status::INVALID)
@ -444,82 +471,57 @@ class Minitest::ThesisTest < Minitest::Thesis::Test
        end
      end
    end
-  end

-  def test_failure_from_hypothesis_2
-    assert_raises(Failure) do
-      run_test("failure_from_hypothesis_2", database: {}, random: Random.new(0), max_examples: 1000, quiet: true) do |tc|
-        n1 = tc.choice(6)
-        if n1 == 6
-          n2 = tc.weighted(0.0)
-          if n2.zero?
-            raise Failure
-          end
-        elsif n1 == 4
-          n3 = tc.choice(0)
-          if n3 == 0
-            raise Failure
+    def test_refactoring_cache
+      old = ->(node, choices, status) {
+        choices.each.with_index do |c, i|
+          if i + 1 < choices.length || status == Status::OVERRUN
+            node = if node.has_key?(c)
+                     node[c]
+                   else
+                     node[c] = {}
+                   end
          else
-            tc.mark_status(Status::INVALID)
+            node[c] = status
          end
-        elsif n1 == 2
-          raise Failure
-        else
-          tc.mark_status(Status::INVALID)
        end
-      end
-    end
-  end
+      }

-  def test_refactoring_cache
-    old = ->(node, choices, status) {
-      choices.each.with_index do |c, i|
-        if i + 1 < choices.length || status == Status::OVERRUN
+      new = ->(node, choices, status) {
+        *rest, last = choices
+        rest.each do |c|
          node = if node.has_key?(c)
                   node[c]
                 else
                   node[c] = {}
                 end
-        else
-          node[c] = status
        end
-      end
-    }
-
-    new = ->(node, choices, status) {
-      *rest, last = choices
-      rest.each do |c|
-        node = if node.has_key?(c)
-                 node[c]
-               else
-                 node[c] = {}
-               end
-      end
-      unless last.nil?
-        node[last] = status == Status::OVERRUN ? {} : status
-      end
-    }
+        unless last.nil?
+          node[last] = status == Status::OVERRUN ? {} : status
+        end
+      }

-    run_test("refactoring_cache", database: {}) do |tc|
-      old_tree = {}
-      new_tree = {}
+      run_test("refactoring_cache", database: {}) do |tc|
+        old_tree = {}
+        new_tree = {}

-      choices = tc.any(lists(integers(0, 10)))
-      status = Status.new(tc.choice(4))
+        choices = tc.any(lists(integers(0, 10)))
+        status = Status.new(tc.choice(4))

-      old.(old_tree, choices, status)
-      new.(new_tree, choices, status)
+        old.(old_tree, choices, status)
+        new.(new_tree, choices, status)

-      assert_equal old_tree, new_tree
+        assert_equal old_tree, new_tree
+      end
    end
-  end

-  private
+    private

-  def suppress_warnings
-    original_verbosity = $VERBOSE
-    $VERBOSE = nil
-    yield
-    $VERBOSE = original_verbosity
+    def suppress_warnings
+      original_verbosity = $VERBOSE
+      $VERBOSE = nil
+      yield
+      $VERBOSE = original_verbosity
+    end
  end
 end