minitest-thesis/lib/minitest/thesis.rb

require "digest"

require "minitest"

module Minitest
  class Thesis < Test
    VERSION = "0.1.0"

    # Runs a property-based test. Usage is:
    #
    #   run_test("my property") do |test_case|
    #     n = test_case.choice(1000)
    #   end
    #
    # The block takes a `TestCase` argument, and should raise an exception to
    # indicate a test failure. It will either run silently or print drawn
    # values and then fail with an exception if a failing test case is found.
    #
    # Arguments:
    # * name: the key under which a failing choice sequence is stored in (and
    #   later replayed from) `database`.
    # * max_examples: the maximum number of valid test cases to run for.
    #   Note that under some circumstances the test may run fewer test
    #   cases than this.
    # * random: An instance of `Random` that will be used for all
    #   nondeterministic choices.
    # * database: A Hash-like object (`[]`, `[]=` and `delete`) in which
    #   results will be cached and resumed from, ensuring that if a test is run
    #   twice it fails in the same way.
    # * quiet: Will not print anything on failure if true.
    #
    # Raises `Unsatisfiable` when no valid test case could be run. When a
    # failing test case was found, the block is replayed on the smallest one
    # and whatever it raises propagates to the caller.
    def run_test(
      name,
      max_examples: 100,
      random: Random.new,
      database: DirectoryDb.new(".minitest-thesis-cache"),
      quiet: false,
      &test
    )
      mark_failures_interesting = ->(test_case) do
        test.(test_case)
      rescue NoMemoryError, SignalException, SystemExit
        # Interrupts, exit requests and memory exhaustion are not test
        # failures: let them through instead of recording and shrinking them.
        raise
      rescue Exception # rubocop:disable Lint/RescueException
        # A broad rescue is deliberate: assertion failures in Minitest do not
        # derive from StandardError, so a bare `rescue` would miss them.
        # If a status is already set, this is the StopTest raised by
        # `mark_status` and must keep propagating.
        raise unless test_case.status.nil?

        test_case.mark_status(Status::INTERESTING)
      end

      state = TestingState.new(random:, test_function: mark_failures_interesting, max_examples:)

      # Replay the previously recorded failure (if any) before generating
      # anything new, so a known failure reproduces the same way.
      prev_failure = database[name]

      unless prev_failure.nil?
        choices = prev_failure.unpack("Q>*")
        state.test_function(TestCase.for_choices(choices))
      end

      state.run if state.result.nil?

      raise Unsatisfiable if state.valid_test_cases.zero?

      if state.result.nil?
        database.delete(name)
      else
        database[name] = state.result.pack("Q>*")
      end

      # Run the failing case one final time outside the status-tracking
      # wrapper so the original exception reaches the caller.
      unless state.result.nil?
        test.(TestCase.for_choices(state.result, print_results: !quiet))
      end
    end

    # Represents a single generated test case, which consists of an underlying
    # sequence of integer choices from which possibilities are produced.
    class TestCase
      # Returns a test case that replays exactly this series of choices. It has
      # no source of randomness and is capped at `choices.length` choices, so
      # asking for more choices than were supplied marks it as OVERRUN.
      def self.for_choices(choices, print_results: false)
        new(prefix: choices, random: nil, max_size: choices.length, print_results:)
      end

      attr_accessor :status
      attr_reader :choices, :targeting_score

      # Arguments:
      # * prefix: choices that are replayed, in order, before any randomness
      #   is consulted.
      # * random: the `Random` used once `prefix` is exhausted (may be nil if
      #   the test case can never get that far).
      # * max_size: the maximum number of choices before the test case is
      #   marked as OVERRUN.
      # * print_results: whether top-level draws are printed with `puts`.
      def initialize(prefix:, random:, max_size: Float::INFINITY, print_results: false)
        @prefix = prefix
        @random = random
        @max_size = max_size
        @print_results = print_results
        @choices = []
        @status = nil
        @depth = 0
        @targeting_score = nil
      end

      # Returns an integer in the range [0, n], both ends inclusive.
      def choice(n)
        # Draw from an inclusive range: `rand(n)` would never produce `n` and
        # raises ArgumentError when `n` is zero.
        result = make_choice(n) { @random.rand(0..n) }

        puts "choice(#{n}): #{result}" if should_print?

        result
      end

      # Returns 1 with probability `p`, otherwise 0. Probabilities at or
      # outside the [0, 1] bounds are recorded as forced choices because no
      # randomness is involved.
      def weighted(p)
        result =
          if p.zero? || p.negative?
            forced_choice(0)
          elsif p >= 1
            forced_choice(1)
          else
            make_choice(1) { (@random.rand <= p) ? 1 : 0 }
          end

        puts "weighted(#{p}): #{result}" if should_print?

        result
      end

      # Inserts a fake choice into the choice sequence, as if some call to
      # choice() had returned `n`. You almost never need this, but sometimes it
      # can be a useful hint to the shrinker.
      def forced_choice(n)
        check_can_choose(n)

        @choices << n
        n
      end

      # Mark this test case as invalid.
      def reject = mark_status(Status::INVALID)

      # If this precondition is not met, abort the test and mark this test case as invalid.
      def assume(precondition)
        return if precondition

        reject
      end

      # Set a score to maximize. Multiple calls to this function will override previous ones.
      #
      # The name and idea come from Löscher, Andreas, and Konstantinos Sagonas.
      # "Targeted property-based testing." ISSTA. 2017, but the implementation
      # is based on that found in Hypothesis, which is not that similar to
      # anything described in the paper.
      def target(score)
        @targeting_score = score
      end

      # Return a possible value from `possibility`.
      def any(possibility)
        begin
          # Track nesting so that only the outermost draw is printed.
          @depth += 1
          result = possibility.produce.(self)
        ensure
          @depth -= 1
        end

        puts "any(#{possibility}): #{result}" if should_print?

        result
      end

      # Set the status and raise StopTest. Raises Frozen if a status has
      # already been set.
      def mark_status(status)
        raise Frozen unless self.status.nil?

        @status = status
        raise StopTest
      end

      private

      def should_print? = @print_results && @depth.zero?

      # Shared precondition for recording a choice bounded by (or equal to)
      # `n`: it must fit in an unsigned 64-bit integer, the test case must not
      # be finished, and there must be room left for one more choice.
      def check_can_choose(n)
        raise RangeError, "Invalid choice #{n}" if n.bit_length > 64 || n.negative?
        raise Frozen unless @status.nil?

        mark_status(Status::OVERRUN) if @choices.length >= @max_size
      end

      # Make a choice in [0, n], by calling rnd_method if randomness is needed.
      def make_choice(n, &rnd_method)
        check_can_choose(n)

        position = @choices.length
        result = position < @prefix.length ? @prefix[position] : rnd_method.()
        @choices << result

        # A replayed prefix value may not fit the bound requested this time.
        mark_status(Status::INVALID) if result > n

        result
      end
    end

    # Represents some range of values that might be used in a test, that can be
    # requested from a `TestCase`. Pass one of these to `TestCase#any` to get a
    # concrete value.
    class Possibility
      attr_reader :produce, :name

      # Arguments:
      # * produce: a callable taking a `TestCase` and returning a value.
      # * name: the description shown by `to_s` / `inspect`.
      def initialize(produce, name: "TODO")
        @produce = produce
        @name = name
      end

      def inspect = name
      def to_s = name

      # Returns a `Possibility` where values come from applying `f` to some
      # possible value for `self`.
      def map(&f)
        self.class.new(
          ->(test_case) { f.(test_case.any(self)) },
          name: "#{name}.map(TODO)",
        )
      end

      # Returns a `Possibility` where values come from applying `f` (which
      # should return a new `Possibility`) to some possible value for `self`
      # then returning a possible value from that.
      def bind(&f)
        produce = ->(test_case) { test_case.any(f.(test_case.any(self))) }
        self.class.new(produce, name: "#{name}.bind(TODO)")
      end

      # Returns a `Possibility` whose values are any possible value of `self`
      # for which `f` returns a truthy value. Three candidates are tried; if
      # none of them satisfies `f` the test case is rejected.
      def satisfying(&f)
        produce = ->(test_case) do
          3.times do
            candidate = test_case.any(self)
            # `return` inside a lambda leaves the lambda, not this method.
            return candidate if f.(candidate)
          end
          test_case.reject
        end

        # The label mirrors the method name so printed draws match what the
        # caller wrote.
        self.class.new(produce, name: "#{name}.satisfying(TODO)")
      end
    end

    # Possibility covering every integer from `m` to `n`, both inclusive. The
    # value is `m` plus an offset chosen in [0, n - m].
    def integers(m, n)
      label = "integers(#{m}, #{n})"
      draw = ->(test_case) { m + test_case.choice(n - m) }
      Possibility.new(draw, name: label)
    end

    # Any list whose elements are possible values from `elements` is possible,
    # provided its length is between `min_size` and `max_size` (both
    # inclusive).
    #
    # Before each element a continue/stop choice is recorded: forced to 1
    # while the list is shorter than `min_size`, forced to 0 once it has
    # reached `max_size`, and otherwise drawn with `weighted(0.9)`.
    def lists(elements, min_size: 0, max_size: Float::INFINITY)
      produce = ->(test_case) do
        result = []
        loop do
          if result.length < min_size
            test_case.forced_choice(1)
          elsif result.length >= max_size
            # Only stop once the list is actually full; comparing
            # `length + 1` here would make `max_size` unreachable.
            test_case.forced_choice(0)
            break
          elsif test_case.weighted(0.9).zero?
            break
          end
          result << test_case.any(elements)
        end
        result
      end

      Possibility.new(produce, name: "lists(#{elements.name})")
    end

    # Possibility with exactly one outcome: `value`. The test case is ignored,
    # so drawing from it records no choices.
    def just(value)
      constant = ->(_test_case) { value }
      Possibility.new(constant, name: "just(#{value})")
    end

    # No possible values, i.e. any call to `any` with this possibility rejects
    # the test case. It is named explicitly so that descriptions which embed
    # it (such as `mix_of`'s) do not show the placeholder default name.
    def nothing
      Possibility.new(->(test_case) { test_case.reject }, name: "nothing")
    end

    # Possibility whose values come from any one of `possibilities`: an index
    # is chosen first, then a value is drawn from the possibility at that
    # index. With no arguments this is the same as `nothing`.
    def mix_of(*possibilities)
      return nothing if possibilities.empty?

      label = "mix_of(#{possibilities.map(&:name).join(", ")})"
      pick_one = ->(tc) do
        index = tc.choice(possibilities.length - 1)
        tc.any(possibilities[index])
      end

      Possibility.new(pick_one, name: label)
    end

    # Possibility producing an array with one value per entry of
    # `possibilities`, where element i is drawn (in order) from
    # possibilities[i].
    def tuples(*possibilities)
      label = "tuples(#{possibilities.map(&:name).join(", ")})"
      draw_each = ->(tc) do
        possibilities.map { |possibility| tc.any(possibility) }
      end

      Possibility.new(draw_each, name: label)
    end

    # We cap the maximum amount of entropy a test case can use.
    # This prevents the generated test case size from exploding: any test
    # case that would need more choices than this is effectively rejected.
    BUFFER_SIZE = 8 * 1024

    # Wraps a test function so that it can be called with a bare choice
    # sequence and returns the resulting status, remembering every outcome in
    # a tree keyed by choice. The tree lets it answer without running the test
    # again, even for sequences it has not seen exactly, whenever a known
    # outcome is reached along the way.
    #
    # You can safely omit implementing this at the cost of somewhat increased
    # shrinking time.
    class CachedTestFunction
      def initialize(&test_function)
        @test_function = test_function

        # Each tree node is either a Hash mapping the next choice to the node
        # reached after making it, or a Status recording that mark_status was
        # called at that point (so later choices are irrelevant).
        #
        # A Patricia trie, storing long non-branching paths inline, would be
        # a better fit; a plain nested Hash is used for simplicity.
        @tree = {}
      end

      # Returns the status the test function produces for `choices`, running
      # it only when the tree cannot already answer.
      def call(choices)
        known = lookup(choices)
        return known unless known.nil?

        # Unknown territory: actually run the test function.
        test_case = TestCase.for_choices(choices)
        @test_function.(test_case)
        fail if test_case.status.nil?

        record(test_case)
        test_case.status
      end

      private

      # Walks the tree along `choices`. Returns the stored status, OVERRUN if
      # the whole sequence is a known path that needs further choices, or nil
      # when the walk leaves the known part of the tree.
      def lookup(choices)
        node = @tree
        choices.each do |choice|
          return nil unless node.key?(choice)

          node = node[choice]
          next unless node.is_a?(Status)

          # Overruns are stored as branches, never as leaves.
          fail if node == Status::OVERRUN

          return node
        end

        # Every choice was consumed without reaching a verdict, so the test
        # would ask for another choice and overrun.
        Status::OVERRUN
      end

      # Stores the choices a test case actually made, ending in its status
      # unless it overran (in which case the path is left open).
      def record(test_case)
        made = test_case.choices
        overran = test_case.status == Status::OVERRUN
        node = @tree

        made.each_with_index do |choice, index|
          final = index + 1 == made.length
          if final && !overran
            node[choice] = test_case.status
          elsif node.key?(choice)
            node = node[choice]
          else
            node = node[choice] = {}
          end
        end
      end
    end

    class TestingState
      attr_reader :result, :valid_test_cases, :calls

      # Stores the collaborators for a run and zeroes the bookkeeping:
      # no calls made, no valid test cases seen, test not known to be trivial.
      def initialize(random:, test_function:, max_examples:)
        @random = random
        @_test_function = test_function
        @max_examples = max_examples

        @calls = 0
        @valid_test_cases = 0
        @test_is_trivial = false
      end

      # Runs the wrapped test function on `test_case` and folds the outcome
      # into this state: the call and valid-case counters, the trivial-test
      # flag, the best targeting score seen, and the smallest interesting
      # choice sequence seen.
      def test_function(test_case)
        begin
          @_test_function.(test_case)
        rescue StopTest
          # The test case stopped itself early; nothing more to do here.
        end

        # Finishing without any status being set counts as a valid run.
        test_case.status = Status::VALID if test_case.status.nil?
        @calls += 1

        outcome = test_case.status
        made = test_case.choices

        # A test that completes without making a single choice cannot be
        # varied, so there is no point generating further cases.
        @test_is_trivial = true if outcome >= Status::INVALID && made.empty?

        if outcome >= Status::VALID
          @valid_test_cases += 1

          score = test_case.targeting_score
          if !score.nil? && (@best_scoring.nil? || score > @best_scoring.first)
            @best_scoring = [score, made]
          end
        end

        return unless outcome == Status::INTERESTING

        # Keep only the shortlex-smallest interesting sequence.
        improves = @result.nil? || (sort_key(made) <=> sort_key(@result)) == -1
        @result = made if improves
      end

      # If any test cases have had `target()` called on them, do a simple
      # hill climbing algorithm to attempt to optimise that target score.
      #
      # Does nothing when an interesting result has already been found or when
      # no test case has reported a targeting score. Stops as soon as
      # `keep_generating?` turns false.
      def target
        return if !@result.nil? || @best_scoring.nil?

        # Can we improve the score by changing choices[i] by `step`?
        # Runs a new test case whose prefix is the best-scoring choice
        # sequence with position `i` adjusted; `test_function` replaces
        # @best_scoring itself when the new case scores higher.
        adjust = ->(i, step) do
          fail if @best_scoring.nil?

          score, choices = @best_scoring
          # Refuse steps that would go negative, or start from a value that
          # already occupies 64 bits.
          return false if choices[i] + step < 0 || choices[i].bit_length >= 64

          attempt = choices.dup
          attempt[i] += step
          test_case = TestCase.new(
            prefix: attempt, random: @random, max_size: BUFFER_SIZE
          )
          test_function(test_case)

          fail if test_case.status.nil?

          test_case.status >= Status::VALID &&
            !test_case.targeting_score.nil? &&
            test_case.targeting_score > score
        end

        while keep_generating?
          # Pick a random position in the best-scoring sequence to climb on.
          i = @random.rand(@best_scoring[1].length)
          sign = 0
          # Find a direction (+1 or -1) in which a single step improves.
          [1, -1].each do |k|
            # This `return` leaves `target` altogether, not just the block.
            return unless keep_generating?

            if adjust.(i, k)
              sign = k
              break
            end
          end

          # Neither direction helped at this position; try another one.
          next if sign.zero?

          # Keep doubling the step size for as long as each step improves.
          k = 1
          k *= 2 while keep_generating? && adjust.(i, sign * k)

          # Then apply steps of each size while they improve, halving the
          # size (integer division) until it reaches zero.
          while k.positive?
            while keep_generating? && adjust.(i, sign * k)
            end
            k /= 2
          end
        end
      end

      # Runs the three phases in order: random generation, hill climbing on
      # the targeting score, and shrinking of any interesting result found.
      def run
        generate
        target
        shrink
      end

      # Whether another test case should be run: the test must not be
      # trivial, no interesting result may have been found yet, and both the
      # valid-example budget and the overall call budget must have room left.
      def keep_generating?
        return false if @test_is_trivial
        return false unless result.nil?
        return false unless @valid_test_cases < @max_examples

        # The total number of calls is capped as well as the number of valid
        # examples, so that tests with hard or impossible to satisfy
        # preconditions do not take a prohibitively long time.
        @calls < @max_examples * 10
      end

      # Runs freshly generated random test cases until `keep_generating?`
      # says to stop. Once some test case has reported a targeting score,
      # generation also stops when half of the example budget has been used.
      def generate
        while keep_generating?
          break unless @best_scoring.nil? || @valid_test_cases < @max_examples / 2

          fresh_case = TestCase.new(prefix: [], random: @random, max_size: BUFFER_SIZE)
          test_function(fresh_case)
        end
      end

      # If we have found an interesting example, try shrinking it so that the
      # choice sequence leading to our best example is shortlex smaller than
      # the one we originally found. This improves the quality of the generated
      # test case, as per our paper.
      #
      # https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf
      #
      # Does nothing when there is no result or the result is already empty.
      #
      # `@result` is replaced as a side effect of `test_function` whenever a
      # smaller interesting choice sequence is seen, and the replacement can
      # be shorter than the attempt that produced it. Every pass below
      # therefore re-checks its indices against the current length.
      def shrink
        return if @result.nil? || @result.empty?

        # Shrinking will typically try the same choice sequences over and over
        # again, so we cache the test function in order to not end up
        # reevaluating it in those cases. This also allows us to catch cases
        # where we try something that is e.g. a prefix of something we've
        # previously tried, which is guaranteed not to work.
        cached = CachedTestFunction.new { |tc| test_function(tc) }

        # Is `choices` an interesting choice sequence?
        consider = ->(choices) do
          return true if choices == @result

          cached.(choices) == Status::INTERESTING
        end

        fail unless consider.(@result)

        # Attempts to replace some indices in the current result with new
        # values. Useful for the purely lexicographic reductions below.
        replace = ->(values) do
          fail if @result.nil?

          attempt = @result.dup
          values.each do |index, value|
            # The size of the result can change during shrinking. If that
            # happens, stop attempting to make use of these replacements
            # because some other shrink pass is better to run now.
            return false if index >= attempt.length

            attempt[index] = value
          end
          consider.(attempt)
        end

        # We are going to perform a number of transformations to the current
        # result, iterating until none of them make any progress - i.e. until
        # we make it through an entire iteration of the loop without changing
        # the result.
        prev = nil
        while prev != @result
          prev = @result

          # A note on weird loop order: We iterate backwards through the choice
          # sequence rather than forwards, because later bits tend to depend on
          # earlier bits so it's easier to make changes near the end and
          # deleting bits at the end may allow us to make changes earlier on
          # that we'd have missed.
          #
          # Note that we do not restart the loop at the end when we find a
          # successful shrink. This is because things we've already tried are
          # less likely to work.
          #
          # If this guess is wrong, that's OK, this isn't a correctness
          # problem, because if we made a successful reduction then we are not
          # at a fixed point and will restart the loop at the end the next time
          # round. In some cases this can result in performance issues, but the
          # end result should still be fine.

          # First try deleting each choice we made in chunks. We try longer
          # chunks because this allows us to delete whole composite elements:
          # e.g. deleting an element from a generated list requires us to
          # delete both the choice of whether to include it and also the
          # element itself, which may involve more than one choice. Some things
          # will take more than 8 choices in the sequence. That's too bad, we
          # may not be able to delete those. In Hypothesis proper we record the
          # boundaries corresponding to `any` calls so that we can try deleting
          # those, but that's pretty high overhead and also a bunch of slightly
          # annoying code that it's not worth porting.
          #
          # We could instead do a quadratic amount of work to try all
          # boundaries, but in general we don't want to do that because even a
          # shrunk test case can involve a relatively large number of choices.
          k = 8
          while k.positive?
            i = @result.length - k - 1
            until i.negative?
              if i >= @result.length
                # Can happen if we successfully lowered the value at i - 1
                i -= 1
                next
              end
              attempt = @result[0...i] + (@result[i + k..] || [])

              fail unless attempt.length < @result.length

              unless consider.(attempt)
                # This fixes a common problem that occurs when you have
                # dependencies on some length parameter. e.g. draw a number
                # between 0 and 10 and then draw that many elements. This
                # can't delete everything that occurs that way, but it can
                # delete some things and often will get us unstuck when
                # nothing else does.
                if i.positive? && attempt[i - 1].positive?
                  attempt[i - 1] -= 1
                  i += 1 if consider.(attempt)
                end

                i -= 1
              end
            end

            k /= 2
          end

          # Now we try replacing blocks of choices with zeroes. Note that
          # unlike the above we skip k = 1 because we handle that in the next
          # step. Often (but not always) a block of all zeroes is the shortlex
          # smallest value that a region can be.
          k = 8
          while k > 1
            i = @result.length - k
            until i.negative?
              if replace.((i...i + k).to_h { |index| [index, 0] })
                # If we've succeeded then all of [i, i + k) is zero so we
                # adjust i so that the next region does not overlap with this
                # at all.
                i -= k
              else
                # Otherwise we might still be able to zero some of these values
                # but not the last one, so we just go back one.
                i -= 1
              end
            end
            k /= 2
          end

          # Now try replacing each choice with a smaller value by doing a
          # binary search. This will replace n with 0 or n - 1 if possible, but
          # will also more efficiently replace it with a smaller number than
          # doing multiple subtractions would.
          i = @result.length - 1
          until i.negative?
            # A successful replacement further right may have shortened the
            # result, in which case there is no value left at `i` to lower.
            if i < @result.length
              bin_search_down(0, @result[i]) { |v| replace.({ i => v }) }
            end
            i -= 1
          end

          # NB from here on this is just showing off cool shrinker tricks and
          # you probably don't need to worry about it and can skip these bits
          # unless they're easy and you want bragging rights for how much
          # better you are at shrinking than the local QuickCheck equivalent.

          # Try sorting out of order ranges of choices, as `sort(x) <= x`, so
          # this is always a lexicographic reduction.
          k = 8
          while k > 1
            (@result.length - k - 1).downto(0) do |i|
              # The bounds of this loop were fixed before any attempt was
              # made. If an earlier attempt shortened the result, slices that
              # start past its end are nil in Ruby, so skip those positions
              # and treat a missing tail as empty.
              next if i >= @result.length

              head = @result[0...i]
              window = @result[i...i + k].sort
              tail = @result[i + k..] || []
              consider.(head + window + tail)
            end
            k /= 2
          end

          # Try adjusting nearby pairs of integers by redistributing value
          # between them. This is useful for tests that depend on the sum of
          # some generated values.
          [2, 1].each do |gap|
            (@result.length - gap - 1).downto(0) do |i|
              j = i + gap
              # This check is necessary because the previous changes might have
              # shrunk the size of result.
              next unless j < @result.length

              # Try swapping out of order pairs
              if @result[i] > @result[j]
                replace.({ j => @result[i], i => @result[j] })
              end

              # j could be out of range if the previous swap succeeded.
              next unless j < @result.length && @result[i].positive?

              prev_i = @result[i]
              prev_j = @result[j]
              # Move value from position i to position j, keeping their sum.
              bin_search_down(0, prev_i) do |v|
                replace.({ i => v, j => prev_j + (prev_i - v) })
              end
            end
          end
        end
      end

      private

      # Returns a key that orders choice sequences shortlex: shorter sequences
      # first, and sequences of equal length by their contents.
      def sort_key(choices)
        [choices.length, choices]
      end

      # Binary-searches downwards for a value `n` in [low, high] for which the
      # block returns true. The block is assumed (never checked) to hold for
      # `high`.
      #
      # `low` is returned when the block holds for it. Otherwise the returned
      # `n` only promises that the block is false for `n - 1` and true for
      # `n`: it is a locally minimal value, not necessarily the smallest one.
      def bin_search_down(low, high, &f)
        return low if f.(low)

        # From here on the block is false at `low` and taken to be true at
        # `high`; narrow the gap until the two are adjacent.
        while low + 1 < high
          mid = low + (high - low) / 2
          low, high = f.(mid) ? [low, mid] : [mid, high]
        end

        high
      end
    end

    # A minimal Hash-like store that keeps each value in its own file inside
    # a directory. It implements the operations `run_test` needs from its
    # `database`: `[]`, `[]=` and `delete`.
    #
    # Values are read and written as raw bytes, since they are packed binary
    # choice sequences rather than text.
    class DirectoryDb
      # Uses `dir` as the backing directory, creating it if it does not exist.
      # The parent directory must already exist.
      def initialize(dir)
        @dir = dir
        Dir.mkdir(@dir)
      rescue Errno::EEXIST
        # Already there, possibly from an earlier run; nothing to do.
      end

      # Returns the bytes stored under `key`, or nil if there are none.
      def [](key)
        File.binread(file(key))
      rescue Errno::ENOENT
        nil
      end

      # Stores `value` under `key`, replacing any previous value.
      def []=(key, value)
        File.binwrite(file(key), value)
      end

      # Removes the entry for `key`. Returns the removed value, or nil when
      # there was nothing stored under `key`.
      def delete(key)
        previous = self[key]
        File.delete(file(key)) unless previous.nil?
        previous
      end

      private

      # Path of the file backing `key`: the first ten hex digits of the SHA1
      # of its string form, inside the backing directory.
      def file(key)
        File.join(@dir, Digest::SHA1.hexdigest(key.to_s)[0...10])
      end
    end

    # Common ancestor of the errors defined by this library.
    class Error < StandardError
    end

    # Attempted to make choices on a test case that has been completed.
    class Frozen < Error
    end

    # Raised when a test should stop executing early.
    class StopTest < Error
    end

    # Raised when a test has no valid examples.
    class Unsatisfiable < Error
    end

    # The outcome of running a test case. Statuses are ordered by their
    # numeric `value`, from OVERRUN (lowest) to INTERESTING (highest), so they
    # can be compared with `<`, `>=` and friends.
    class Status < Struct.new(:value)
      include Comparable

      # Test case didn't have enough data to complete
      OVERRUN = new(0).freeze

      # Test case contained something that prevented completion
      INVALID = new(1).freeze

      # Test case completed just fine but was boring
      VALID = new(2).freeze

      # Test case completed and was interesting
      INTERESTING = new(3).freeze

      # Orders statuses by value. Returns nil for anything that is not a
      # Status, so that `==` against an unrelated object (including nil) is
      # simply false instead of raising.
      def <=>(other)
        return nil unless other.is_a?(Status)

        value <=> other.value
      end
    end
  end
end