# frozen_string_literal: true

require "digest"
require "minitest"

module Minitest
  # A Minitest::Test subclass that adds a small property-based testing engine.
  #
  # Test methods call `run_test` with a block. The block draws values from the
  # `TestCase` it is given (directly through `choice`/`weighted`, or through
  # `any` with one of the `Possibility` builders defined below) and raises to
  # signal a failure. Failing choice sequences are shrunk and cached.
  class Thesis < Test
    VERSION = "0.1.0"

    # Runs a test. Usage is:
    #
    #     run_test("name") do |test_case|
    #       n = test_case.choice(1000)
    #     end
    #
    # The block takes a `TestCase` argument, and should raise an exception to
    # indicate a test failure. It will either run silently or print drawn
    # values and then fail with an exception if some failing test case is
    # found.
    #
    # Arguments:
    # * name: the key under which a failing choice sequence is stored in
    #     `database`.
    # * max_examples: the maximum number of valid test cases to run for.
    #     Note that under some circumstances the test may run fewer test
    #     cases than this.
    # * random: An instance of Random that will be used for all
    #     nondeterministic choices.
    # * database: A Hash-like object (responding to `[]`, `[]=` and `delete`)
    #     in which results will be cached and resumed from, ensuring that if
    #     a test is run twice it fails in the same way.
    # * quiet: Will not print anything on failure if true.
    #
    # Raises ArgumentError when no block is given, Unsatisfiable when no valid
    # test case could be produced, and otherwise re-raises whatever the block
    # raises for the smallest failing example found.
    def run_test(
      name,
      max_examples: 100,
      random: Random.new,
      database: DirectoryDb.new(".minitest-thesis-cache"),
      quiet: false,
      &test
    )
      raise ArgumentError, "run_test requires a block" unless test

      mark_failures_interesting = ->(test_case) do
        test.(test_case)
      rescue NoMemoryError, SignalException, SystemExit
        # Never treat interrupts, exits or memory exhaustion as test failures.
        raise
      rescue Exception # Minitest::Assertion inherits directly from Exception.
        # A status that is already set means the exception is our own control
        # flow (StopTest/Frozen), so let it propagate untouched.
        raise unless test_case.status.nil?
        test_case.mark_status(Status::INTERESTING)
      end

      state = TestingState.new(random:, test_function: mark_failures_interesting, max_examples:)

      # Replay a previously recorded failure first so that reruns fail the
      # same way.
      prev_failure = database[name]
      unless prev_failure.nil?
        choices = prev_failure.unpack("Q>*")
        state.test_function(TestCase.for_choices(choices))
      end

      state.run if state.result.nil?
      raise Unsatisfiable if state.valid_test_cases.zero?

      if state.result.nil?
        database.delete(name)
      else
        database[name] = state.result.pack("Q>*")
        # Re-run the minimal failing example outside of the engine so that
        # the original exception reaches the caller (printing draws unless
        # quiet).
        test.(TestCase.for_choices(state.result, print_results: !quiet))
      end
    end

    # Represents a single generated test case, which consists of an underlying
    # set of choices that produce possibilities.
    class TestCase
      # Returns a test case that makes exactly this series of choices. It has
      # no source of randomness, so it overruns once `choices` is exhausted.
      def self.for_choices(choices, print_results: false)
        new(prefix: choices, random: nil, max_size: choices.length, print_results:)
      end

      attr_accessor :status
      attr_reader :choices, :targeting_score

      # * prefix: choices to replay before falling back to `random`.
      # * random: source of randomness once the prefix is exhausted.
      # * max_size: maximum number of choices before the test case overruns.
      # * print_results: whether top-level draws are printed with `puts`.
      def initialize(prefix:, random:, max_size: Float::INFINITY, print_results: false)
        @prefix = prefix
        @random = random
        @max_size = max_size
        @print_results = print_results
        @choices = []
        @status = nil
        @depth = 0
        @targeting_score = nil
      end

      # Returns an integer in the inclusive range [0, n].
      def choice(n)
        # A range is used because Random#rand(n) excludes n and raises on 0.
        result = make_choice(n) { @random.rand(0..n) }
        puts "choice(#{n}): #{result}" if should_print?
        result
      end

      # Returns 1 with probability `p` and 0 otherwise.
      def weighted(p)
        result =
          if p <= 0
            forced_choice(0)
          elsif p >= 1
            forced_choice(1)
          else
            make_choice(1) { @random.rand <= p ? 1 : 0 }
          end
        puts "weighted(#{p}): #{result}" if should_print?
        result
      end

      # Inserts a fake choice into the choice sequence, as if some call to
      # choice() had returned `n`. You almost never need this, but sometimes it
      # can be a useful hint to the shrinker.
      def forced_choice(n)
        raise RangeError, "Invalid choice #{n}" if n.bit_length > 64 || n.negative?
        raise Frozen unless @status.nil?
        mark_status(Status::OVERRUN) if @choices.length >= @max_size

        @choices << n
        n
      end

      # Mark this test case as invalid.
      def reject = mark_status(Status::INVALID)

      # If this precondition is not met, abort the test and mark this test
      # case as invalid.
      def assume(precondition)
        reject unless precondition
      end

      # Set a score to maximize. Multiple calls to this function will override
      # previous ones.
      #
      # The name and idea come from Löscher, Andreas, and Konstantinos Sagonas.
      # "Targeted property-based testing." ISSTA. 2017, but the implementation
      # is based on that found in Hypothesis, which is not that similar to
      # anything described in the paper.
      def target(score)
        @targeting_score = score
      end

      # Return a possible value from `possibility`.
      def any(possibility)
        begin
          # Nested draws are not printed; only the outermost value is.
          @depth += 1
          result = possibility.produce.(self)
        ensure
          @depth -= 1
        end
        puts "any(#{possibility}): #{result}" if should_print?
        result
      end

      # Set the status and raise StopTest. Raises Frozen if a status has
      # already been set.
      def mark_status(status)
        raise Frozen unless self.status.nil?

        @status = status
        raise StopTest
      end

      private

      def should_print? = @print_results && @depth.zero?

      # Make a choice in [0, n], replaying from the prefix while it lasts and
      # calling rnd_method only when randomness is needed.
      def make_choice(n, &rnd_method)
        raise RangeError, "Invalid choice #{n}" if n.bit_length > 64 || n.negative?
        raise Frozen unless @status.nil?
        mark_status(Status::OVERRUN) if @choices.length >= @max_size

        position = @choices.length
        result = position < @prefix.length ? @prefix[position] : rnd_method.()
        @choices << result
        # A replayed value may not fit the range requested this time around.
        mark_status(Status::INVALID) if result > n
        result
      end
    end

    # Represents some range of values that might be used in a test, that can be
    # requested from a `TestCase`. Pass one of these to TestCase#any to get a
    # concrete value.
    class Possibility
      attr_reader :produce, :name

      # * produce: a callable taking a TestCase and returning a value.
      # * name: label used when printing drawn values.
      def initialize(produce, name: "TODO")
        @produce = produce
        @name = name
      end

      def inspect = name
      def to_s = name

      # Returns a `Possibility` where values come from applying `f` to some
      # possible value for `self`.
      def map(&f)
        self.class.new(
          ->(test_case) { f.(test_case.any(self)) },
          name: "#{name}.map(TODO)",
        )
      end

      # Returns a `Possibility` where values come from applying `f` (which
      # should return a new `Possibility`) to some possible value for `self`
      # and then returning a possible value from that.
      def bind(&f)
        produce = ->(test_case) { test_case.any(f.(test_case.any(self))) }
        self.class.new(produce, name: "#{name}.bind(TODO)")
      end

      # Returns a `Possibility` whose values are any possible value of `self`
      # for which `f` returns a truthy value. Gives up and rejects the test
      # case after three unsuccessful draws.
      def satisfying(&f)
        produce = ->(test_case) {
          3.times do
            candidate = test_case.any(self)
            return candidate if f.(candidate)
          end
          test_case.reject
        }
        self.class.new(produce, name: "#{name}.select(TODO)")
      end
    end

    # Any integer in the range [m, n] is possible.
    def integers(m, n)
      Possibility.new(->(tc) { m + tc.choice(n - m) }, name: "integers(#{m}, #{n})")
    end

    # Any list whose elements are possible values from `elements`, and whose
    # length is between `min_size` and `max_size` (both inclusive), is
    # possible.
    def lists(elements, min_size: 0, max_size: Float::INFINITY)
      produce = ->(test_case) {
        result = []
        loop do
          if result.length < min_size
            # Record the "continue" decision so the shrinker sees a uniform
            # choice sequence.
            test_case.forced_choice(1)
          elsif result.length >= max_size
            test_case.forced_choice(0)
            break
          elsif test_case.weighted(0.9).zero?
            break
          end
          result << test_case.any(elements)
        end
        result
      }
      Possibility.new(produce, name: "lists(#{elements.name})")
    end

    # Only `value` is possible.
    def just(value) = Possibility.new(->(_) { value }, name: "just(#{value})")

    # No possible values, i.e. any call to `any` will reject the test case.
    def nothing = Possibility.new(->(tc) { tc.reject })

    # Possible values can be any value possible for one of `possibilities`.
    def mix_of(*possibilities)
      return nothing if possibilities.empty?

      Possibility.new(
        ->(tc) { tc.any(possibilities[tc.choice(possibilities.length - 1)]) },
        name: "mix_of(#{possibilities.map(&:name).join(", ")})",
      )
    end

    # Any array t of length possibilities.length such that t[i] is possible
    # for possibilities[i] is possible.
    def tuples(*possibilities)
      Possibility.new(
        ->(tc) { possibilities.map { |possibility| tc.any(possibility) } },
        name: "tuples(#{possibilities.map(&:name).join(", ")})",
      )
    end

    # We cap the maximum amount of entropy a test case can use. This prevents
    # cases where the generated test case size explodes, by effectively
    # rejecting test cases that grow too large.
    BUFFER_SIZE = 8 * 1024

    # Returns a cached version of a function that maps a choice sequence to the
    # status of calling a test function on a test case populated with it. Is
    # able to take advantage of the structure of the test function to predict
    # the result even if the exact sequence of choices has not been seen
    # previously.
    #
    # You can safely omit implementing this at the cost of somewhat increased
    # shrinking time.
    class CachedTestFunction
      def initialize(&test_function)
        @test_function = test_function

        # Tree nodes are either a point at which a choice occurs
        # in which case they map the result of the choice to the
        # tree node we are in after, or a Status object indicating
        # mark_status was called at this point and all future
        # choices are irrelevant.
        #
        # Note that a better implementation of this would use
        # a Patricia trie, which implements long non-branching
        # paths as an array inline. For simplicity we don't
        # do that here.
        @tree = {}
      end

      # Returns the Status that running the test function on `choices` gives,
      # consulting the tree first and only running the function when needed.
      def call(choices)
        node = @tree
        begin
          choices.each do |c|
            node = node.fetch(c)
            # mark_status was called, thus future choices
            # will be ignored.
            if node.is_a?(Status)
              fail if node == Status::OVERRUN
              return node
            end
          end
          # If we never entered an unknown region of the tree
          # or hit a Status value, then we know that another
          # choice will be made next and the result will overrun.
          return Status::OVERRUN
        rescue KeyError
          # Unknown territory: fall through and run the test for real.
        end

        # We now have to actually call the test function to find out what
        # happens.
        test_case = TestCase.for_choices(choices)
        @test_function.(test_case)
        fail if test_case.status.nil?

        # We enter the choices made in a tree.
        node = @tree
        last_index = test_case.choices.length - 1
        test_case.choices.each_with_index do |c, i|
          if i < last_index || test_case.status == Status::OVERRUN
            node = node.key?(c) ? node[c] : (node[c] = {})
          else
            node[c] = test_case.status
          end
        end
        test_case.status
      end
    end

    # Holds the state of one `run_test` invocation: how many test cases have
    # been run, the best target score, and the smallest interesting (failing)
    # choice sequence found so far.
    class TestingState
      attr_reader :result, :valid_test_cases, :calls

      def initialize(random:, test_function:, max_examples:)
        @random = random
        @_test_function = test_function
        @max_examples = max_examples
        @valid_test_cases = 0
        @calls = 0
        @test_is_trivial = false
        @result = nil
        @best_scoring = nil
      end

      # Runs the underlying test function on `test_case` and updates the
      # bookkeeping (call counts, best score, best result) from its outcome.
      def test_function(test_case)
        begin
          @_test_function.(test_case)
        rescue StopTest
          # Expected: this is how a test case signals it finished early.
        end

        test_case.status = Status::VALID if test_case.status.nil?
        @calls += 1

        # A test that completes without making any choices can only ever do
        # one thing, so there is no point generating more examples.
        if test_case.status >= Status::INVALID && test_case.choices.empty?
          @test_is_trivial = true
        end

        if test_case.status >= Status::VALID
          @valid_test_cases += 1
          score = test_case.targeting_score
          if !score.nil? && (@best_scoring.nil? || score > @best_scoring.first)
            @best_scoring = [score, test_case.choices]
          end
        end

        if test_case.status == Status::INTERESTING &&
           (@result.nil? || (sort_key(test_case.choices) <=> sort_key(@result)) == -1)
          @result = test_case.choices
        end
      end

      # If any test cases have had `target()` called on them, do a simple
      # hill climbing algorithm to attempt to optimise that target score.
      def target
        return if !@result.nil? || @best_scoring.nil?

        # Can we improve the score by changing choices[i] by `step`?
        adjust = ->(i, step) do
          fail if @best_scoring.nil?
          score, choices = @best_scoring
          # The best-scoring sequence may have been replaced by a shorter one.
          return false if i >= choices.length
          return false if choices[i] + step < 0 || choices[i].bit_length >= 64

          attempt = choices.dup
          attempt[i] += step
          test_case = TestCase.new(prefix: attempt, random: @random, max_size: BUFFER_SIZE)
          test_function(test_case)
          fail if test_case.status.nil?

          test_case.status >= Status::VALID &&
            !test_case.targeting_score.nil? &&
            test_case.targeting_score > score
        end

        while keep_generating?
          i = @random.rand(@best_scoring[1].length)
          sign = 0
          [1, -1].each do |direction|
            return unless keep_generating?

            if adjust.(i, direction)
              sign = direction
              break
            end
          end
          next if sign.zero?

          # Grow the step exponentially while it keeps helping, then walk it
          # back down, applying each step size as often as it still improves.
          k = 1
          k *= 2 while keep_generating? && adjust.(i, sign * k)
          while k.positive?
            while keep_generating? && adjust.(i, sign * k)
            end
            k /= 2
          end
        end
      end

      # Runs the three phases in order: random generation, targeted hill
      # climbing, and shrinking of any failure found.
      def run
        generate
        target
        shrink
      end

      # Whether more test cases should be generated.
      def keep_generating?
        !@test_is_trivial &&
          result.nil? &&
          @valid_test_cases < @max_examples &&
          # We impose a limit on the maximum number of calls as
          # well as the maximum number of valid examples. This is
          # to avoid taking a prohibitively long time on tests which
          # have hard or impossible to satisfy preconditions.
          @calls < @max_examples * 10
      end

      # Run random generation until either we have found an interesting test
      # case or hit the limit of how many test cases we should evaluate. When
      # a target score exists, only half the budget is spent here so that the
      # rest is left for `target`.
      def generate
        while keep_generating? && (@best_scoring.nil? || @valid_test_cases < @max_examples / 2)
          test_function(TestCase.new(prefix: [], random: @random, max_size: BUFFER_SIZE))
        end
      end

      # If we have found an interesting example, try shrinking it so that the
      # choice sequence leading to our best example is shortlex smaller than
      # the one we originally found. This improves the quality of the generated
      # test case, as per our paper.
      #
      # https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf
      def shrink
        return if @result.nil? || @result.empty?

        # Shrinking will typically try the same choice sequences over and over
        # again, so we cache the test function in order to not end up
        # reevaluating it in those cases. This also allows us to catch cases
        # where we try something that is e.g. a prefix of something we've
        # previously tried, which is guaranteed not to work.
        cached = CachedTestFunction.new { |tc| test_function(tc) }

        # Is `choices` an interesting sequence? A successful check updates
        # @result as a side effect of running the test function.
        consider = ->(choices) do
          return true if choices == @result

          cached.(choices) == Status::INTERESTING
        end

        # Attempts to replace some indices in the current result with new
        # values. Useful for the purely lexicographic reductions below.
        replace = ->(values) do
          fail if @result.nil?
          attempt = @result.dup
          values.each do |i, v|
            # The size of @result can change during shrinking. If that
            # happens, stop attempting to make use of these replacements
            # because some other shrink pass is better to run now.
            return false if i >= attempt.length

            attempt[i] = v
          end
          consider.(attempt)
        end

        fail unless consider.(@result)

        # We are going to perform a number of transformations to the current
        # result, iterating until none of them make any progress - i.e. until
        # we make it through an entire iteration of the loop without changing
        # the result.
        prev = nil
        while prev != @result
          prev = @result

          # A note on weird loop order: We iterate backwards through the choice
          # sequence rather than forwards, because later bits tend to depend on
          # earlier bits so it's easier to make changes near the end and
          # deleting bits at the end may allow us to make changes earlier on
          # that we'd have missed.
          #
          # Note that we do not restart the loop at the end when we find a
          # successful shrink. This is because things we've already tried are
          # less likely to work.
          #
          # If this guess is wrong, that's OK, this isn't a correctness
          # problem, because if we made a successful reduction then we are not
          # at a fixed point and will restart the loop at the end the next time
          # round. In some cases this can result in performance issues, but the
          # end result should still be fine.

          # First try deleting each choice we made in chunks. We try longer
          # chunks because this allows us to delete whole composite elements:
          # e.g. deleting an element from a generated list requires us to
          # delete both the choice of whether to include it and also the
          # element itself, which may involve more than one choice. Some things
          # will take more than 8 choices in the sequence. That's too bad, we
          # may not be able to delete those. In Hypothesis proper we record the
          # boundaries corresponding to `any` calls so that we can try deleting
          # those, but that's pretty high overhead and also a bunch of slightly
          # annoying code that it's not worth porting.
          #
          # We could instead do a quadratic amount of work to try all
          # boundaries, but in general we don't want to do that because even a
          # shrunk test case can involve a relatively large number of choices.
          k = 8
          while k.positive?
            i = @result.length - k - 1
            until i.negative?
              if i >= @result.length
                # Can happen if we successfully lowered the value at i - 1
                i -= 1
                next
              end

              attempt = @result[0...i] + (@result[(i + k)..] || [])
              fail unless attempt.length < @result.length

              unless consider.(attempt)
                # This fixes a common problem that occurs
                # when you have dependencies on some
                # length parameter. e.g. draw a number
                # between 0 and 10 and then draw that
                # many elements. This can't delete
                # everything that occurs that way, but
                # it can delete some things and often
                # will get us unstuck when nothing else
                # does.
                if i.positive? && attempt[i - 1].positive?
                  attempt[i - 1] -= 1
                  i += 1 if consider.(attempt)
                end
                i -= 1
              end
            end
            k /= 2
          end

          # Now we try replacing blocks of choices with zeroes. Note that
          # unlike the above we skip k = 1 because we handle that in the next
          # step. Often (but not always) a block of all zeroes is the shortlex
          # smallest value that a region can be.
          k = 8
          while k > 1
            i = @result.length - k
            until i.negative?
              if replace.((i...(i + k)).to_h { |index| [index, 0] })
                # If we've succeeded then all of [i, i + k) is zero so we
                # adjust i so that the next region does not overlap with this
                # at all.
                i -= k
              else
                # Otherwise we might still be able to zero some of these values
                # but not the last one, so we just go back one.
                i -= 1
              end
            end
            k /= 2
          end

          # Now try replacing each choice with a smaller value by doing a
          # binary search. This will replace n with 0 or n - 1 if possible, but
          # will also more efficiently replace it with a smaller number than
          # doing multiple subtractions would.
          i = @result.length - 1
          until i.negative?
            # Skip indices that fell off the end because @result got shorter.
            if i < @result.length
              bin_search_down(0, @result[i]) { |v| replace.({ i => v }) }
            end
            i -= 1
          end

          # NB from here on this is just showing off cool shrinker tricks and
          # you probably don't need to worry about it and can skip these bits
          # unless they're easy and you want bragging rights for how much
          # better you are at shrinking than the local QuickCheck equivalent.

          # Try sorting out of order ranges of choices, as `sort(x) <= x`, so
          # this is always a lexicographic reduction.
          k = 8
          while k > 1
            (@result.length - k - 1).downto(0) do |start|
              # The index range was computed before this pass began; an
              # earlier success may have shortened @result since, and slicing
              # past the end of an Array yields nil rather than [].
              next if start >= @result.length

              head = @result[0...start]
              window = @result[start, k]
              tail = @result[(start + k)..] || []
              consider.(head + window.sort + tail)
            end
            k /= 2
          end

          # Try adjusting nearby pairs of integers by redistributing value
          # between them. This is useful for tests that depend on the sum of
          # some generated values.
          [2, 1].each do |gap|
            (@result.length - gap - 1).downto(0) do |i|
              j = i + gap
              # This check is necessary because the previous changes might
              # have shrunk the size of the result.
              next unless j < @result.length

              # Try swapping out of order pairs
              if @result[i] > @result[j]
                replace.({ j => @result[i], i => @result[j] })
              end

              # j could be out of range if the previous swap succeeded.
              next unless j < @result.length && @result[i].positive?

              prev_i = @result[i]
              prev_j = @result[j]
              bin_search_down(0, prev_i) do |v|
                replace.({ i => v, j => prev_j + (prev_i - v) })
              end
            end
          end
        end
      end

      private

      # Returns a key that can be used for the shrinking order of test cases:
      # shorter sequences first, then lexicographically smaller ones.
      def sort_key(choices) = [choices.length, choices]

      # Returns n in [low, high] such that f(n) is truthy, where it is assumed
      # and will not be checked that f(high) is truthy.
      #
      # Will return `low` if `f(low)` is truthy, otherwise the only guarantee
      # that is made is that `f(n - 1)` is falsy and `f(n)` is truthy. In
      # particular this does *not* guarantee to find the smallest value, only a
      # locally minimal one.
      def bin_search_down(low, high, &f)
        return low if f.(low)

        while low + 1 < high
          mid = low + (high - low) / 2
          if f.(mid)
            high = mid
          else
            low = mid
          end
        end
        high
      end
    end

    # A minimal Hash-like store that keeps each value in its own file inside a
    # directory. Keys are hashed to produce the file names.
    class DirectoryDb
      # Creates `dir` if it does not exist yet.
      def initialize(dir)
        @dir = dir
        Dir.mkdir(@dir)
      rescue Errno::EEXIST
        # The cache directory is already there; nothing to do.
      end

      # Returns the stored bytes for `key`, or nil when nothing is stored.
      def [](key)
        # Values are packed integers, so read them without any text-mode
        # translation.
        File.binread(file(key))
      rescue Errno::ENOENT
        nil
      end

      # Stores `value` (a binary String) under `key`.
      def []=(key, value)
        File.binwrite(file(key), value)
      end

      # Removes whatever is stored under `key`; a missing entry is not an
      # error. Always returns nil.
      def delete(key)
        File.delete(file(key))
        nil
      rescue Errno::ENOENT
        nil
      end

      private

      # Path of the file backing `key`.
      def file(key)
        File.join(@dir, Digest::SHA1.hexdigest(key)[0...10])
      end
    end

    class Error < StandardError; end

    # Attempted to make choices on a test case that has been completed.
    class Frozen < Error; end

    # Raised when a test should stop executing early.
    class StopTest < Error; end

    # Raised when a test has no valid examples.
    class Unsatisfiable < Error; end

    # Outcome of running a test case. Statuses are ordered:
    # OVERRUN < INVALID < VALID < INTERESTING.
    class Status < Struct.new(:value)
      # Test case didn't have enough data to complete
      OVERRUN = new(0)

      # Test case contained something that prevented completion
      INVALID = new(1)

      # Test case completed just fine but was boring
      VALID = new(2)

      # Test case completed and was interesting
      INTERESTING = new(3)

      include Comparable

      # Orders statuses by their numeric value; returns nil (not comparable)
      # for anything that is not a Status.
      def <=>(other)
        value <=> other.value if other.is_a?(Status)
      end
    end
  end
end