diff --git a/lib/minitest/thesis.rb b/lib/minitest/thesis.rb index 0e3ac93..08583db 100644 --- a/lib/minitest/thesis.rb +++ b/lib/minitest/thesis.rb @@ -2,10 +2,16 @@ require "digest" require "minitest" +require_relative "thesis/directory_db" +require_relative "thesis/error" +require_relative "thesis/possibility" +require_relative "thesis/status" +require_relative "thesis/test_case" +require_relative "thesis/testing_state" +require_relative "thesis/version" + module Minitest module Thesis - VERSION = "0.1.0" - class Test < Minitest::Test # Runs a test. Usage is: @@ -72,161 +78,6 @@ module Minitest end end - # Represents a single generated test case, which consists of an underlying - # set of choices that produce possibilities. - class TestCase - - # Returns a test case that makes this series of choices. - def self.for_choices(choices, print_results: false) - self.new(prefix: choices, random: nil, max_size: choices.length, print_results:) - end - - attr_accessor :status - attr_reader :choices, :targeting_score - - def initialize(prefix:, random:, max_size: Float::INFINITY, print_results: false) - @prefix, @random, @max_size, @print_results = prefix, random, max_size, print_results - @choices = [] - @status = nil - @depth = 0 - @targeting_score = nil - end - - # Returns a number in the range [0, n] - def choice(n) - result = make_choice(n) { @random.rand(n) } - - puts "choice(#{n}): #{result}" if should_print? - - result - end - - # Return True with probability `p`. - def weighted(p) - result = if p <= 0 then forced_choice(0) - elsif p >= 1 then forced_choice(1) - else make_choice(1) { (@random.rand <= p) ? 1 : 0 } - end - - puts "weighted(#{p}): #{result}" if should_print? - - result - end - - # Inserts a fake choice into the choice sequence, as if some call to - # choice() had returned `n`. You almost never need this, but sometimes it - # can be a useful hint to the shrinker. 
- def forced_choice(n) - raise RangeError.new("Invalid choice #{n}") if n.bit_length > 64 || n.negative? - raise Frozen unless @status.nil? - - mark_status(Status::OVERRUN) if @choices.length >= @max_size - - choices << n - n - end - - # Mark this test case as invalid. - def reject = mark_status(Status::INVALID) - - # If this precondition is not met, abort the test and mark this test case as invalid. - def assume(precondition) - return if precondition - reject - end - - # Set a score to maximize. Multiple calls to this function will override previous ones. - # - # The name and idea come from Löscher, Andreas, and Konstantinos Sagonas. - # "Targeted property-based testing." ISSTA. 2017, but the implementation - # is based on that found in Hypothesis, which is not that similar to - # anything described in the paper. - def target(score) = @targeting_score = score - - # Return a possible value from `possibility`. - def any(possibility) - begin - @depth += 1 - result = possibility.produce.(self) - ensure - @depth -= 1 - end - - puts "any(#{possibility}): #{result}" if should_print? - - result - end - - # Set the status and raise StopTest. - def mark_status(status) - raise Frozen unless self.status.nil? - - @status = status - raise StopTest - end - - private - - def should_print? = @print_results && @depth.zero? - - # Make a choice in [0, n], by calling rnd_method if randomness is needed. - def make_choice(n, &rnd_method) - raise RangeError.new("Invalid choice #{n}") if n.bit_length > 64 || n.negative? - raise Frozen unless @status.nil? - - mark_status(Status::OVERRUN) if @choices.length >= @max_size - - result = if @choices.length < @prefix.length - @prefix[@choices.length] - else - rnd_method.() - end - @choices << result - - mark_status(Status::INVALID) if result > n - - result - end - end - - # Represents some range of values that might be used in a test, that can be - # requested from a `TestCase`. Pass one of these to TestCase.any to get a - # concrete value. 
- class Possibility - attr_reader :produce, :name - - def initialize(name = "TODO", &produce) - @name = name - @produce = produce - end - - def inspect = name - def to_s = name - - # "Returns a `Possibility` where values come from applying `f` to some possible value for `self`." - def map(&f) - self.class.new("#{name}.map(TODO)") {|tc| f.call(tc.any(self)) } - end - - # Returns a `Possibility` where values come from applying `f` (which - # should return a new `Possibility` to some possible value for `self` - # then returning a possible value from that. - def bind(&f) - self.class.new("#{name}.bind(TODO)") {|tc| tc.any(f.(tc.any(self))) } - end - - # Returns a `Possibility` whose values are any possible value of `self` - # for which `f` returns True. - def satisfying(&f) - self.class.new("#{name}.select(TODO)") {|test_case| - 3.times.first { - candidate = test_case.any(self) - candidate if f.(candidate) - } || test_case.reject - } - end - end - # Any integer in the range [m, n] is possible def integers(m, n) = Possibility.new("integers(#{m}, #{n})") {|tc| m + tc.choice(n - m) } @@ -271,480 +122,6 @@ module Minitest possibilities.map {|p| tc.any(p) } } end - - # We cap the maximum amount of entropy a test case can use. - # This prevents cases where the generated test case size explodes - # by effectively rejection - BUFFER_SIZE = 8 * 1024 - - # Returns a cached version of a function that maps a choice sequence to the - # status of calling a test function on a test case populated with it. Is - # able to take advantage of the structure of the test function to predict - # the result even if exact sequence of choices has not been seen - # previously. - # - # You can safely omit implementing this at the cost of somewhat increased - # shrinking time. 
- class CachedTestFunction - def initialize(&test_function) - @test_function = test_function - - # Tree nodes are either a point at which a choice occurs - # in which case they map the result of the choice to the - # tree node we are in after, or a Status object indicating - # mark_status was called at this point and all future - # choices are irrelevant. - # - # Note that a better implementation of this would use - # a Patricia trie, which implements long non-branching - # paths as an array inline. For simplicity we don't - # do that here. - @tree = {} - end - - def call(choices) - node = @tree - begin - choices.each do |c| - node = node.fetch(c) - # mark_status was called, thus future choices - # will be ignored. - if node.is_a?(Status) - fail if node == Status::OVERRUN - return node - end - end - # If we never entered an unknown region of the tree - # or hit a Status value, then we know that another - # choice will be made next and the result will overrun. - return Status::OVERRUN - rescue KeyError - end - - # We now have to actually call the test function to find out what - # happens. - test_case = TestCase.for_choices(choices) - @test_function.(test_case) - fail if test_case.status.nil? - - # We enter the choices made in a tree. - node = @tree - *rest, last = test_case.choices - rest.each do |c| - node = if node.has_key?(c) - node[c] - else - node[c] = {} - end - end - unless last.nil? - node[last] = test_case.status == Status::OVERRUN ? {} : test_case.status - end - - test_case.status - end - end - - class TestingState - attr_reader :result, :valid_test_cases, :calls - - def initialize(random:, test_function:, max_examples:) - @random, @_test_function, @max_examples = random, test_function, max_examples - @valid_test_cases = 0 - @calls = 0 - @test_is_trivial = false - end - - def test_function(test_case) - begin - @_test_function.(test_case) - rescue StopTest - end - - if test_case.status.nil? 
- test_case.status = Status::VALID - end - - @calls += 1 - - if test_case.status >= Status::INVALID && test_case.choices.length.zero? - @test_is_trivial = true - end - - if test_case.status >= Status::VALID - @valid_test_cases += 1 - - unless test_case.targeting_score.nil? - relevant_info = [test_case.targeting_score, test_case.choices] - if @best_scoring.nil? - @best_scoring = relevant_info - else - best, _ = @best_scoring - if test_case.targeting_score > best - @best_scoring = relevant_info - end - end - end - end - - if test_case.status == Status::INTERESTING && ( - @result.nil? || ((sort_key(test_case.choices) <=> sort_key(@result)) == -1) - ) - @result = test_case.choices - end - end - - # If any test cases have had `target()` called on them, do a simple - # hill climbing algorithm to attempt to optimise that target score. - def target - return if !@result.nil? || @best_scoring.nil? - - # Can we improve the score by changing choices[i] by `step`? - adjust = ->(i, step) do - fail if @best_scoring.nil? - - score, choices = @best_scoring - return false if choices[i] + step < 0 || choices[i].bit_length >= 64 - - attempt = choices.dup - attempt[i] += step - test_case = TestCase.new( - prefix: attempt, random: @random, max_size: BUFFER_SIZE - ) - test_function(test_case) - - fail if test_case.status.nil? - - test_case.status >= Status::VALID && - !test_case.targeting_score.nil? && - test_case.targeting_score > score - end - - while keep_generating? - i = @random.rand(@best_scoring[1].length) - sign = 0 - [1, -1].each do |k| - return unless keep_generating? - - if adjust.(i, k) - sign = k - break - end - end - - next if sign.zero? - - k = 1 - k *= 2 while keep_generating? && adjust.(i, sign * k) - - while k.positive? - while keep_generating? && adjust.(i, sign * k) - end - k /= 2 - end - end - end - - def run - generate - target - shrink - end - - def keep_generating? - !@test_is_trivial && - result.nil? 
&& - @valid_test_cases < @max_examples && - # We impose a limit on the maximum number of calls as - # well as the maximum number of valid examples. This is - # to avoid taking a prohibitively long time on tests which - # have hard or impossible to satisfy preconditions. - @calls < @max_examples * 10 - end - - # Run random generation until either we have found an interesting test - # case or hit the limit of how many test cases we should evaluate. - def generate - while keep_generating? && (@best_scoring.nil? || @valid_test_cases < @max_examples / 2) - test_function(TestCase.new(prefix: [], random: @random, max_size: BUFFER_SIZE)) - end - end - - # If we have found an interesting example, try shrinking it so that the - # choice sequence leading to our best example is shortlex smaller than - # the one we originally found. This improves the quality of the generated - # test case, as per our paper. - # - # https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf - def shrink - # if not self.result: - # return - return if @result.nil? || @result.empty? - - # Shrinking will typically try the same choice sequences over and over - # again, so we cache the test function in order to not end up - # reevaluating it in those cases. This also allows us to catch cases - # where we try something that is e.g. a prefix of something we've - # previously tried, which is guaranteed not to work. - cached = CachedTestFunction.new {|tc| test_function(tc) } - - consider = ->(choices) do - return true if choices == @result - - cached.(choices) == Status::INTERESTING - end - - fail unless consider.(@result) - - # We are going to perform a number of transformations to the current - # result, iterating until none of them make any progress - i.e. until - # we make it through an entire iteration of the loop without changing - # the result. 
- prev = nil - while prev != @result - prev = @result - - # A note on weird loop order: We iterate backwards through the choice - # sequence rather than forwards, because later bits tend to depend on - # earlier bits so it's easier to make changes near the end and - # deleting bits at the end may allow us to make changes earlier on - # that we we'd have missed. - # - # Note that we do not restart the loop at the end when we find a - # successful shrink. This is because things we've already tried are - # less likely to work. - # - # If this guess is wrong, that's OK, this isn't a correctness - # problem, because if we made a successful reduction then we are not - # at a fixed point and will restart the loop at the end the next time - # round. In some cases this can result in performance issues, but the - # end result should still be fine. - - # First try deleting each choice we made in chunks. We try longer - # chunks because this allows us to delete whole composite elements: - # e.g. deleting an element from a generated list requires us to - # delete both the choice of whether to include it and also the - # element itself, which may involve more than one choice. Some things - # will take more than 8 choices in the sequence. That's too bad, we - # may not be able to delete those. In Hypothesis proper we record the - # boundaries corresponding to `any` calls so that we can try deleting - # those, but that's pretty high overhead and also a bunch of slightly - # annoying code that it's not worth porting. - # - # We could instead do a quadratic amount of work to try all - # boundaries, but in general we don't want to do that because even a - # shrunk test case can involve a relatively large number of choices. - k = 8 - while k.positive? - i = @result.length - k - 1 - until i.negative? - if i >= @result.length - # Can happen if we successfully lowered the value at i - 1 - i -= 1 - next - end - attempt = @result[0...i] + (@result[i + k..] 
|| []) - - fail unless attempt.length < @result.length - - unless consider.(attempt) - # This fixes a common problem that occurs - # when you have dependencies on some - # length parameter. e.g. draw a number - # between 0 and 10 and then draw that - # many elements. This can't delete - # everything that occurs that way, but - # it can delete some things and often - # will get us unstuck when nothing else - # does. - if i.positive? && attempt[i - 1].positive? - attempt[i - 1] -= 1 - i += 1 if consider.(attempt) - end - - i -= 1 - end - end - - k /= 2 - end - - # Attempts to replace some indices in the current result with new - # values. Useful for some purely lexicographic reductions that we are - # about to perform. - replace = ->(values) do - fail if @result.nil? - attempt = @result.dup - values.each do |i, v| - # The size of self.result can change during shrinking. If that - # happens, stop attempting to make use of these replacements - # because some other shrink pass is better to run now. - return false if i >= attempt.length - attempt[i] = v - end - consider.(attempt) - end - - # Now we try replacing blocks of choices with zeroes. Note that - # unlike the above we skip k = 1 because we handle that in the next - # step. Often (but not always) a block of all zeroes is the shortlex - # smallest value that a region can be. - k = 8 - - while k > 1 - i = @result.length - k - until i.negative? - if replace.((i...i+k).to_h {|i| [i, 0]}) - # If we've succeeded then all of [i, i + k] is zero so we - # adjust i so that the next region does not overlap with this - # at all. - i -= k - else - # Otherwise we might still be able to zero some of these values - # but not the last one, so we just go back one. - i -= 1 - end - end - k /= 2 - end - - # Now try replacing each choice with a smaller value by doing a - # binary search. 
This will replace n with 0 or n - 1 if possible, but - # will also more efficiently replace it with a smaller number than - # doing multiple subtractions would. - i = @result.length - 1 - until i.negative? - # Attempt to replace - bin_search_down(0, @result[i]) {|v| replace.({i => v}) } - i -= 1 - end - - # NB from here on this is just showing off cool shrinker tricks and - # you probably don't need to worry about it and can skip these bits - # unless they're easy and you want bragging rights for how much - # better you are at shrinking than the local QuickCheck equivalent. - - # Try sorting out of order ranges of choices, as `sort(x) <= x`, so - # this is always a lexicographic reduction. - k = 8 - # while k > 1: - while k > 1 - (@result.length - k - 1).downto(0).each do |i| - consider.(@result[0...i] + @result[i...i+k].sort + @result[i+k..]) - end - k /= 2 - end - - # Try adjusting nearby pairs of integers by redistributing value - # between them. This is useful for tests that depend on the sum of - # some generated values. - [2, 1].each do |k| - (@result.length - k - 1).downto(0).each do |i| - j = i + k - # This check is necessary because the previous changes might have - # shrunk the size of result, but also it's tedious to write tests - # for this so I didn't. - if j < @result.length - # Try swapping out of order pairs - if @result[i] > @result[j] - replace.({j => @result[i], i => @result[j]}) - end - # j could be out of range if the previous swap succeeded. - if j < @result.length && @result[i].positive? - prev_i = @result[i] - prev_j = @result[j] - bin_search_down(0, prev_i) {|v| - replace.({i => v, j => prev_j + (prev_i - v)}) - } - end - end - end - end - end - end - - private - - # Returns a key that can be used for the shrinking order of test cases. - def sort_key(choices) = [choices.length, choices] - - # Returns n in [lo, hi] such that f(n) is True, where it is assumed and - # will not be checked that f(hi) is True. 
- # - # Will return `lo` if `f(lo)` is True, otherwise the only guarantee that is - # made is that `f(n - 1)` is False and `f(n)` is True. In particular this - # does *not* guarantee to find the smallest value, only a locally minimal - # one. - def bin_search_down(low, high, &f) - return low if f.(low) - while low + 1 < high - mid = low + (high - low) / 2 - if f.(mid) - high = mid - else - low = mid - end - end - high - end - end - - class DirectoryDb - def initialize(dir) - @dir = dir - Dir.mkdir(@dir) - rescue SystemCallError => e - raise unless e.errno == Errno::EEXIST::Errno - end - - def [](key) - f = file(key) - return nil unless File.exist?(f) - - File.read(f) - end - - def []=(key, value) - File.write(file(key), value) - end - - private - - def file(key) - File.join(@dir, Digest::SHA1.hexdigest(key)[0...10]) - end - end - - class Error< StandardError; end - - # Attempted to make choices on a test case that has been completed. - class Frozen < Error; end - - # Raised when a test should stop executing early. - class StopTest < Error; end - - # Raised when a test has no valid examples. 
# A minimal persistent key/value store: each value lives in its own file
# inside a single directory.
#
# File names are the first 10 hex characters of the SHA1 digest of the key,
# so keys whose digests share that prefix map to the same file.
class DirectoryDb
  # Creates the backing directory if it does not exist yet.
  #
  # @param dir [String] path of the directory that holds the entries
  # @raise [SystemCallError] if the directory cannot be created, or if the
  #   path already exists but is not a directory
  def initialize(dir)
    @dir = dir
    Dir.mkdir(@dir)
  rescue Errno::EEXIST
    # Re-using an existing directory is the expected case on every run after
    # the first. Anything else occupying the path would make every later
    # read miss and every write fail, so surface that immediately.
    raise unless File.directory?(@dir)
  end

  # @param key [String]
  # @return [String, nil] the stored value, or nil when nothing is stored
  def [](key)
    # Read and rescue rather than check-then-read, so an entry that
    # disappears between the two steps is reported as "missing".
    File.read(file(key))
  rescue Errno::ENOENT
    nil
  end

  # Stores `value` under `key`, replacing any previous entry.
  #
  # @param key [String]
  # @param value [String]
  def []=(key, value)
    File.write(file(key), value)
  end

  private

  # Path of the file that backs `key`.
  def file(key)
    File.join(@dir, Digest::SHA1.hexdigest(key)[0...10])
  end
end
# Represents some range of values that might be used in a test, that can be
# requested from a `TestCase`. Pass one of these to `TestCase#any` to get a
# concrete value.
class Possibility
  # `produce` is the block given to the constructor; it is called with the
  # test case and returns one concrete value. `name` is the label returned
  # by `inspect` and `to_s`.
  attr_reader :produce, :name

  # @param name [String] human-readable label
  # @yieldparam test_case the object values are drawn from
  # @yieldreturn one concrete value
  def initialize(name = "TODO", &produce)
    @name = name
    @produce = produce
  end

  def inspect = name
  def to_s = name

  # Returns a `Possibility` where values come from applying `f` to some
  # possible value for `self`.
  def map(&f)
    self.class.new("#{name}.map(TODO)") { |tc| f.call(tc.any(self)) }
  end

  # Returns a `Possibility` where values come from applying `f` (which
  # should return a new `Possibility`) to some possible value for `self`,
  # then returning a possible value from that.
  def bind(&f)
    self.class.new("#{name}.bind(TODO)") { |tc| tc.any(f.(tc.any(self))) }
  end

  # Returns a `Possibility` whose values are any possible value of `self`
  # for which `f` returns a truthy result.
  #
  # Up to three candidates are drawn; if none satisfies `f`, `reject` is
  # called on the test case.
  def satisfying(&f)
    self.class.new("#{name}.select(TODO)") do |test_case|
      # Track acceptance with an explicit flag rather than the candidate's
      # own truthiness, so that `false` and `nil` remain valid results.
      # (`Enumerable#first` ignores a block, so it cannot drive this loop.)
      accepted = false
      candidate = nil
      3.times do
        candidate = test_case.any(self)
        accepted = f.(candidate)
        break if accepted
      end

      test_case.reject unless accepted
      candidate
    end
  end
end
# Represents a single generated test case, which consists of an underlying
# sequence of choices that produce possibilities.
class TestCase
  # Returns a test case that replays exactly this series of choices. No
  # random source is attached and the size limit is the length of `choices`,
  # so asking for more choices than were supplied marks the case OVERRUN.
  def self.for_choices(choices, print_results: false)
    new(prefix: choices, random: nil, max_size: choices.length, print_results:)
  end

  attr_accessor :status
  attr_reader :choices, :targeting_score

  # @param prefix [Array<Integer>] choices to replay before using `random`
  # @param random [#rand, nil] source of randomness once `prefix` is used up
  # @param max_size [Numeric] maximum number of choices before OVERRUN
  # @param print_results [Boolean] print each top-level draw with `puts`
  def initialize(prefix:, random:, max_size: Float::INFINITY, print_results: false)
    @prefix = prefix
    @random = random
    @max_size = max_size
    @print_results = print_results
    @choices = []
    @status = nil
    @depth = 0
    @targeting_score = nil
  end

  # Returns a number in the range [0, n].
  def choice(n)
    # Draw from the inclusive range: `rand(n)` would exclude `n` itself and
    # raise ArgumentError for `n == 0`.
    result = make_choice(n) { @random.rand(0..n) }

    puts "choice(#{n}): #{result}" if should_print?

    result
  end

  # Returns 1 with probability `p` and 0 otherwise. The result is an Integer
  # (both values are truthy in Ruby), so compare it rather than branching on
  # it directly. For `p <= 0` / `p >= 1` the outcome is recorded as a forced
  # choice and no randomness is consumed.
  def weighted(p)
    result = if p <= 0 then forced_choice(0)
             elsif p >= 1 then forced_choice(1)
             else make_choice(1) { @random.rand <= p ? 1 : 0 }
             end

    puts "weighted(#{p}): #{result}" if should_print?

    result
  end

  # Inserts a fake choice into the choice sequence, as if some call to
  # choice() had returned `n`. You almost never need this, but sometimes it
  # can be a useful hint to the shrinker.
  def forced_choice(n)
    guard_choice(n)

    @choices << n
    n
  end

  # Mark this test case as invalid.
  def reject = mark_status(Status::INVALID)

  # If this precondition is not met, abort the test and mark this test case
  # as invalid.
  def assume(precondition)
    return if precondition

    reject
  end

  # Set a score to maximize. Multiple calls to this function will override
  # previous ones.
  #
  # The name and idea come from Löscher, Andreas, and Konstantinos Sagonas.
  # "Targeted property-based testing." ISSTA. 2017, but the implementation
  # is based on that found in Hypothesis, which is not that similar to
  # anything described in the paper.
  def target(score) = @targeting_score = score

  # Return a possible value from `possibility`.
  def any(possibility)
    begin
      # Depth tracking keeps nested draws quiet: only the outermost `any`
      # (depth back at zero) is printed.
      @depth += 1
      result = possibility.produce.(self)
    ensure
      @depth -= 1
    end

    puts "any(#{possibility}): #{result}" if should_print?

    result
  end

  # Set the status and raise StopTest. Raises Frozen if a status has
  # already been set.
  def mark_status(status)
    raise Frozen unless self.status.nil?

    @status = status
    raise StopTest
  end

  private

  def should_print? = @print_results && @depth.zero?

  # Checks shared by every kind of choice: `n` must be a non-negative value
  # of at most 64 bits, the test case must still be running, and there must
  # be room left for one more choice (otherwise the case is marked OVERRUN).
  def guard_choice(n)
    raise RangeError, "Invalid choice #{n}" if n.bit_length > 64 || n.negative?
    raise Frozen unless @status.nil?

    mark_status(Status::OVERRUN) if @choices.length >= @max_size
  end

  # Make a choice in [0, n], by calling rnd_method if randomness is needed.
  # While the prefix has unused entries the next one is replayed instead; a
  # value larger than `n` is still recorded, then marks the case INVALID.
  def make_choice(n, &rnd_method)
    guard_choice(n)

    position = @choices.length
    result = position < @prefix.length ? @prefix[position] : rnd_method.()
    @choices << result

    mark_status(Status::INVALID) if result > n

    result
  end
end
+ test_case.status = Status::VALID + end + + @calls += 1 + + if test_case.status >= Status::INVALID && test_case.choices.length.zero? + @test_is_trivial = true + end + + if test_case.status >= Status::VALID + @valid_test_cases += 1 + + unless test_case.targeting_score.nil? + relevant_info = [test_case.targeting_score, test_case.choices] + if @best_scoring.nil? + @best_scoring = relevant_info + else + best, _ = @best_scoring + if test_case.targeting_score > best + @best_scoring = relevant_info + end + end + end + end + + if test_case.status == Status::INTERESTING && ( + @result.nil? || ((sort_key(test_case.choices) <=> sort_key(@result)) == -1) + ) + @result = test_case.choices + end + end + + # If any test cases have had `target()` called on them, do a simple + # hill climbing algorithm to attempt to optimise that target score. + def target + return if !@result.nil? || @best_scoring.nil? + + # Can we improve the score by changing choices[i] by `step`? + adjust = ->(i, step) do + fail if @best_scoring.nil? + + score, choices = @best_scoring + return false if choices[i] + step < 0 || choices[i].bit_length >= 64 + + attempt = choices.dup + attempt[i] += step + test_case = TestCase.new( + prefix: attempt, random: @random, max_size: BUFFER_SIZE + ) + test_function(test_case) + + fail if test_case.status.nil? + + test_case.status >= Status::VALID && + !test_case.targeting_score.nil? && + test_case.targeting_score > score + end + + while keep_generating? + i = @random.rand(@best_scoring[1].length) + sign = 0 + [1, -1].each do |k| + return unless keep_generating? + + if adjust.(i, k) + sign = k + break + end + end + + next if sign.zero? + + k = 1 + k *= 2 while keep_generating? && adjust.(i, sign * k) + + while k.positive? + while keep_generating? && adjust.(i, sign * k) + end + k /= 2 + end + end + end + + def run + generate + target + shrink + end + + def keep_generating? + !@test_is_trivial && + result.nil? 
&& + @valid_test_cases < @max_examples && + # We impose a limit on the maximum number of calls as + # well as the maximum number of valid examples. This is + # to avoid taking a prohibitively long time on tests which + # have hard or impossible to satisfy preconditions. + @calls < @max_examples * 10 + end + + # Run random generation until either we have found an interesting test + # case or hit the limit of how many test cases we should evaluate. + def generate + while keep_generating? && (@best_scoring.nil? || @valid_test_cases < @max_examples / 2) + test_function(TestCase.new(prefix: [], random: @random, max_size: BUFFER_SIZE)) + end + end + + # If we have found an interesting example, try shrinking it so that the + # choice sequence leading to our best example is shortlex smaller than + # the one we originally found. This improves the quality of the generated + # test case, as per our paper. + # + # https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf + def shrink + # if not self.result: + # return + return if @result.nil? || @result.empty? + + # Shrinking will typically try the same choice sequences over and over + # again, so we cache the test function in order to not end up + # reevaluating it in those cases. This also allows us to catch cases + # where we try something that is e.g. a prefix of something we've + # previously tried, which is guaranteed not to work. + cached = CachedTestFunction.new {|tc| test_function(tc) } + + consider = ->(choices) do + return true if choices == @result + + cached.(choices) == Status::INTERESTING + end + + fail unless consider.(@result) + + # We are going to perform a number of transformations to the current + # result, iterating until none of them make any progress - i.e. until + # we make it through an entire iteration of the loop without changing + # the result. 
+ prev = nil + while prev != @result + prev = @result + + # A note on weird loop order: We iterate backwards through the choice + # sequence rather than forwards, because later bits tend to depend on + # earlier bits so it's easier to make changes near the end and + # deleting bits at the end may allow us to make changes earlier on + # that we we'd have missed. + # + # Note that we do not restart the loop at the end when we find a + # successful shrink. This is because things we've already tried are + # less likely to work. + # + # If this guess is wrong, that's OK, this isn't a correctness + # problem, because if we made a successful reduction then we are not + # at a fixed point and will restart the loop at the end the next time + # round. In some cases this can result in performance issues, but the + # end result should still be fine. + + # First try deleting each choice we made in chunks. We try longer + # chunks because this allows us to delete whole composite elements: + # e.g. deleting an element from a generated list requires us to + # delete both the choice of whether to include it and also the + # element itself, which may involve more than one choice. Some things + # will take more than 8 choices in the sequence. That's too bad, we + # may not be able to delete those. In Hypothesis proper we record the + # boundaries corresponding to `any` calls so that we can try deleting + # those, but that's pretty high overhead and also a bunch of slightly + # annoying code that it's not worth porting. + # + # We could instead do a quadratic amount of work to try all + # boundaries, but in general we don't want to do that because even a + # shrunk test case can involve a relatively large number of choices. + k = 8 + while k.positive? + i = @result.length - k - 1 + until i.negative? + if i >= @result.length + # Can happen if we successfully lowered the value at i - 1 + i -= 1 + next + end + attempt = @result[0...i] + (@result[i + k..] 
|| []) + + fail unless attempt.length < @result.length + + unless consider.(attempt) + # This fixes a common problem that occurs + # when you have dependencies on some + # length parameter. e.g. draw a number + # between 0 and 10 and then draw that + # many elements. This can't delete + # everything that occurs that way, but + # it can delete some things and often + # will get us unstuck when nothing else + # does. + if i.positive? && attempt[i - 1].positive? + attempt[i - 1] -= 1 + i += 1 if consider.(attempt) + end + + i -= 1 + end + end + + k /= 2 + end + + # Attempts to replace some indices in the current result with new + # values. Useful for some purely lexicographic reductions that we are + # about to perform. + replace = ->(values) do + fail if @result.nil? + attempt = @result.dup + values.each do |i, v| + # The size of self.result can change during shrinking. If that + # happens, stop attempting to make use of these replacements + # because some other shrink pass is better to run now. + return false if i >= attempt.length + attempt[i] = v + end + consider.(attempt) + end + + # Now we try replacing blocks of choices with zeroes. Note that + # unlike the above we skip k = 1 because we handle that in the next + # step. Often (but not always) a block of all zeroes is the shortlex + # smallest value that a region can be. + k = 8 + + while k > 1 + i = @result.length - k + until i.negative? + if replace.((i...i+k).to_h {|i| [i, 0]}) + # If we've succeeded then all of [i, i + k] is zero so we + # adjust i so that the next region does not overlap with this + # at all. + i -= k + else + # Otherwise we might still be able to zero some of these values + # but not the last one, so we just go back one. + i -= 1 + end + end + k /= 2 + end + + # Now try replacing each choice with a smaller value by doing a + # binary search. 
This will replace n with 0 or n - 1 if possible, but + # will also more efficiently replace it with a smaller number than + # doing multiple subtractions would. + i = @result.length - 1 + until i.negative? + # Attempt to replace + bin_search_down(0, @result[i]) {|v| replace.({i => v}) } + i -= 1 + end + + # NB from here on this is just showing off cool shrinker tricks and + # you probably don't need to worry about it and can skip these bits + # unless they're easy and you want bragging rights for how much + # better you are at shrinking than the local QuickCheck equivalent. + + # Try sorting out of order ranges of choices, as `sort(x) <= x`, so + # this is always a lexicographic reduction. + k = 8 + # while k > 1: + while k > 1 + (@result.length - k - 1).downto(0).each do |i| + consider.(@result[0...i] + @result[i...i+k].sort + @result[i+k..]) + end + k /= 2 + end + + # Try adjusting nearby pairs of integers by redistributing value + # between them. This is useful for tests that depend on the sum of + # some generated values. + [2, 1].each do |k| + (@result.length - k - 1).downto(0).each do |i| + j = i + k + # This check is necessary because the previous changes might have + # shrunk the size of result, but also it's tedious to write tests + # for this so I didn't. + if j < @result.length + # Try swapping out of order pairs + if @result[i] > @result[j] + replace.({j => @result[i], i => @result[j]}) + end + # j could be out of range if the previous swap succeeded. + if j < @result.length && @result[i].positive? + prev_i = @result[i] + prev_j = @result[j] + bin_search_down(0, prev_i) {|v| + replace.({i => v, j => prev_j + (prev_i - v)}) + } + end + end + end + end + end + end + + private + + # Returns a key that can be used for the shrinking order of test cases. + def sort_key(choices) = [choices.length, choices] + + # Returns n in [low, high] such that f(n) is true, where it is assumed and + # will not be checked that f(high) is true.
+ # + # Will return `low` if `f(low)` is true, otherwise the only guarantee that is + # made is that `f(n - 1)` is false and `f(n)` is true. In particular this + # does *not* guarantee to find the smallest value, only a locally minimal + # one. + def bin_search_down(low, high, &f) + return low if f.(low) + while low + 1 < high + mid = low + (high - low) / 2 + if f.(mid) + high = mid + else + low = mid + end + end + high + end + end + + # Returns a cached version of a function that maps a choice sequence to the + # status of calling a test function on a test case populated with it. Is + # able to take advantage of the structure of the test function to predict + # the result even if the exact sequence of choices has not been seen + # previously. + # + # You can safely omit implementing this at the cost of somewhat increased + # shrinking time. + class CachedTestFunction + def initialize(&test_function) + @test_function = test_function + + # Tree nodes are either a point at which a choice occurs + # in which case they map the result of the choice to the + # tree node we are in after, or a Status object indicating + # mark_status was called at this point and all future + # choices are irrelevant. + # + # Note that a better implementation of this would use + # a Patricia trie, which implements long non-branching + # paths as an array inline. For simplicity we don't + # do that here. + @tree = {} + end + + def call(choices) + node = @tree + begin + choices.each do |c| + node = node.fetch(c) + # mark_status was called, thus future choices + # will be ignored. + if node.is_a?(Status) + fail if node == Status::OVERRUN + return node + end + end + # If we never entered an unknown region of the tree + # or hit a Status value, then we know that another + # choice will be made next and the result will overrun. + return Status::OVERRUN + rescue KeyError + end + + # We now have to actually call the test function to find out what + # happens. 
+ test_case = TestCase.for_choices(choices) + @test_function.(test_case) + fail if test_case.status.nil? + + # We enter the choices made in a tree. + node = @tree + *rest, last = test_case.choices + rest.each do |c| + node = if node.has_key?(c) + node[c] + else + node[c] = {} + end + end + unless last.nil? + node[last] = test_case.status == Status::OVERRUN ? {} : test_case.status + end + + test_case.status + end + end +end diff --git a/lib/minitest/thesis/version.rb b/lib/minitest/thesis/version.rb new file mode 100644 index 0000000..4ac2525 --- /dev/null +++ b/lib/minitest/thesis/version.rb @@ -0,0 +1,3 @@ +module Minitest::Thesis + VERSION = "0.1.0" +end diff --git a/test/minitest/thesis_test.rb b/test/minitest/thesis_test.rb index 9e2e36f..e0c1ade 100644 --- a/test/minitest/thesis_test.rb +++ b/test/minitest/thesis_test.rb @@ -1,442 +1,469 @@ require "test_helper" -class Minitest::ThesisTest < Minitest::Thesis::Test - class Failure < StandardError; end - - def test_finds_small_list - (0...10).each do |seed| - out, _ = capture_io do - assert_raises(Minitest::Assertion) do - run_test("finds_small_list", database: {}, random: Random.new(seed)) do |test_case| - ls = test_case.any(lists(integers(0, 10_000))) - assert ls.sum <= 1_000 +module Minitest::Thesis + class ThesisTest < Minitest::Thesis::Test + class Failure < StandardError; end + + def test_finds_small_list + (0...10).each do |seed| + out, _ = capture_io do + assert_raises(Minitest::Assertion) do + run_test("finds_small_list", database: {}, random: Random.new(seed)) do |test_case| + ls = test_case.any(lists(integers(0, 10_000))) + assert ls.sum <= 1_000 + end end end + + assert_equal <<~OUT, out + any(lists(integers(0, 10000))): [1001] + OUT end + end - assert_equal <<~OUT, out - any(lists(integers(0, 10000))): [1001] - OUT + # Minithesis can't really handle shrinking arbitrary monadic bind, but length + # parameters are a common case of monadic bind that it has a little bit of + # special casing for. 
This test ensures that that special casing works. + # + # The problem is that if you generate a list by drawing a length and then + # drawing that many elements, you can end up with something like ``[1001, 0, + # 0]`` then deleting those zeroes in the middle is a pain. minithesis will + # solve this by first sorting those elements, so that we have ``[0, 0, + # 1001]``, and then lowering the length by two, turning it into ``[1001]`` as + # desired. + def test_finds_small_list_even_with_bad_lists + bad_list = Possibility.new("bad_list") {|tc| + n = tc.choice(10) + Array.new(n) { tc.choice(10_000) } + } + + (0...10).each do |seed| + out, _ = capture_io do + assert_raises(Minitest::Assertion) do + run_test("finds_small_list_even_with_bad_lists", database: {}, random: Random.new(seed)) do |test_case| + ls = test_case.any(bad_list) + assert ls.sum <= 1_000 + end + end + end + + assert_equal <<~OUT, out + any(bad_list): [1001] + OUT + end end - end - # Minithesis can't really handle shrinking arbitrary monadic bind, but length - # parameters are a common case of monadic bind that it has a little bit of - # special casing for. This test ensures that that special casing works. - # - # The problem is that if you generate a list by drawing a length and then - # drawing that many elements, you can end up with something like ``[1001, 0, - # 0]`` then deleting those zeroes in the middle is a pain. minithesis will - # solve this by first sorting those elements, so that we have ``[0, 0, - # 1001]``, and then lowering the length by two, turning it into ``[1001]`` as - # desired. 
- def test_finds_small_list_even_with_bad_lists - bad_list = Possibility.new("bad_list") {|tc| - n = tc.choice(10) - Array.new(n) { tc.choice(10_000) } - } - - (0...10).each do |seed| + def test_reduces_additive_pairs out, _ = capture_io do assert_raises(Minitest::Assertion) do - run_test("finds_small_list_even_with_bad_lists", database: {}, random: Random.new(seed)) do |test_case| - ls = test_case.any(bad_list) - assert ls.sum <= 1_000 + run_test("reduces_additive_pairs", database: {}, max_examples: 10_000) do |test_case| + m = test_case.choice(1000) + n = test_case.choice(1000) + assert m + n <= 1000 end end end assert_equal <<~OUT, out - any(bad_list): [1001] + choice(1000): 1 + choice(1000): 1000 OUT end - end - - def test_reduces_additive_pairs - out, _ = capture_io do - assert_raises(Minitest::Assertion) do - run_test("reduces_additive_pairs", database: {}, max_examples: 10_000) do |test_case| - m = test_case.choice(1000) - n = test_case.choice(1000) - assert m + n <= 1000 - end - end - end - - assert_equal <<~OUT, out - choice(1000): 1 - choice(1000): 1000 - OUT - end - def test_reuses_results_from_the_database - Dir.mktmpdir do |tmpdir| - db = DirectoryDb.new(tmpdir) - count = 0 + def test_reuses_results_from_the_database + Dir.mktmpdir do |tmpdir| + db = DirectoryDb.new(tmpdir) + count = 0 - run = -> { - assert_raises(Minitest::Assertion) do - run_test("reuses_results_from_the_database", database: db, quiet: true) do |test_case| - count += 1 - assert test_case.choice(10_000) < 10 + run = -> { + assert_raises(Minitest::Assertion) do + run_test("reuses_results_from_the_database", database: db, quiet: true) do |test_case| + count += 1 + assert test_case.choice(10_000) < 10 + end end - end - } - - run.() + } - assert_equal 1, Dir.children(tmpdir).length - prev_count = count + run.() - run.() + assert_equal 1, Dir.children(tmpdir).length + prev_count = count - assert_equal 1, Dir.children(tmpdir).length - assert_equal prev_count + 2, count - end - end + run.() 
- def test_test_cases_satisfy_preconditions - run_test("test_cases_satisfy_preconditions", database: {}) do |test_case| - n = test_case.choice(10) - test_case.assume(n != 0) - refute_equal 0, n + assert_equal 1, Dir.children(tmpdir).length + assert_equal prev_count + 2, count + end end - end - def test_error_on_too_strict_precondition - assert_raises(Unsatisfiable) do - run_test("error_on_too_strict_precondition", database: {}) do |test_case| + def test_test_cases_satisfy_preconditions + run_test("test_cases_satisfy_preconditions", database: {}) do |test_case| n = test_case.choice(10) - test_case.reject + test_case.assume(n != 0) + refute_equal 0, n end end - end - - def test_error_on_unbounded_test_function - orig_buffer_size = BUFFER_SIZE - suppress_warnings do - Minitest::Thesis.const_set(:BUFFER_SIZE, 10) - end - assert_raises(Unsatisfiable) do - run_test("error_on_unbounded_test_function", database: {}, max_examples: 5) do |test_case| - loop do + def test_error_on_too_strict_precondition + assert_raises(Unsatisfiable) do + run_test("error_on_too_strict_precondition", database: {}) do |test_case| test_case.choice(10) + test_case.reject end end end - ensure - suppress_warnings do - Minitest::Thesis.const_set(:BUFFER_SIZE, orig_buffer_size) - end - end - def test_function_cache - tf = ->(tc) do - tc.mark_status(Status::INTERESTING) if tc.choice(1_000) >= 200 - tc.reject if tc.choice(1).zero? 
- end - - state = TestingState.new(random: Random.new(0), test_function: tf, max_examples: 100) - cache = CachedTestFunction.new {|tc| state.test_function(tc) } + def test_error_on_unbounded_test_function + orig_buffer_size = BUFFER_SIZE + suppress_warnings do + Minitest::Thesis.const_set(:BUFFER_SIZE, 10) + end - assert_equal Status::VALID, cache.([1, 1]) - assert_equal Status::OVERRUN, cache.([1]) - assert_equal Status::INTERESTING, cache.([1_000]) - assert_equal Status::INTERESTING, cache.([1_000]) - assert_equal Status::INTERESTING, cache.([1_000, 1]) + assert_raises(Unsatisfiable) do + run_test("error_on_unbounded_test_function", database: {}, max_examples: 5) do |test_case| + loop do + test_case.choice(10) + end + end + end + ensure + suppress_warnings do + Minitest::Thesis.const_set(:BUFFER_SIZE, orig_buffer_size) + end + end - assert_equal 2, state.calls - end + def test_function_cache + tf = ->(tc) do + tc.mark_status(Status::INTERESTING) if tc.choice(1_000) >= 200 + tc.reject if tc.choice(1).zero? + end - # Targeting has a number of places it checks for whether we've exceeded the - # generation limits. This makes sure we've checked them all. 
- def test_max_examples_is_not_exceeded - (1...100).each do |max_examples| - calls = 0 + state = TestingState.new(random: Random.new(0), test_function: tf, max_examples: 100) + cache = CachedTestFunction.new {|tc| state.test_function(tc) } - run_test( - "max_examples_is_not_exceeded", - database: {}, - random: Random.new(0), - max_examples:, - ) do |tc| - m = 10000 - n = tc.choice(m) - calls += 1 - tc.target(n * (m - n)) - end + assert_equal Status::VALID, cache.([1, 1]) + assert_equal Status::OVERRUN, cache.([1]) + assert_equal Status::INTERESTING, cache.([1_000]) + assert_equal Status::INTERESTING, cache.([1_000]) + assert_equal Status::INTERESTING, cache.([1_000, 1]) - assert_equal max_examples, calls + assert_equal 2, state.calls end - end + # Targeting has a number of places it checks for whether we've exceeded the + # generation limits. This makes sure we've checked them all. + def test_max_examples_is_not_exceeded + (1...100).each do |max_examples| + calls = 0 - # Targeting has a number of places it checks for whether we've exceeded the - # generation limits. This makes sure we've checked them all. 
- def test_finds_a_local_maximum - (0...100).each do |seed| - assert_raises(Minitest::Assertion) do run_test( - "finds_a_local_maximum", + "max_examples_is_not_exceeded", database: {}, - random: Random.new(seed), - max_examples: 200, - quiet: true + random: Random.new(0), + max_examples:, ) do |tc| - m = tc.choice(1000) - n = tc.choice(1000) - score = -((m - 500) ** 2 + (n - 500) ** 2) - tc.target(score) - assert m != 500 || n != 500 + m = 10000 + n = tc.choice(m) + calls += 1 + tc.target(n * (m - n)) end + + assert_equal max_examples, calls end end - end - def test_can_target_a_score_upwards_to_interesting - out, _ = capture_io do - assert_raises(Minitest::Assertion) do - run_test("can_target_a_score_upwards_to_interesting", database: {}, max_examples: 1000) do |test_case| - n = test_case.choice(1000) - m = test_case.choice(1000) - score = n + m - test_case.target(score) - assert score < 2000 + + # Targeting has a number of places it checks for whether we've exceeded the + # generation limits. This makes sure we've checked them all. 
+ def test_finds_a_local_maximum + (0...100).each do |seed| + assert_raises(Minitest::Assertion) do + run_test( + "finds_a_local_maximum", + database: {}, + random: Random.new(seed), + max_examples: 200, + quiet: true + ) do |tc| + m = tc.choice(1000) + n = tc.choice(1000) + score = -((m - 500) ** 2 + (n - 500) ** 2) + tc.target(score) + assert m != 500 || n != 500 + end end end end - assert_equal <<~OUT, out - choice(1000): 1000 - choice(1000): 1000 - OUT - end - - def test_can_target_a_score_upwards_without_failing - max_score = 0 + def test_can_target_a_score_upwards_to_interesting + out, _ = capture_io do + assert_raises(Minitest::Assertion) do + run_test("can_target_a_score_upwards_to_interesting", database: {}, max_examples: 1000) do |test_case| + n = test_case.choice(1000) + m = test_case.choice(1000) + score = n + m + test_case.target(score) + assert score < 2000 + end + end + end - run_test("can_target_a_score_upwards_without_failing", database: {}, max_examples: 1000) do |test_case| - n = test_case.choice(1000) - m = test_case.choice(1000) - score = n + m - test_case.target(score) - max_score = [score, max_score].max + assert_equal <<~OUT, out + choice(1000): 1000 + choice(1000): 1000 + OUT end - assert_equal 2000, max_score - end + def test_can_target_a_score_upwards_without_failing + max_score = 0 - def test_targeting_when_most_do_not_benefit - big = 10_000 - - out, _ = capture_io do - assert_raises(Minitest::Assertion) do - run_test("targeting_when_most_do_not_benefit", database: {}, max_examples: 1000) do |test_case| - test_case.choice(1000) - test_case.choice(1000) - score = test_case.choice(big) - test_case.target(score) - assert score < big - end + run_test("can_target_a_score_upwards_without_failing", database: {}, max_examples: 1000) do |test_case| + n = test_case.choice(1000) + m = test_case.choice(1000) + score = n + m + test_case.target(score) + max_score = [score, max_score].max end + + assert_equal 2000, max_score end - assert_equal <<~OUT, 
out - choice(1000): 0 - choice(1000): 0 - choice(#{big}): #{big} - OUT - end + def test_targeting_when_most_do_not_benefit + big = 10_000 - def test_can_target_a_score_downwards - out, _ = capture_io do - assert_raises(Minitest::Assertion) do - run_test("can_target_a_score_downwards", database: {}, max_examples: 1000) do |test_case| - n = test_case.choice(1000) - m = test_case.choice(1000) - score = n + m - test_case.target(-score) - assert score.positive? + out, _ = capture_io do + assert_raises(Minitest::Assertion) do + run_test("targeting_when_most_do_not_benefit", database: {}, max_examples: 1000) do |test_case| + test_case.choice(1000) + test_case.choice(1000) + score = test_case.choice(big) + test_case.target(score) + assert score < big + end end end + + assert_equal <<~OUT, out + choice(1000): 0 + choice(1000): 0 + choice(#{big}): #{big} + OUT end - assert_equal <<~OUT, out - choice(1000): 0 - choice(1000): 0 - OUT - end + def test_can_target_a_score_downwards + out, _ = capture_io do + assert_raises(Minitest::Assertion) do + run_test("can_target_a_score_downwards", database: {}, max_examples: 1000) do |test_case| + n = test_case.choice(1000) + m = test_case.choice(1000) + score = n + m + test_case.target(-score) + assert score.positive? + end + end + end - def test_prints_a_top_level_weighted - out, _ = capture_io do - assert_raises(Minitest::Assertion) do - run_test("prints_a_top_level_weighted", database: {}, max_examples: 1000) do |test_case| - assert test_case.weighted(0.5).nonzero? + assert_equal <<~OUT, out + choice(1000): 0 + choice(1000): 0 + OUT + end + + def test_prints_a_top_level_weighted + out, _ = capture_io do + assert_raises(Minitest::Assertion) do + run_test("prints_a_top_level_weighted", database: {}, max_examples: 1000) do |test_case| + assert test_case.weighted(0.5).nonzero? 
+ end end end + + assert_equal <<~OUT, out + weighted(0.5): 0 + OUT end - assert_equal <<~OUT, out - weighted(0.5): 0 - OUT - end + def test_errors_when_using_frozen + tc = TestCase.for_choices([0]) + tc.status = Status::VALID - def test_errors_when_using_frozen - tc = TestCase.for_choices([0]) - tc.status = Status::VALID + assert_raises(Frozen) do + tc.mark_status(Status::INTERESTING) + end - assert_raises(Frozen) do - tc.mark_status(Status::INTERESTING) - end + assert_raises(Frozen) do + tc.choice(10) + end - assert_raises(Frozen) do - tc.choice(10) + assert_raises(Frozen) do + tc.forced_choice(10) + end end - assert_raises(Frozen) do - tc.forced_choice(10) + def test_errors_on_too_large_choice + tc = TestCase.for_choices([0]) + assert_raises(RangeError) do + tc.choice(2 ** 64) + end end - end - def test_errors_on_too_large_choice - tc = TestCase.for_choices([0]) - assert_raises(RangeError) do - tc.choice(2 ** 64) + def test_can_choose_full_64_bits + run_test("can_choose_full_64_bits", database: {}) do |tc| + tc.choice(2 ** 64 - 1) + end end - end - def test_can_choose_full_64_bits - run_test("can_choose_full_64_bits", database: {}) do |tc| - tc.choice(2 ** 64 - 1) + def test_mapped_possibility + run_test("mapped_possibility", database: {}) do |tc| + n = tc.any(integers(0, 5).map {|n| n * 2 }) + assert n.even? + end end - end - def test_mapped_possibility - run_test("mapped_possibility", database: {}) do |tc| - n = tc.any(integers(0, 5).map {|n| n * 2 }) - assert n.even? + def test_selected_possibility + run_test("selected_possibility", database: {}) do |tc| + n = tc.any( + integers(0, 5) + .satisfying(&:even?) + ) + assert n.even? + end end - end - def test_selected_possibility - run_test("selected_possibility", database: {}) do |tc| - n = tc.any( - integers(0, 5) - .satisfying(&:even?) - ) - assert n.even? 
+ def test_bound_possibility + run_test("bound_possibility", database: {}) do |tc| + m, n = tc.any( + integers(0, 5).bind {|m| tuples(just(m), integers(m, m + 10)) } + ) + assert (m..m+10).cover?(n) + end end - end - def test_bound_possibility - run_test("bound_possibility", database: {}) do |tc| - m, n = tc.any( - integers(0, 5).bind {|m| tuples(just(m), integers(m, m + 10)) } - ) - assert (m..m+10).cover?(n) + def test_cannot_witness_nothing + assert_raises(Unsatisfiable) do + run_test("cannot_witness_nothing", database: {}) do |tc| + tc.any(nothing) + end + end end - end - def test_cannot_witness_nothing - assert_raises(Unsatisfiable) do - run_test("cannot_witness_nothing", database: {}) do |tc| - tc.any(nothing) + def test_cannot_witness_empty_mix_of + assert_raises(Unsatisfiable) do + run_test("cannot_witness_empty_mix_of", database: {}) do |tc| + tc.any(mix_of) + end end end - end - def test_cannot_witness_empty_mix_of - assert_raises(Unsatisfiable) do - run_test("cannot_witness_empty_mix_of", database: {}) do |tc| - tc.any(mix_of) + def test_can_draw_mixture + run_test("can_draw_mixture", database: {}) do |tc| + m = tc.any(mix_of(integers(-5, 0), integers(2, 5))) + assert (-5..5).cover?(m) + refute_equal 1, m end end - end - def test_can_draw_mixture - run_test("can_draw_mixture", database: {}) do |tc| - m = tc.any(mix_of(integers(-5, 0), integers(2, 5))) - assert (-5..5).cover?(m) - refute_equal 1, m + # This test is very hard to trigger without targeting, and targeting will + # tend to overshoot the score, so we will see multiple interesting test cases + # before shrinking. 
+ def test_target_and_reduce + out, _ = capture_io do + assert_raises(Minitest::Assertion) do + run_test("target_and_reduce", database: {}) do |tc| + m = tc.choice(100_000) + tc.target(m) + assert m <= 99_900 + end + end + end + + assert_equal <<~OUT, out + choice(100000): 99901 + OUT end - end - # This test is very hard to trigger without targeting, and targeting will - # tend to overshoot the score, so we will see multiple interesting test cases - # before shrinking. - def test_target_and_reduce - out, _ = capture_io do - assert_raises(Minitest::Assertion) do - run_test("target_and_reduce", database: {}) do |tc| - m = tc.choice(100_000) - tc.target(m) - assert m <= 99_900 + def test_impossible_weighted + assert_raises(Failure) do + run_test("impossible_weighted", database: {}, quiet: true) do |tc| + tc.choice(1) + 10.times do + assert false unless tc.weighted(0.0).zero? + end + raise Failure if tc.choice(1).zero? end end end - assert_equal <<~OUT, out - choice(100000): 99901 - OUT - end - - def test_impossible_weighted - assert_raises(Failure) do - run_test("impossible_weighted", database: {}, quiet: true) do |tc| - tc.choice(1) - 10.times do - assert false unless tc.weighted(0.0).zero? + def test_guaranteed_weighted + assert_raises(Failure) do + run_test("guaranteed_weighted", database: {}, quiet: true) do |tc| + if tc.weighted(1.0).nonzero? + tc.choice(1) + raise Failure + else + assert false + end end - raise Failure if tc.choice(1).zero? end end - end - def test_guaranteed_weighted - assert_raises(Failure) do - run_test("guaranteed_weighted", database: {}, quiet: true) do |tc| - if tc.weighted(1.0).nonzero? 
- tc.choice(1) - raise Failure - else - assert false - end + def test_size_bounds_on_list + run_test("size_bounds_on_list", database: {}) do |tc| + ls = tc.any(lists(integers(0, 10), min_size: 1, max_size: 3)) + assert (1..3).cover?(ls.length) end end - end - def test_size_bounds_on_list - run_test("size_bounds_on_list", database: {}) do |tc| - ls = tc.any(lists(integers(0, 10), min_size: 1, max_size: 3)) - assert (1..3).cover?(ls.length) + def test_forced_choice_bounds + assert_raises(RangeError) do + run_test("forced_choice_bounds", database: {}) do |tc| + tc.forced_choice(2 ** 64) + end + end end - end - def test_forced_choice_bounds - assert_raises(RangeError) do - run_test("forced_choice_bounds", database: {}) do |tc| - tc.forced_choice(2 ** 64) + def test_failure_from_hypothesis_1 + assert_raises(Failure) do + run_test("failure_from_hypothesis_1", database: {}, random: Random.new(100), max_examples: 1000, quiet: true) do |tc| + n1 = tc.weighted(0.0) + if n1.zero? + n2 = tc.choice(511) + if n2 == 112 + n3 = tc.choice(511) + if n3 == 124 + raise Failure + elsif n3 == 93 + raise Failure + else + tc.mark_status(Status::INVALID) + end + elsif n2 == 93 + raise Failure + else + tc.mark_status(Status::INVALID) + end + end + end end end - end - def test_failure_from_hypothesis_1 - assert_raises(Failure) do - run_test("failure_from_hypothesis_1", database: {}, random: Random.new(100), max_examples: 1000, quiet: true) do |tc| - n1 = tc.weighted(0.0) - if n1.zero? - n2 = tc.choice(511) - if n2 == 112 - n3 = tc.choice(511) - if n3 == 124 + def test_failure_from_hypothesis_2 + assert_raises(Failure) do + run_test("failure_from_hypothesis_2", database: {}, random: Random.new(0), max_examples: 1000, quiet: true) do |tc| + n1 = tc.choice(6) + if n1 == 6 + n2 = tc.weighted(0.0) + if n2.zero? 
raise Failure - elsif n3 == 93 + end + elsif n1 == 4 + n3 = tc.choice(0) + if n3 == 0 raise Failure else tc.mark_status(Status::INVALID) end - elsif n2 == 93 + elsif n1 == 2 raise Failure else tc.mark_status(Status::INVALID) @@ -444,82 +471,57 @@ class Minitest::ThesisTest < Minitest::Thesis::Test end end end - end - def test_failure_from_hypothesis_2 - assert_raises(Failure) do - run_test("failure_from_hypothesis_2", database: {}, random: Random.new(0), max_examples: 1000, quiet: true) do |tc| - n1 = tc.choice(6) - if n1 == 6 - n2 = tc.weighted(0.0) - if n2.zero? - raise Failure - end - elsif n1 == 4 - n3 = tc.choice(0) - if n3 == 0 - raise Failure + def test_refactoring_cache + old = ->(node, choices, status) { + choices.each.with_index do |c, i| + if i + 1 < choices.length || status == Status::OVERRUN + node = if node.has_key?(c) + node[c] + else + node[c] = {} + end else - tc.mark_status(Status::INVALID) + node[c] = status end - elsif n1 == 2 - raise Failure - else - tc.mark_status(Status::INVALID) end - end - end - end + } - def test_refactoring_cache - old = ->(node, choices, status) { - choices.each.with_index do |c, i| - if i + 1 < choices.length || status == Status::OVERRUN + new = ->(node, choices, status) { + *rest, last = choices + rest.each do |c| node = if node.has_key?(c) node[c] else node[c] = {} end - else - node[c] = status end - end - } - - new = ->(node, choices, status) { - *rest, last = choices - rest.each do |c| - node = if node.has_key?(c) - node[c] - else - node[c] = {} - end - end - unless last.nil? - node[last] = status == Status::OVERRUN ? {} : status - end - } + unless last.nil? + node[last] = status == Status::OVERRUN ? 
{} : status + end + } - run_test("refactoring_cache", database: {}) do |tc| - old_tree = {} - new_tree = {} + run_test("refactoring_cache", database: {}) do |tc| + old_tree = {} + new_tree = {} - choices = tc.any(lists(integers(0, 10))) - status = Status.new(tc.choice(4)) + choices = tc.any(lists(integers(0, 10))) + status = Status.new(tc.choice(4)) - old.(old_tree, choices, status) - new.(new_tree, choices, status) + old.(old_tree, choices, status) + new.(new_tree, choices, status) - assert_equal old_tree, new_tree + assert_equal old_tree, new_tree + end end - end - private + private - def suppress_warnings - original_verbosity = $VERBOSE - $VERBOSE = nil - yield - $VERBOSE = original_verbosity + def suppress_warnings + original_verbosity = $VERBOSE + $VERBOSE = nil + yield + $VERBOSE = original_verbosity + end end end