require "digest"

require "minitest"

module Minitest
  module Thesis
    VERSION = "0.1.0"

    class Test < Minitest::Test

      # Runs a property-based test. Usage is:
      #
      #   run_test("my test name") do |test_case|
      #     n = test_case.choice(1000)
      #   end
      #
      # The block takes a `TestCase` argument, and should raise an exception to
      # indicate a test failure. It will either run silently or print drawn
      # values and then fail with an exception if some failing test case is
      # found.
      #
      # Arguments:
      # * name: the key under which a failing choice sequence is stored in (and
      #   replayed from) `database`.
      # * max_examples: the maximum number of valid test cases to run for.
      #   Note that under some circumstances the test may run fewer test
      #   cases than this.
      # * random: An instance of Random that will be used for all
      #   nondeterministic choices.
      # * database: A Hash-like object (responding to `[]`, `[]=` and `delete`)
      #   in which results will be cached and resumed from, ensuring that if a
      #   test is run twice it fails in the same way.
      # * quiet: Will not print anything on failure if true.
      #
      # Raises ArgumentError if no block is given, and Unsatisfiable if not a
      # single valid test case could be run.
      def run_test(
        name,
        max_examples: 100,
        random: Random.new,
        database: DirectoryDb.new(".minitest-thesis-cache"),
        quiet: false,
        &test
      )
        raise ArgumentError, "run_test requires a block" if test.nil?

        mark_failures_interesting = ->(test_case) do
          test.(test_case)
        rescue NoMemoryError, SignalException, SystemExit
          # Process-level conditions (Ctrl-C, `exit`, out of memory) are not
          # test failures: let them propagate instead of shrinking them.
          raise
        rescue Exception
          # We deliberately rescue Exception rather than StandardError so that
          # failures which are not StandardErrors are still recorded. If the
          # status is already set, the exception is the control-flow signal
          # raised by `mark_status`, so pass it on untouched.
          raise unless test_case.status.nil?

          test_case.mark_status(Status::INTERESTING)
        end

        state = TestingState.new(random:, test_function: mark_failures_interesting, max_examples:)

        # Replay a previously stored failure first, so a known-bad example
        # fails the same way on every run.
        prev_failure = database[name]

        if prev_failure
          choices = prev_failure.unpack("Q>*")
          state.test_function(TestCase.for_choices(choices))
        end

        state.run if state.result.nil?

        raise Unsatisfiable if state.valid_test_cases.zero?

        if state.result.nil?
          database.delete(name)
        else
          database[name] = state.result.pack("Q>*")

          # Re-run the minimal failing example outside the state machinery so
          # that its exception reaches the caller (printing draws unless quiet).
          test.(TestCase.for_choices(state.result, print_results: !quiet))
        end
      end

      # Represents a single generated test case, which consists of an underlying
      # set of choices that produce possibilities.
      #
      # Choices are taken from `prefix` while it lasts and from `random`
      # afterwards. Once a status has been set the test case is frozen and any
      # further choice raises Frozen.
      class TestCase

        # Returns a test case that replays exactly this series of choices. No
        # random source is attached: `max_size` is the prefix length, so asking
        # for one more choice than the prefix holds marks the case as OVERRUN.
        def self.for_choices(choices, print_results: false)
          new(prefix: choices, random: nil, max_size: choices.length, print_results:)
        end

        attr_accessor :status
        attr_reader :choices, :targeting_score

        # * prefix: choices to replay before falling back to `random`.
        # * random: object responding to `rand`, used once the prefix is spent.
        # * max_size: maximum number of choices before the case is OVERRUN.
        # * print_results: when true, top-level draws are printed with `puts`.
        def initialize(prefix:, random:, max_size: Float::INFINITY, print_results: false)
          @prefix = prefix
          @random = random
          @max_size = max_size
          @print_results = print_results
          @choices = []
          @status = nil
          @depth = 0
          @targeting_score = nil
        end

        # Returns a number in the range [0, n] (both ends inclusive).
        def choice(n)
          # `Random#rand(k)` draws from [0, k) and rejects k == 0, so ask for
          # n + 1 to make `n` itself reachable and to allow `choice(0)`.
          result = make_choice(n) { @random.rand(n + 1) }

          puts "choice(#{n}): #{result}" if should_print?

          result
        end

        # Returns 1 with probability `p` and 0 otherwise. Probabilities at or
        # beyond the ends of [0, 1] are recorded as forced choices.
        def weighted(p)
          result = if    p <= 0 then forced_choice(0)
                   elsif p >= 1 then forced_choice(1)
                   else              make_choice(1) { (@random.rand <= p) ? 1 : 0 }
                   end

          puts "weighted(#{p}): #{result}" if should_print?

          result
        end

        # Inserts a fake choice into the choice sequence, as if some call to
        # choice() had returned `n`. You almost never need this, but sometimes it
        # can be a useful hint to the shrinker.
        #
        # Raises RangeError for a negative `n` or one wider than 64 bits, and
        # Frozen if the test case already has a status.
        def forced_choice(n)
          prepare_choice(n)

          @choices << n
          n
        end

        # Mark this test case as invalid.
        def reject = mark_status(Status::INVALID)

        # If this precondition is not met, abort the test and mark this test case as invalid.
        def assume(precondition)
          return if precondition
          reject
        end

        # Set a score to maximize. Multiple calls to this function will override previous ones.
        #
        # The name and idea come from Löscher, Andreas, and Konstantinos Sagonas.
        # "Targeted property-based testing." ISSTA. 2017, but the implementation
        # is based on that found in Hypothesis, which is not that similar to
        # anything described in the paper.
        def target(score) = @targeting_score = score

        # Return a possible value from `possibility`.
        def any(possibility)
          begin
            # Track nesting so that only the outermost draw is printed.
            @depth += 1
            result = possibility.produce.(self)
          ensure
            @depth -= 1
          end

          puts "any(#{possibility}): #{result}" if should_print?

          result
        end

        # Set the status and raise StopTest. Raises Frozen if a status has
        # already been set.
        def mark_status(status)
          raise Frozen unless self.status.nil?

          @status = status
          raise StopTest
        end

        private

        def should_print? = @print_results && @depth.zero?

        # Shared validation for every new choice: `n` must be a non-negative
        # value of at most 64 bits, the test case must not be frozen, and there
        # must be room left in the choice buffer (otherwise the case is OVERRUN).
        def prepare_choice(n)
          raise RangeError, "Invalid choice #{n}" if n.bit_length > 64 || n.negative?
          raise Frozen unless @status.nil?

          mark_status(Status::OVERRUN) if @choices.length >= @max_size
        end

        # Make a choice in [0, n], by calling rnd_method if randomness is needed.
        def make_choice(n, &rnd_method)
          prepare_choice(n)

          result = if @choices.length < @prefix.length
                     @prefix[@choices.length]
                   else
                     rnd_method.()
                   end
          @choices << result

          # A replayed prefix value may not fit the bound being asked for now.
          mark_status(Status::INVALID) if result > n

          result
        end
      end

      # Represents some range of values that might be used in a test, that can be
      # requested from a `TestCase`. Pass one of these to TestCase#any to get a
      # concrete value.
      #
      # `produce` is the block given at construction; it is called with the
      # test case and returns the drawn value. `name` is used for display.
      class Possibility
        attr_reader :produce, :name

        def initialize(name = "TODO", &produce)
          @name = name
          @produce = produce
        end

        def inspect = name
        def to_s = name

        # Returns a `Possibility` where values come from applying `f` to some
        # possible value for `self`.
        def map(&f)
          self.class.new("#{name}.map(TODO)") {|tc| f.call(tc.any(self)) }
        end

        # Returns a `Possibility` where values come from applying `f` (which
        # should return a new `Possibility`) to some possible value for `self`
        # then returning a possible value from that.
        def bind(&f)
          self.class.new("#{name}.bind(TODO)") {|tc| tc.any(f.(tc.any(self))) }
        end

        # Returns a `Possibility` whose values are any possible value of `self`
        # for which `f` returns a truthy value. Up to three candidates are
        # drawn; if none of them satisfies `f` the test case is rejected.
        def satisfying(&f)
          self.class.new("#{name}.select(TODO)") {|test_case|
            # Track acceptance separately from the value, so that a candidate
            # which is itself nil or false can still be returned.
            accepted = false
            value = nil

            3.times do
              candidate = test_case.any(self)
              next unless f.(candidate)

              accepted = true
              value = candidate
              break
            end

            test_case.reject unless accepted
            value
          }
        end
      end

      # Possibility covering every integer from `m` to `n`: draws an offset
      # with `choice(n - m)` and adds it to `m`.
      def integers(m, n)
        Possibility.new("integers(#{m}, #{n})") do |test_case|
          offset = test_case.choice(n - m)
          m + offset
        end
      end

      # Any lists whose elements are possible values from `elements` are possible.
      #
      # * min_size: the list always has at least this many elements.
      # * max_size: the list never has more than this many elements (inclusive).
      #
      # Before each element a 0/1 "continue?" choice is recorded: forced to 1
      # while below `min_size`, forced to 0 once `max_size` is reached, and
      # otherwise drawn with probability 0.9 of continuing.
      def lists(elements, min_size: 0, max_size: Float::INFINITY)
        Possibility.new("lists(#{elements.name})") {|test_case|
          result = []
          loop do
            if result.length < min_size
              test_case.forced_choice(1)
            elsif result.length >= max_size
              # Stop only once the list is actually full, so that lists of
              # exactly `max_size` elements can be generated.
              test_case.forced_choice(0)
              break
            elsif test_case.weighted(0.9).zero?
              break
            end
            result << test_case.any(elements)
          end
          result
        }
      end

      # Possibility with exactly one outcome: `value`. No choice is drawn.
      def just(value)
        label = "just(#{value})"
        Possibility.new(label) { value }
      end

      # Possibility with no values at all: drawing from it calls `reject` on
      # the test case.
      def nothing
        Possibility.new do |test_case|
          test_case.reject
        end
      end

      # Possibility whose values come from any one of `possibilities`: one
      # choice picks the index, then a value is drawn from that possibility.
      # With no arguments this is the same as `nothing`.
      def mix_of(*possibilities)
        if possibilities.empty?
          nothing
        else
          names = possibilities.map(&:name).join(", ")
          last_index = possibilities.length - 1

          Possibility.new("mix_of(#{names})") do |test_case|
            picked = possibilities[test_case.choice(last_index)]
            test_case.any(picked)
          end
        end
      end

      # Possibility producing an Array with one value per argument, in order:
      # element i is drawn from possibilities[i].
      def tuples(*possibilities)
        names = possibilities.map(&:name).join(", ")

        Possibility.new("tuples(#{names})") do |test_case|
          possibilities.map { |possibility| test_case.any(possibility) }
        end
      end

      # We cap the maximum amount of entropy a test case can use.
      # This prevents cases where the generated test case size explodes,
      # by effectively rejecting any test case that grows beyond this size.
      BUFFER_SIZE = 8 * 1024

      # Wraps a test function so that it can be called with a plain choice
      # sequence and answers with the resulting Status, remembering earlier
      # answers in a tree keyed by successive choices. Because the tree follows
      # the structure of the test function, it can also predict the outcome of
      # sequences it has never seen exactly (e.g. a known sequence with extra
      # trailing choices, or a strict prefix of one).
      #
      # This is purely an optimisation: leaving it out only makes shrinking
      # slower.
      class CachedTestFunction
        def initialize(&test_function)
          @test_function = test_function

          # Each tree node is either a Hash mapping a choice to the node
          # reached after making it, or a Status recording that mark_status
          # was called at that point (later choices are then irrelevant).
          #
          # A Patricia trie, storing long unbranching paths inline as arrays,
          # would be a better structure; a plain nested Hash keeps this simple.
          @tree = {}
        end

        # Returns the Status for `choices`, running the underlying test
        # function only when the tree cannot already answer.
        def call(choices)
          known = lookup(choices)
          return known unless known.nil?

          # Unknown territory: actually run the test to find out.
          test_case = TestCase.for_choices(choices)
          @test_function.(test_case)
          fail if test_case.status.nil?

          remember(test_case)
          test_case.status
        end

        private

        # Walks the tree along `choices`. Returns the Status if the outcome is
        # already determined, or nil as soon as an unseen choice is met.
        def lookup(choices)
          node = @tree
          choices.each do |choice|
            return nil unless node.key?(choice)

            node = node[choice]
            next unless node.is_a?(Status)

            # A Status leaf ends the walk; OVERRUN is never stored as a leaf.
            fail if node == Status::OVERRUN
            return node
          end

          # Every choice was consumed without reaching a leaf, so the test
          # would ask for another choice and overrun.
          Status::OVERRUN
        end

        # Records the choices the test case actually made, ending in its
        # status (or in an empty subtree when it overran).
        def remember(test_case)
          *path, leaf = test_case.choices
          parent = path.reduce(@tree) do |node, choice|
            node.fetch(choice) { node[choice] = {} }
          end
          return if leaf.nil?

          parent[leaf] = test_case.status == Status::OVERRUN ? {} : test_case.status
        end
      end

      class TestingState
        attr_reader :result, :valid_test_cases, :calls

        # Stores the random source, the wrapped test function and the example
        # budget, and zeroes the counters. `@result` and `@best_scoring` are
        # not assigned here; `test_function` sets them later.
        def initialize(random:, test_function:, max_examples:)
          @random = random
          @_test_function = test_function
          @max_examples = max_examples

          @calls = 0
          @valid_test_cases = 0
          @test_is_trivial = false
        end

        # Runs the wrapped test function on `test_case` and folds the outcome
        # into this state: call/valid counters, the trivial-test flag, the best
        # targeting score seen so far, and the smallest interesting result.
        def test_function(test_case)
          begin
            @_test_function.(test_case)
          rescue StopTest
            # StopTest only signals that the status was set; nothing to do.
          end

          # Finishing without any explicit status means the case was valid.
          test_case.status = Status::VALID if test_case.status.nil?

          @calls += 1

          # A test that completes without making a single choice always
          # behaves the same way, so generating more cases is pointless.
          made_no_choices = test_case.choices.length.zero?
          @test_is_trivial = true if test_case.status >= Status::INVALID && made_no_choices

          if test_case.status >= Status::VALID
            @valid_test_cases += 1

            score = test_case.targeting_score
            improves = !score.nil? && (@best_scoring.nil? || score > @best_scoring.first)
            @best_scoring = [score, test_case.choices] if improves
          end

          # Keep an interesting case only if it is the first one, or strictly
          # smaller in shrinking order than the one we already hold.
          is_interesting = test_case.status == Status::INTERESTING
          if is_interesting && (
              @result.nil? || ((sort_key(test_case.choices) <=> sort_key(@result)) == -1)
          )
            @result = test_case.choices
          end
        end

        # If any test cases have had `target()` called on them, do a simple
        # hill climbing algorithm to attempt to optimise that target score.
        #
        # Does nothing when an interesting result already exists or when no
        # score has been recorded. Stops as soon as `keep_generating?` is false.
        def target
          return if !@result.nil? || @best_scoring.nil?

          # Can we improve the score by changing choices[i] by `step`?
          adjust = ->(i, step) do
            fail if @best_scoring.nil?

            score, choices = @best_scoring

            # An earlier improvement may have replaced the best-scoring choice
            # sequence with a shorter one, leaving `i` pointing past its end.
            return false if i >= choices.length
            return false if choices[i] + step < 0 || choices[i].bit_length >= 64

            attempt = choices.dup
            attempt[i] += step
            test_case = TestCase.new(
              prefix: attempt, random: @random, max_size: BUFFER_SIZE
            )
            test_function(test_case)

            fail if test_case.status.nil?

            test_case.status >= Status::VALID &&
              !test_case.targeting_score.nil? &&
              test_case.targeting_score > score
          end

          while keep_generating?
            i = @random.rand(@best_scoring[1].length)

            # Find a direction (+1 or -1) in which the score improves.
            sign = 0
            [1, -1].each do |k|
              return unless keep_generating?

              if adjust.(i, k)
                sign = k
                break
              end
            end

            next if sign.zero?

            # Grow the step exponentially while it keeps helping...
            k = 1
            k *= 2 while keep_generating? && adjust.(i, sign * k)

            # ...then repeatedly apply each smaller power of two for as long
            # as it still improves the score.
            while k.positive?
              while keep_generating? && adjust.(i, sign * k)
              end
              k /= 2
            end
          end
        end

        # Runs the three phases in order: `generate`, then `target`, then
        # `shrink`.
        def run
          generate
          target
          shrink
        end

        # True while it is still worth running more test cases: the test is
        # not trivial, no interesting result has been found, and neither the
        # valid-example budget nor the overall call budget is used up.
        def keep_generating?
          return false if @test_is_trivial
          return false unless result.nil?
          return false unless @valid_test_cases < @max_examples

          # Besides the cap on valid examples there is a cap on total calls
          # (ten times as many), so that tests whose preconditions are hard or
          # impossible to satisfy do not run for a prohibitively long time.
          @calls < @max_examples * 10
        end

        # Purely random generation phase. Keeps running fresh test cases until
        # `keep_generating?` says stop, or, once some test case has reported a
        # targeting score, until half of `max_examples` valid cases have run.
        def generate
          loop do
            break unless keep_generating?

            half_budget_left = @valid_test_cases < @max_examples / 2
            break unless @best_scoring.nil? || half_budget_left

            fresh_case = TestCase.new(prefix: [], random: @random, max_size: BUFFER_SIZE)
            test_function(fresh_case)
          end
        end

        # If we have found an interesting example, try shrinking it so that the
        # choice sequence leading to our best example is shortlex smaller than
        # the one we originally found. This improves the quality of the generated
        # test case, as per our paper.
        #
        # https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf
        #
        # Does nothing when there is no result or the result is already empty.
        # `@result` is updated as a side effect of `test_function` whenever a
        # smaller interesting choice sequence is found, so its length can
        # change at any point where `consider` or `replace` is called.
        def shrink
          return if @result.nil? || @result.empty?

          # Shrinking will typically try the same choice sequences over and over
          # again, so we cache the test function in order to not end up
          # reevaluating it in those cases. This also allows us to catch cases
          # where we try something that is e.g. a prefix of something we've
          # previously tried, which is guaranteed not to work.
          cached = CachedTestFunction.new {|tc| test_function(tc) }

          # True if `choices` is (or is as good as) the current result.
          consider = ->(choices) do
            return true if choices == @result

            cached.(choices) == Status::INTERESTING
          end

          fail unless consider.(@result)

          # We are going to perform a number of transformations to the current
          # result, iterating until none of them make any progress - i.e. until
          # we make it through an entire iteration of the loop without changing
          # the result.
          prev = nil
          while prev != @result
            prev = @result

            # A note on weird loop order: We iterate backwards through the choice
            # sequence rather than forwards, because later bits tend to depend on
            # earlier bits so it's easier to make changes near the end and
            # deleting bits at the end may allow us to make changes earlier on
            # that we'd have missed.
            #
            # Note that we do not restart the loop at the end when we find a
            # successful shrink. This is because things we've already tried are
            # less likely to work.
            #
            # If this guess is wrong, that's OK, this isn't a correctness
            # problem, because if we made a successful reduction then we are not
            # at a fixed point and will restart the loop at the end the next time
            # round. In some cases this can result in performance issues, but the
            # end result should still be fine.

            # First try deleting each choice we made in chunks. We try longer
            # chunks because this allows us to delete whole composite elements:
            # e.g. deleting an element from a generated list requires us to
            # delete both the choice of whether to include it and also the
            # element itself, which may involve more than one choice. Some things
            # will take more than 8 choices in the sequence. That's too bad, we
            # may not be able to delete those. In Hypothesis proper we record the
            # boundaries corresponding to `any` calls so that we can try deleting
            # those, but that's pretty high overhead and also a bunch of slightly
            # annoying code that it's not worth porting.
            #
            # We could instead do a quadratic amount of work to try all
            # boundaries, but in general we don't want to do that because even a
            # shrunk test case can involve a relatively large number of choices.
            k = 8
            while k.positive?
              i = @result.length - k - 1
              until i.negative?
                if i >= @result.length
                  # Can happen if we successfully lowered the value at i - 1
                  i -= 1
                  next
                end
                attempt = @result[0...i] + (@result[i + k..] || [])

                fail unless attempt.length < @result.length

                unless consider.(attempt)
                  # This fixes a common problem that occurs
                  # when you have dependencies on some
                  # length parameter. e.g. draw a number
                  # between 0 and 10 and then draw that
                  # many elements. This can't delete
                  # everything that occurs that way, but
                  # it can delete some things and often
                  # will get us unstuck when nothing else
                  # does.
                  if i.positive? && attempt[i - 1].positive?
                    attempt[i - 1] -= 1
                    i += 1 if consider.(attempt)
                  end

                  i -= 1
                end
              end

              k /= 2
            end

            # Attempts to replace some indices in the current result with new
            # values. Useful for some purely lexicographic reductions that we are
            # about to perform.
            replace = ->(values) do
              fail if @result.nil?
              attempt = @result.dup
              values.each do |i, v|
                # The size of the result can change during shrinking. If that
                # happens, stop attempting to make use of these replacements
                # because some other shrink pass is better to run now.
                return false if i >= attempt.length
                attempt[i] = v
              end
              consider.(attempt)
            end

            # Now we try replacing blocks of choices with zeroes. Note that
            # unlike the above we skip k = 1 because we handle that in the next
            # step. Often (but not always) a block of all zeroes is the shortlex
            # smallest value that a region can be.
            k = 8

            while k > 1
              i = @result.length - k
              until i.negative?
                if replace.((i...i+k).to_h {|i| [i, 0]})
                  # If we've succeeded then all of [i, i + k] is zero so we
                  # adjust i so that the next region does not overlap with this
                  # at all.
                  i -= k
                else
                  # Otherwise we might still be able to zero some of these values
                  # but not the last one, so we just go back one.
                  i -= 1
                end
              end
              k /= 2
            end

            # Now try replacing each choice with a smaller value by doing a
            # binary search. This will replace n with 0 or n - 1 if possible, but
            # will also more efficiently replace it with a smaller number than
            # doing multiple subtractions would.
            i = @result.length - 1
            until i.negative?
              # A successful replacement at a later index may have shortened
              # the result, so `i` can point past its end; skip such indices
              # instead of searching with a nil upper bound.
              if i < @result.length
                bin_search_down(0, @result[i]) {|v| replace.({i => v}) }
              end
              i -= 1
            end

            # NB from here on this is just showing off cool shrinker tricks and
            # you probably don't need to worry about it and can skip these bits
            # unless they're easy and you want bragging rights for how much
            # better you are at shrinking than the local QuickCheck equivalent.

            # Try sorting out of order ranges of choices, as `sort(x) <= x`, so
            # this is always a lexicographic reduction.
            k = 8
            while k > 1
              (@result.length - k - 1).downto(0).each do |i|
                # The starting index was computed from the length before this
                # pass; if an accepted attempt shortened the result, windows
                # that no longer fit must be skipped (slicing past the end
                # yields nil).
                next if i + k > @result.length

                consider.(@result[0...i] + @result[i...i+k].sort + @result[i+k..])
              end
              k /= 2
            end

            # Try adjusting nearby pairs of integers by redistributing value
            # between them. This is useful for tests that depend on the sum of
            # some generated values.
            [2, 1].each do |k|
              (@result.length - k - 1).downto(0).each do |i|
                j = i + k
                # This check is necessary because the previous changes might have
                # shrunk the size of result, but also it's tedious to write tests
                # for this so I didn't.
                if j < @result.length
                  # Try swapping out of order pairs
                  if @result[i] > @result[j]
                    replace.({j => @result[i], i => @result[j]})
                  end
                  # j could be out of range if the previous swap succeeded.
                  if j < @result.length && @result[i].positive?
                    prev_i = @result[i]
                    prev_j = @result[j]
                    bin_search_down(0, prev_i) {|v|
                      replace.({i => v, j => prev_j + (prev_i - v)})
                    }
                  end
                end
              end
            end
          end
        end

        private

        # Shrinking-order key for a choice sequence: its length first, then
        # the sequence itself, so shorter sequences sort before longer ones
        # and equal-length sequences compare element by element.
        def sort_key(choices)
          [choices.length, choices]
        end

        # Binary search for a small value in [low, high] accepted by the block,
        # assuming (without checking) that the block accepts `high`.
        #
        # Returns `low` straight away if the block accepts it. Otherwise the
        # returned n only satisfies "block rejects n - 1 and accepts n": it is
        # a locally minimal value, not necessarily the global minimum.
        def bin_search_down(low, high, &f)
          return low if f.(low)

          # Invariant: the block rejected `rejected` and is taken to accept
          # `accepted`.
          rejected = low
          accepted = high
          until rejected + 1 >= accepted
            midpoint = rejected + (accepted - rejected) / 2
            if f.(midpoint)
              accepted = midpoint
            else
              rejected = midpoint
            end
          end
          accepted
        end
      end

      # A minimal Hash-like store that keeps each value in its own file inside
      # a directory. The file name is the first ten hex characters of the
      # SHA-1 digest of the key.
      #
      # Implements `[]`, `[]=` and `delete`, which is what `run_test` uses.
      class DirectoryDb
        # Creates `dir` if it does not exist yet.
        def initialize(dir)
          @dir = dir
          Dir.mkdir(@dir)
        rescue Errno::EEXIST
          # Already there from a previous run: nothing to do.
        end

        # Returns the stored bytes for `key`, or nil if nothing is stored.
        def [](key)
          # Values are packed integers, so read them as raw bytes. Rescuing
          # ENOENT (rather than checking existence first) avoids a race with a
          # concurrent delete.
          File.binread(file(key))
        rescue Errno::ENOENT
          nil
        end

        # Stores `value` (a binary String) under `key`.
        def []=(key, value)
          File.binwrite(file(key), value)
        end

        # Removes the entry for `key`. Returns the previously stored bytes, or
        # nil if there was no entry; deleting a missing key is not an error.
        def delete(key)
          previous = self[key]
          File.delete(file(key)) unless previous.nil?
          previous
        rescue Errno::ENOENT
          nil
        end

        private

        def file(key)
          File.join(@dir, Digest::SHA1.hexdigest(key)[0...10])
        end
      end

      # Common ancestor of the errors defined here.
      class Error < StandardError
      end

      # Raised on an attempt to make choices on (or set the status of) a test
      # case that has already been completed.
      class Frozen < Error
      end

      # Raised to make a test stop executing early.
      class StopTest < Error
      end

      # Raised when a test has no valid examples.
      class Unsatisfiable < Error
      end

      # Outcome of running a test case. Statuses are ordered by `value`, so
      # they can be compared with `<`, `>=` and friends:
      # OVERRUN < INVALID < VALID < INTERESTING.
      class Status < Struct.new(:value)
        include Comparable

        # Test case didn't have enough data to complete
        OVERRUN = new(0).freeze

        # Test case contained something that prevented completion
        INVALID = new(1).freeze

        # Test case completed just fine but was boring
        VALID = new(2).freeze

        # Test case completed and was interesting
        INTERESTING = new(3).freeze

        # Orders statuses by `value`. Returns nil for anything that is not a
        # Status, so that `==` against nil or another object is simply false
        # instead of raising NoMethodError.
        def <=>(other)
          return nil unless other.is_a?(Status)

          value <=> other.value
        end
      end
    end
  end
end