meta/recipes-devtools/python/python3-hypothesis/test_binary_search.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135

# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Most of this work is copyright (C) 2013-2021 David R. MacIver
# (david@drmaciver.com), but it contains contributions by others. See
# CONTRIBUTING.rst for a full list of people who may hold copyright, and
# consult the git log if you need to determine who owns an individual
# contribution.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# END HEADER
#
# SPDX-License-Identifier: MPL-2.0

"""This file demonstrates testing a binary search.

It's a useful example because the result of the binary search is so clearly
determined by the invariants it must satisfy, so we can simply test for those
invariants.

It also demonstrates the useful testing technique of testing how the answer
should change (or not) in response to movements in the underlying data.
"""

from hypothesis import given, strategies as st


def binary_search(ls, v):
    """Take a list ls and a value v such that ls is sorted and v is comparable
    with the elements of ls.

    Return an index i such that 0 <= i <= len(v) with the properties:

    1. ls.insert(i, v) is sorted
    2. ls.insert(j, v) is not sorted for j < i
    """
    # Without this check we will get an index error on the next line when the
    # list is empty.
    if not ls:
        return 0

    # Without this check we will miss the case where the insertion point should
    # be zero: The invariant we maintain in the next section is that lo is
    # always strictly lower than the insertion point.
    if v <= ls[0]:
        return 0

    # Invariant: There is no insertion point i with i <= lo
    lo = 0

    # Invariant: There is an insertion point i with i <= hi
    hi = len(ls)
    while lo + 1 < hi:
        mid = (lo + hi) // 2
        if v > ls[mid]:
            # Inserting v anywhere below mid would result in an unsorted list
            # because it's > the value at mid. Therefore mid is a valid new lo
            lo = mid
        # Uncommenting the following lines will cause this to return a valid
        # insertion point which is not always minimal.
        # elif v == ls[mid]:
        #   return mid
        else:
            # Either v == ls[mid] in which case mid is a valid insertion point
            # or v < ls[mid], in which case all valid insertion points must be
            # < hi. Either way, mid is a valid new hi.
            hi = mid
    assert lo + 1 == hi
    # We now know that there is a valid insertion point <= hi and there is no
    # valid insertion point < hi because hi - 1 is lo. Therefore hi is the
    # answer we were seeking
    return hi


def is_sorted(ls):
    """Is this list sorted?"""
    for i in range(len(ls) - 1):
        if ls[i] > ls[i + 1]:
            return False
    return True


Values = st.integers()

# We generate arbitrary lists and turn this into generating sorting lists
# by just sorting them.
SortedLists = st.lists(Values).map(sorted)

# We could also do it this way, but that would be a bad idea:
# SortedLists = st.lists(Values).filter(is_sorted)
# The problem is that Hypothesis will only generate long sorted lists with very
# low probability, so we are much better off post-processing values into the
# form we want than filtering them out.


@given(ls=SortedLists, v=Values)
def test_insert_is_sorted(ls, v):
    """We test the first invariant: binary_search should return an index such
    that inserting the value provided at that index would result in a sorted
    set."""
    ls.insert(binary_search(ls, v), v)
    assert is_sorted(ls)


@given(ls=SortedLists, v=Values)
def test_is_minimal(ls, v):
    """We test the second invariant: binary_search should return an index such
    that no smaller index is a valid insertion point for v."""
    for i in range(binary_search(ls, v)):
        ls2 = list(ls)
        ls2.insert(i, v)
        assert not is_sorted(ls2)


@given(ls=SortedLists, v=Values)
def test_inserts_into_same_place_twice(ls, v):
    """In this we test a *consequence* of the second invariant: When we insert
    a value into a list twice, the insertion point should be the same both
    times. This is because we know that v is > the previous element and == the
    next element.

    In theory if the former passes, this should always pass. In practice,
    failures are detected by this test with much higher probability because it
    deliberately puts the data into a shape that is likely to trigger a
    failure.

    This is an instance of a good general category of test: Testing how the
    function moves in responses to changes in the underlying data.
    """
    i = binary_search(ls, v)
    ls.insert(i, v)
    assert binary_search(ls, v) == i