meta/recipes-devtools/python/python3-hypothesis/test_rle.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Most of this work is copyright (C) 2013-2021 David R. MacIver
# (david@drmaciver.com), but it contains contributions by others. See
# CONTRIBUTING.rst for a full list of people who may hold copyright, and
# consult the git log if you need to determine who owns an individual
# contribution.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# END HEADER
#
# SPDX-License-Identifier: MPL-2.0

"""This example demonstrates testing a run length encoding scheme. That is, we
take a sequence and represent it by a shorter sequence where each 'run' of
consecutive equal elements is represented as a single element plus a count. So
e.g.

[1, 1, 1, 1, 2, 1] is represented as [[1, 4], [2, 1], [1, 1]]

This demonstrates the useful decode(encode(x)) == x invariant that is often
a fruitful source of testing with Hypothesis.

It also has an example of testing invariants in response to changes in the
underlying data.
"""

from hypothesis import assume, given, strategies as st


def run_length_encode(seq):
    """Encode a sequence as a new run-length encoded sequence."""
    if not seq:
        return []
    # By starting off the count at zero we simplify the iteration logic
    # slightly.
    result = [[seq[0], 0]]
    for s in seq:
        if (
            # If you uncomment this line this branch will be skipped and we'll
            # always append a new run of length 1. Note which tests fail.
            # False and
            s
            == result[-1][0]
            # Try uncommenting this line and see what problems occur:
            # and result[-1][-1] < 2
        ):
            result[-1][1] += 1
        else:
            result.append([s, 1])
    return result


def run_length_decode(seq):
    """Take a previously encoded sequence and reconstruct the original from
    it."""
    result = []
    for s, i in seq:
        for _ in range(i):
            result.append(s)
    return result


# We use lists of a type that should have a relatively high duplication rate,
# otherwise we'd almost never get any runs.
Lists = st.lists(st.integers(0, 10))


@given(Lists)
def test_decodes_to_starting_sequence(ls):
    """If we encode a sequence and then decode the result, we should get the
    original sequence back.

    Otherwise we've done something very wrong.
    """
    assert run_length_decode(run_length_encode(ls)) == ls


@given(Lists, st.data())
def test_duplicating_an_element_does_not_increase_length(ls, data):
    """The previous test could be passed by simply returning the input sequence
    so we need something that tests the compression property of our encoding.

    In this test we deliberately introduce or extend a run and assert
    that this does not increase the length of our encoding, because they
    should be part of the same run in the final result.
    """
    # We use assume to get a valid index into the list. We could also have used
    # e.g. flatmap, but this is relatively straightforward and will tend to
    # perform better.
    assume(ls)
    i = data.draw(st.integers(0, len(ls) - 1))
    ls2 = list(ls)
    # duplicating the value at i right next to it guarantees they are part of
    # the same run in the resulting compression.
    ls2.insert(i, ls2[i])
    assert len(run_length_encode(ls2)) == len(run_length_encode(ls))