aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/verify-bashisms
blob: a979bd29650977d4c6b46b9be784055682ff8105 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python3

import sys, os, subprocess, re, shutil

whitelist = (
    # type is supported by dash
    'if type systemctl >/dev/null 2>/dev/null; then',
    'if type systemd-tmpfiles >/dev/null 2>/dev/null; then',
    'type update-rc.d >/dev/null 2>/dev/null; then',
    'command -v',
    # HOSTNAME is set locally
    'buildhistory_single_commit "$CMDLINE" "$HOSTNAME"',
    # False-positive, match is a grep not shell expression
    'grep "^$groupname:[^:]*:[^:]*:\\([^,]*,\\)*$username\\(,[^,]*\\)*"',
    # TODO verify dash's '. script args' behaviour
    '. $target_sdk_dir/${oe_init_build_env_path} $target_sdk_dir >> $LOGFILE'
    )

def is_whitelisted(s):
    for w in whitelist:
        if w in s:
            return True
    return False

SCRIPT_LINENO_RE = re.compile(r' line (\d+) ')
BASHISM_WARNING = re.compile(r'^(possible bashism in.*)$', re.MULTILINE)

def process(filename, function, lineno, script):
    import tempfile

    if not script.startswith("#!"):
        script = "#! /bin/sh\n" + script

    fn = tempfile.NamedTemporaryFile(mode="w+t")
    fn.write(script)
    fn.flush()

    try:
        subprocess.check_output(("checkbashisms.pl", fn.name), universal_newlines=True, stderr=subprocess.STDOUT)
        # No bashisms, so just return
        return
    except subprocess.CalledProcessError as e:
        # TODO check exit code is 1

        # Replace the temporary filename with the function and split it
        output = e.output.replace(fn.name, function)
        if not output or not output.startswith('possible bashism'):
            # Probably starts with or contains only warnings. Dump verbatim
            # with one space indention. Can't do the splitting and whitelist
            # checking below.
            return '\n'.join([filename,
                              ' Unexpected output from checkbashisms.pl'] +
                             [' ' + x for x in output.splitlines()])

        # We know that the first line matches and that therefore the first
        # list entry will be empty - skip it.
        output = BASHISM_WARNING.split(output)[1:]
        # Turn the output into a single string like this:
        # /.../foobar.bb
        #  possible bashism in updatercd_postrm line 2 (type):
        #   if ${@use_updatercd(d)} && type update-rc.d >/dev/null 2>/dev/null; then
        #  ...
        #   ...
        result = []
        # Check the results against the whitelist
        for message, source in zip(output[0::2], output[1::2]):
            if not is_whitelisted(source):
                if lineno is not None:
                    message = SCRIPT_LINENO_RE.sub(lambda m: ' line %d ' % (int(m.group(1)) + int(lineno) - 1),
                                                   message)
                result.append(' ' + message.strip())
                result.extend(['  %s' % x for x in source.splitlines()])
        if result:
            result.insert(0, filename)
            return '\n'.join(result)
        else:
            return None

def get_tinfoil():
    scripts_path = os.path.dirname(os.path.realpath(__file__))
    lib_path = scripts_path + '/lib'
    sys.path = sys.path + [lib_path]
    import scriptpath
    scriptpath.add_bitbake_lib_path()
    import bb.tinfoil
    tinfoil = bb.tinfoil.Tinfoil()
    tinfoil.prepare()
    # tinfoil.logger.setLevel(logging.WARNING)
    return tinfoil

if __name__=='__main__':
    import argparse, shutil

    parser = argparse.ArgumentParser(description='Bashim detector for shell fragments in recipes.')
    parser.add_argument("recipes", metavar="RECIPE", nargs="*", help="recipes to check (if not specified, all will be checked)")
    parser.add_argument("--verbose", default=False, action="store_true")
    args = parser.parse_args()

    if shutil.which("checkbashisms.pl") is None:
        print("Cannot find checkbashisms.pl on $PATH, get it from https://anonscm.debian.org/cgit/collab-maint/devscripts.git/plain/scripts/checkbashisms.pl")
        sys.exit(1)

    # The order of defining the worker function,
    # initializing the pool and connecting to the
    # bitbake server is crucial, don't change it.
    def func(item):
        (filename, key, lineno), script = item
        if args.verbose:
            print("Scanning %s:%s" % (filename, key))
        return process(filename, key, lineno, script)

    import multiprocessing
    pool = multiprocessing.Pool()

    tinfoil = get_tinfoil()

    # This is only the default configuration and should iterate over
    # recipecaches to handle multiconfig environments
    pkg_pn = tinfoil.cooker.recipecaches[""].pkg_pn

    if args.recipes:
        initial_pns = args.recipes
    else:
        initial_pns = sorted(pkg_pn)

    pns = set()
    scripts = {}
    print("Generating scripts...")
    for pn in initial_pns:
        for fn in pkg_pn[pn]:
            # There's no point checking multiple BBCLASSEXTENDed variants of the same recipe
            # (at least in general - there is some risk that the variants contain different scripts)
            realfn, _, _ = bb.cache.virtualfn2realfn(fn)
            if realfn not in pns:
                pns.add(realfn)
                data = tinfoil.parse_recipe_file(realfn)
                for key in data.keys():
                    if data.getVarFlag(key, "func") and not data.getVarFlag(key, "python"):
                        script = data.getVar(key, False)
                        if script:
                            filename = data.getVarFlag(key, "filename")
                            lineno = data.getVarFlag(key, "lineno")
                            # There's no point in checking a function multiple
                            # times just because different recipes include it.
                            # We identify unique scripts by file, name, and (just in case)
                            # line number.
                            attributes = (filename or realfn, key, lineno)
                            scripts.setdefault(attributes, script)


    print("Scanning scripts...\n")
    for result in pool.imap(func, scripts.items()):
        if result:
            print(result)
    tinfoil.shutdown()