path: root/scripts/contrib/test_build_time.sh
blob: 9e5725ae54eabc0ea456764cd762e5c359167420
#!/bin/bash

# Build performance regression test script
#
# Copyright 2011 Intel Corporation
# All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#
# DESCRIPTION
# This script is intended to be used in conjunction with "git bisect run"
# in order to find regressions in build time; however, it can also be used
# independently. It cleans out the build output directories, runs a
# specified worker script (an example is test_build_time_worker.sh) under
# TIME(1), logs the results to TEST_LOGDIR (default /tmp) and returns a
# value telling "git bisect run" whether the build time is good (under
# the specified threshold) or bad (over it). There is also a tolerance
# option, but it is of limited use since it merely subtracts the tolerance
# from the given threshold and uses the result as the actual threshold.
#
# It can also take a file listing git revision hashes to be test-applied
# to the repository in order to get past build failures that would
# otherwise force certain revisions to be skipped; if a revision does
# not apply cleanly then the script assumes it does not need to be
# applied and ignores it.
#
# Please see the help output (syntax below) for some important setup
# instructions.
#
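# EXAMPLE
# A typical bisect session might look like the following. This is an
# illustrative sketch only - the revisions, the 30m threshold and the 2m
# tolerance are hypothetical, and the worker script shown is a minimal
# placeholder (see test_build_time_worker.sh for a real example):
#
#   export TEST_BUILDDIR=/path/to/build
#   git bisect start <badrev> <goodrev>
#   git bisect run ./scripts/contrib/test_build_time.sh \
#       ./scripts/contrib/test_build_time_worker.sh 30m 2m
#
# A worker script is any executable that performs the build to be timed,
# typically something along these lines:
#
#   #!/bin/bash
#   source ./oe-init-build-env "$TEST_BUILDDIR" > /dev/null
#   bitbake core-image-minimal
#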
# AUTHORS
# Paul Eggleton <paul.eggleton@linux.intel.com>


syntax() {
    echo "syntax: $0 <script> <time> <tolerance> [patchrevlist]"
    echo ""
    echo "  script       - worker script file (if in current dir, prefix with ./)"
    echo "  time         - time threshold (in seconds, suffix m for minutes)"
    echo "  tolerance    - tolerance (in seconds, suffix m for minutes or % for"
    echo "                 percentage, can be 0)"
    echo "  patchrevlist - optional file listing revisions to apply as patches on top"
    echo ""
    echo "You must set TEST_BUILDDIR to point to a previously created build directory,"
    echo "however please note that this script will wipe out the TMPDIR defined in"
    echo "TEST_BUILDDIR/conf/local.conf as part of its initial setup (as well as your"
    echo "~/.ccache)"
    echo ""
    echo "To get rid of the sudo prompt, please add the following line to /etc/sudoers"
    echo "(use 'visudo' to edit this; also it is assumed that the user you are running"
    echo "as is a member of the 'wheel' group):"
    echo ""
    echo "%wheel ALL=(ALL) NOPASSWD: /sbin/sysctl -w vm.drop_caches=[1-3]"
    echo ""
    echo "Note: it is recommended that you disable crond and any other process that"
    echo "may cause significant CPU or I/O usage during build performance tests."
}

# Note - we exit with 250 here because exit codes above 127 tell
# "git bisect run" that something went wrong with the test itself and
# cause the bisection to be aborted
if [ "$1" = "" ] ; then
   syntax
   exit 250
fi

if [ "$2" = "" ] ; then
   syntax
   exit 250
fi

if [ "$3" = "" ] ; then
   syntax
   exit 250
fi

if ! [[ "$2" =~ ^[0-9][0-9m.]*$ ]] ; then
   echo "'$2' is not a valid number for threshold"
   exit 250
fi

if ! [[ "$3" =~ ^[0-9][0-9m.%]*$ ]] ; then
   echo "'$3' is not a valid number for tolerance"
   exit 250
fi

if [ "$TEST_BUILDDIR" = "" ] ; then
   echo "Please set TEST_BUILDDIR to a previously created build directory"
   exit 250
fi

if [ ! -d "$TEST_BUILDDIR" ] ; then
   echo "TEST_BUILDDIR $TEST_BUILDDIR not found"
   exit 250
fi

if ! git diff --quiet ; then
    echo "Working tree is dirty, cannot proceed"
    exit 251
fi

if [ "$BB_ENV_EXTRAWHITE" != "" ] ; then
   echo "WARNING: you are running after sourcing the build environment script, this is not recommended"
fi

runscript=$1
timethreshold=$2
tolerance=$3

if [ "$4" != "" ] ; then
    patchrevlist=`cat "$4"`
else
    patchrevlist=""
fi

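# Convert any "m" (minutes) suffix to seconds, and convert a "%" tolerance
# into an absolute number of seconds; e.g. a threshold of 30m (1800s) with
# a 5% tolerance gives an effective threshold of 1800 - 90 = 1710 seconds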
if [[ $timethreshold == *m* ]] ; then
    timethreshold=`echo $timethreshold | sed s/m/*60/ | bc`
fi

if [[ $tolerance == *m* ]] ; then
    tolerance=`echo $tolerance | sed s/m/*60/ | bc`
elif [[ $tolerance == *%* ]] ; then
    tolerance=`echo $tolerance | sed s/%//`
    tolerance=`echo "scale = 2; (($tolerance * $timethreshold) / 100)" | bc`
fi

tmpdir=`grep "^TMPDIR" $TEST_BUILDDIR/conf/local.conf | sed -e 's/TMPDIR[ \t?]*=[ \t]*"//' -e 's/"//'`
if [ "x$tmpdir" = "x" ]; then
    echo "Unable to determine TMPDIR from $TEST_BUILDDIR/conf/local.conf, bailing out"
    exit 250
fi
sstatedir=`grep "^SSTATE_DIR" $TEST_BUILDDIR/conf/local.conf | sed -e 's/SSTATE_DIR[ \t?]*=[ \t]*"//' -e 's/"//'`
if [ "x$sstatedir" = "x" ]; then
    echo "Unable to determine SSTATE_DIR from $TEST_BUILDDIR/conf/local.conf, bailing out"
    exit 250
fi

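# Sanity check: refuse to proceed with suspiciously short paths so that the
# rm -rf commands below cannot wipe out / or similar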
if [ `expr length $tmpdir` -lt 4 ] ; then
    echo "TMPDIR $tmpdir is less than 4 characters, bailing out"
    exit 250
fi

if [ `expr length $sstatedir` -lt 4 ] ; then
    echo "SSTATE_DIR $sstatedir is less than 4 characters, bailing out"
    exit 250
fi

echo -n "About to wipe out TMPDIR $tmpdir, press Ctrl+C to break out...  "
for i in 9 8 7 6 5 4 3 2 1
do
    echo -ne "\x08$i"
    sleep 1
done
echo

pushd . > /dev/null

rm -f pseudodone
echo "Removing TMPDIR $tmpdir..."
rm -rf $tmpdir
echo "Removing TMPDIR $tmpdir-*libc..."
rm -rf $tmpdir-*libc
echo "Removing SSTATE_DIR $sstatedir..."
rm -rf $sstatedir
echo "Removing ~/.ccache..."
rm -rf ~/.ccache

echo "Syncing..."
sync
sync
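# Drop the page cache, dentries and inodes so that every run starts from a
# cold cache and timings are comparable across revisions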
echo "Dropping VM cache..."
#echo 3 > /proc/sys/vm/drop_caches
sudo /sbin/sysctl -w vm.drop_caches=3 > /dev/null

if [ "$TEST_LOGDIR" = "" ] ; then
    logdir="/tmp"
else
    logdir="$TEST_LOGDIR"
fi
rev=`git rev-parse HEAD`
logfile="$logdir/timelog_$rev.log"
echo -n > $logfile

gitroot=`git rev-parse --show-toplevel`
cd $gitroot
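# Test-apply each listed revision on top of the checkout, silently skipping
# any that do not apply cleanly (see DESCRIPTION above)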
for patchrev in $patchrevlist ; do
    echo "Applying $patchrev"
    patchfile=`mktemp`
    git show $patchrev > $patchfile
    git apply --check $patchfile &> /dev/null
    if [ $? != 0 ] ; then
        echo " ... patch does not apply without errors, ignoring"
    else
        echo "Applied $patchrev" >> $logfile
        git apply $patchfile &> /dev/null
    fi
    rm $patchfile
done

sync
echo "Quiescing for 5s..."
sleep 5

echo "Running $runscript at $rev..."
timeoutfile=`mktemp`
/usr/bin/time -o $timeoutfile -f "%e\nreal\t%E\nuser\t%Us\nsys\t%Ss\nmaxm\t%Mk" $runscript 2>&1 | tee -a $logfile
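# Capture the exit status of the time/worker command (the first element of
# the pipeline) rather than that of tee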
exitstatus=${PIPESTATUS[0]}

git reset --hard HEAD > /dev/null
popd > /dev/null

timeresult=`head -n1 $timeoutfile`
cat $timeoutfile | tee -a $logfile
rm $timeoutfile

if [ $exitstatus != 0 ] ; then
    # Build failed, exit with 125 to tell git bisect run to skip this rev
    echo "*** Build failed (exit code $exitstatus), skipping..." | tee -a $logfile
    exit 125
fi

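# bc prints 1 if the measured time exceeds the effective threshold
# (threshold - tolerance) and 0 otherwise, which maps directly onto the
# bad/good exit codes expected by "git bisect run"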
ret=`echo "scale = 2; $timeresult > $timethreshold - $tolerance" | bc`
echo "Returning $ret" | tee -a $logfile
exit $ret
class="bp">self.taskwhitelist = None self.init_rundepcheck(data) def init_rundepcheck(self, data): self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST", True) or None if self.taskwhitelist: self.twl = re.compile(self.taskwhitelist) else: self.twl = None def _build_data(self, fn, d): tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d) taskdeps = {} basehash = {} for task in tasklist: data = lookupcache[task] if data is None: bb.error("Task %s from %s seems to be empty?!" % (task, fn)) data = '' gendeps[task] -= self.basewhitelist newdeps = gendeps[task] seen = set() while newdeps: nextdeps = newdeps seen |= nextdeps newdeps = set() for dep in nextdeps: if dep in self.basewhitelist: continue gendeps[dep] -= self.basewhitelist newdeps |= gendeps[dep] newdeps -= seen alldeps = sorted(seen) for dep in alldeps: data = data + dep var = lookupcache[dep] if var is not None: data = data + str(var) self.basehash[fn + "." + task] = hashlib.md5(data).hexdigest() taskdeps[task] = alldeps self.taskdeps[fn] = taskdeps self.gendeps[fn] = gendeps self.lookupcache[fn] = lookupcache return taskdeps def finalise(self, fn, d, variant): if variant: fn = "virtual:" + variant + ":" + fn try: taskdeps = self._build_data(fn, d) except: bb.note("Error during finalise of %s" % fn) raise #Slow but can be useful for debugging mismatched basehashes #for task in self.taskdeps[fn]: # self.dump_sigtask(fn, task, d.getVar("STAMP", True), False) for task in taskdeps: d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + "." + task]) def rundep_check(self, fn, recipename, task, dep, depname, dataCache): # Return True if we should keep the dependency, False to drop it # We only manipulate the dependencies for packages not in the whitelist if self.twl and not self.twl.search(recipename): # then process the actual dependencies if self.twl.search(depname): return False return True def read_taint(self, fn, task, stampbase): taint = None try: with open(stampbase + '.' + task + '.taint', 'r') as taintf: taint = taintf.read() except IOError: pass return taint def get_taskhash(self, fn, task, deps, dataCache): k = fn + "." + task data = dataCache.basetaskhash[k] self.runtaskdeps[k] = [] self.file_checksum_values[k] = {} recipename = dataCache.pkg_fn[fn] for dep in sorted(deps, key=clean_basepath): depname = dataCache.pkg_fn[self.pkgnameextract.search(dep).group('fn')] if not self.rundep_check(fn, recipename, task, dep, depname, dataCache): continue if dep not in self.taskhash: bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?", dep) data = data + self.taskhash[dep] self.runtaskdeps[k].append(dep) if task in dataCache.file_checksums[fn]: checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename) for (f,cs) in checksums: self.file_checksum_values[k][f] = cs if cs: data = data + cs taskdep = dataCache.task_deps[fn] if 'nostamp' in taskdep and task in taskdep['nostamp']: # Nostamp tasks need an implicit taint so that they force any dependent tasks to run import uuid data = data + str(uuid.uuid4()) taint = self.read_taint(fn, task, dataCache.stamp[fn]) if taint: data = data + taint logger.warn("%s is tainted from a forced run" % k) h = hashlib.md5(data).hexdigest() self.taskhash[k] = h #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task]) return h def dump_sigtask(self, fn, task, stampbase, runtime): k = fn + "." + task if runtime == "customfile": sigfile = stampbase elif runtime and k in self.taskhash: sigfile = stampbase + "." + task + ".sigdata" + "." 
+ self.taskhash[k] else: sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[k] bb.utils.mkdirhier(os.path.dirname(sigfile)) data = {} data['basewhitelist'] = self.basewhitelist data['taskwhitelist'] = self.taskwhitelist data['taskdeps'] = self.taskdeps[fn][task] data['basehash'] = self.basehash[k] data['gendeps'] = {} data['varvals'] = {} data['varvals'][task] = self.lookupcache[fn][task] for dep in self.taskdeps[fn][task]: if dep in self.basewhitelist: continue data['gendeps'][dep] = self.gendeps[fn][dep] data['varvals'][dep] = self.lookupcache[fn][dep] if runtime and k in self.taskhash: data['runtaskdeps'] = self.runtaskdeps[k] data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[k].items()] data['runtaskhashes'] = {} for dep in data['runtaskdeps']: data['runtaskhashes'][dep] = self.taskhash[dep] taint = self.read_taint(fn, task, stampbase) if taint: data['taint'] = taint fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.") try: with os.fdopen(fd, "wb") as stream: p = pickle.dump(data, stream, -1) stream.flush() os.chmod(tmpfile, 0664) os.rename(tmpfile, sigfile) except (OSError, IOError) as err: try: os.unlink(tmpfile) except OSError: pass raise err def dump_sigs(self, dataCache, options): for fn in self.taskdeps: for task in self.taskdeps[fn]: k = fn + "." + task if k not in self.taskhash: continue if dataCache.basetaskhash[k] != self.basehash[k]: bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % k) bb.error("The mismatched hashes were %s and %s" % (dataCache.basetaskhash[k], self.basehash[k])) self.dump_sigtask(fn, task, dataCache.stamp[fn], True) class SignatureGeneratorBasicHash(SignatureGeneratorBasic): name = "basichash" def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False): if taskname != "do_setscene" and taskname.endswith("_setscene"): k = fn + "." + taskname[:-9] else: k = fn + "." 
+ taskname if clean: h = "*" elif k in self.taskhash: h = self.taskhash[k] else: # If k is not in basehash, then error h = self.basehash[k] return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.') def stampcleanmask(self, stampbase, fn, taskname, extrainfo): return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True) def invalidate_task(self, task, d, fn): bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task)) bb.build.write_taint(task, d, fn) def dump_this_task(outfile, d): import bb.parse fn = d.getVar("BB_FILENAME", True) task = "do_" + d.getVar("BB_CURRENTTASK", True) bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile") def clean_basepath(a): b = a.rsplit("/", 2)[1] + a.rsplit("/", 2)[2] if a.startswith("virtual:"): b = b + ":" + a.rsplit(":", 1)[0] return b def clean_basepaths(a): b = {} for x in a: b[clean_basepath(x)] = a[x] return b def clean_basepaths_list(a): b = [] for x in a: b.append(clean_basepath(x)) return b def compare_sigfiles(a, b, recursecb = None): output = [] p1 = pickle.Unpickler(open(a, "rb")) a_data = p1.load() p2 = pickle.Unpickler(open(b, "rb")) b_data = p2.load() def dict_diff(a, b, whitelist=set()): sa = set(a.keys()) sb = set(b.keys()) common = sa & sb changed = set() for i in common: if a[i] != b[i] and i not in whitelist: changed.add(i) added = sb - sa removed = sa - sb return changed, added, removed def file_checksums_diff(a, b): from collections import Counter # Handle old siginfo format if isinstance(a, dict): a = [(os.path.basename(f), cs) for f, cs in a.items()] if isinstance(b, dict): b = [(os.path.basename(f), cs) for f, cs in b.items()] # Compare lists, ensuring we can handle duplicate filenames if they exist removedcount = Counter(a) removedcount.subtract(b) addedcount = Counter(b) addedcount.subtract(a) added = [] for x in b: if addedcount[x] > 0: addedcount[x] -= 1 added.append(x) removed = [] changed = [] for x in a: if removedcount[x] > 0: removedcount[x] -= 1 for y in added: if y[0] == x[0]: changed.append((x[0], x[1], y[1])) added.remove(y) break else: removed.append(x) added = [x[0] for x in added] removed = [x[0] for x in removed] return changed, added, removed if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']: output.append("basewhitelist changed from '%s' to '%s'" % (a_data['basewhitelist'], b_data['basewhitelist'])) if a_data['basewhitelist'] and b_data['basewhitelist']: output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist'])) if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']: output.append("taskwhitelist changed from '%s' to '%s'" % (a_data['taskwhitelist'], b_data['taskwhitelist'])) if a_data['taskwhitelist'] and b_data['taskwhitelist']: output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist'])) if a_data['taskdeps'] != b_data['taskdeps']: output.append("Task dependencies changed from:\n%s\nto:\n%s" % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps']))) if a_data['basehash'] != b_data['basehash']: output.append("basehash changed from %s to %s" % (a_data['basehash'], b_data['basehash'])) changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist']) if changed: for dep in changed: output.append("List of dependencies for variable %s changed from '%s' to '%s'" % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep])) if a_data['gendeps'][dep] and 
b_data['gendeps'][dep]: output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep])) if added: for dep in added: output.append("Dependency on variable %s was added" % (dep)) if removed: for dep in removed: output.append("Dependency on Variable %s was removed" % (dep)) changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals']) if changed: for dep in changed: output.append("Variable %s value changed from '%s' to '%s'" % (dep, a_data['varvals'][dep], b_data['varvals'][dep])) changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values']) if changed: for f, old, new in changed: output.append("Checksum for file %s changed from %s to %s" % (f, old, new)) if added: for f in added: output.append("Dependency on checksum of file %s was added" % (f)) if removed: for f in removed: output.append("Dependency on checksum of file %s was removed" % (f)) changed = [] for idx, task in enumerate(a_data['runtaskdeps']): a = a_data['runtaskdeps'][idx] b = b_data['runtaskdeps'][idx] if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b]: changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b])) if changed: output.append("runtaskdeps changed from %s to %s" % (clean_basepaths_list(a_data['runtaskdeps']), clean_basepaths_list(b_data['runtaskdeps']))) output.append("\n".join(changed)) if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data: a = a_data['runtaskhashes'] b = b_data['runtaskhashes'] changed, added, removed = dict_diff(a, b) if added: for dep in added: bdep_found = False if removed: for bdep in removed: if b[dep] == a[bdep]: #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep)) bdep_found = True if not bdep_found: output.append("Dependency on task %s was added with hash %s" % (clean_basepath(dep), b[dep])) if removed: for dep in removed: adep_found = False if added: for adep in added: if b[adep] == a[dep]: #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep)) adep_found = True if not adep_found: output.append("Dependency on task %s was removed with hash %s" % (clean_basepath(dep), a[dep])) if changed: for dep in changed: output.append("Hash for dependent task %s changed from %s to %s" % (clean_basepath(dep), a[dep], b[dep])) if callable(recursecb): # If a dependent hash changed, might as well print the line above and then defer to the changes in # that hash since in all likelyhood, they're the same changes this task also saw. 
recout = recursecb(dep, a[dep], b[dep]) if recout: output = [output[-1]] + recout a_taint = a_data.get('taint', None) b_taint = b_data.get('taint', None) if a_taint != b_taint: output.append("Taint (by forced/invalidated task) changed from %s to %s" % (a_taint, b_taint)) return output def dump_sigfile(a): output = [] p1 = pickle.Unpickler(open(a, "rb")) a_data = p1.load() output.append("basewhitelist: %s" % (a_data['basewhitelist'])) output.append("taskwhitelist: %s" % (a_data['taskwhitelist'])) output.append("Task dependencies: %s" % (sorted(a_data['taskdeps']))) output.append("basehash: %s" % (a_data['basehash'])) for dep in a_data['gendeps']: output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep])) for dep in a_data['varvals']: output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep])) if 'runtaskdeps' in a_data: output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps'])) if 'file_checksum_values' in a_data: output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values'])) if 'runtaskhashes' in a_data: for dep in a_data['runtaskhashes']: output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])) if 'taint' in a_data: output.append("Tainted (by forced/invalidated task): %s" % a_data['taint']) data = a_data['basehash'] for dep in a_data['runtaskdeps']: data = data + a_data['runtaskhashes'][dep] for c in a_data['file_checksum_values']: data = data + c[1] if 'taint' in a_data: data = data + a_data['taint'] h = hashlib.md5(data).hexdigest() output.append("Computed Hash is %s" % h) return output