diff options
Diffstat (limited to 'yocto-poky/scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r-- | yocto-poky/scripts/pybootchartgui/pybootchartgui/parsing.py | 740 |
1 files changed, 0 insertions, 740 deletions
diff --git a/yocto-poky/scripts/pybootchartgui/pybootchartgui/parsing.py b/yocto-poky/scripts/pybootchartgui/pybootchartgui/parsing.py deleted file mode 100644 index d423b9f77..000000000 --- a/yocto-poky/scripts/pybootchartgui/pybootchartgui/parsing.py +++ /dev/null @@ -1,740 +0,0 @@ -# This file is part of pybootchartgui. - -# pybootchartgui is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# pybootchartgui is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>. - - -from __future__ import with_statement - -import os -import string -import re -import sys -import tarfile -from time import clock -from collections import defaultdict -from functools import reduce - -from .samples import * -from .process_tree import ProcessTree - -if sys.version_info >= (3, 0): - long = int - -# Parsing produces as its end result a 'Trace' - -class Trace: - def __init__(self, writer, paths, options): - self.processes = {} - self.start = {} - self.end = {} - self.min = None - self.max = None - self.headers = None - self.disk_stats = None - self.ps_stats = None - self.taskstats = None - self.cpu_stats = None - self.cmdline = None - self.kernel = None - self.kernel_tree = None - self.filename = None - self.parent_map = None - self.mem_stats = None - - if len(paths): - parse_paths (writer, self, paths) - if not self.valid(): - raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths)) - - if options.full_time: - self.min = min(self.start.keys()) - self.max = max(self.end.keys()) - - return - - # Turn that parsed information into something more useful - # link processes into a tree of pointers, calculate statistics - self.compile(writer) - - # Crop the chart to the end of the first idle period after the given - # process - if options.crop_after: - idle = self.crop (writer, options.crop_after) - else: - idle = None - - # Annotate other times as the first start point of given process lists - self.times = [ idle ] - if options.annotate: - for procnames in options.annotate: - names = [x[:15] for x in procnames.split(",")] - for proc in self.ps_stats.process_map.values(): - if proc.cmd in names: - self.times.append(proc.start_time) - break - else: - self.times.append(None) - - self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats, - self.ps_stats.sample_period, - self.headers.get("profile.process"), - options.prune, idle, self.taskstats, - self.parent_map is not None) - - if self.kernel is not None: - self.kernel_tree = ProcessTree(writer, self.kernel, None, 0, - self.headers.get("profile.process"), - False, None, None, True) - - def valid(self): - return len(self.processes) != 0 - return self.headers != None and self.disk_stats != None and \ - self.ps_stats != None and self.cpu_stats != None - - def add_process(self, process, start, end): - self.processes[process] = [start, end] - if start not in self.start: - self.start[start] = [] - if process not in self.start[start]: - self.start[start].append(process) - if end not in self.end: - self.end[end] = [] - if process not in self.end[end]: - self.end[end].append(process) - - def compile(self, writer): - - def find_parent_id_for(pid): - if pid is 0: - return 0 - ppid = self.parent_map.get(pid) - if ppid: - # many of these double forks are so short lived - # that we have no samples, or process info for them - # so climb the parent hierarcy to find one - if int (ppid * 1000) not in self.ps_stats.process_map: -# print "Pid '%d' short lived with no process" % ppid - ppid = find_parent_id_for (ppid) -# else: -# print "Pid '%d' has an entry" % ppid - else: -# print "Pid '%d' missing from pid map" % pid - return 0 - return ppid - - # merge in the cmdline data - if self.cmdline is not None: - for proc in self.ps_stats.process_map.values(): - rpid = int (proc.pid // 1000) - if rpid in self.cmdline: - cmd = self.cmdline[rpid] - proc.exe = cmd['exe'] - proc.args = cmd['args'] -# else: -# print "proc %d '%s' not in cmdline" % (rpid, proc.exe) - - # re-parent any stray orphans if we can - if self.parent_map is not None: - for process in self.ps_stats.process_map.values(): - ppid = find_parent_id_for (int(process.pid // 1000)) - if ppid: - process.ppid = ppid * 1000 - - # stitch the tree together with pointers - for process in self.ps_stats.process_map.values(): - process.set_parent (self.ps_stats.process_map) - - # count on fingers variously - for process in self.ps_stats.process_map.values(): - process.calc_stats (self.ps_stats.sample_period) - - def crop(self, writer, crop_after): - - def is_idle_at(util, start, j): - k = j + 1 - while k < len(util) and util[k][0] < start + 300: - k += 1 - k = min(k, len(util)-1) - - if util[j][1] >= 0.25: - return False - - avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1) - if avgload < 0.25: - return True - else: - return False - def is_idle(util, start): - for j in range(0, len(util)): - if util[j][0] < start: - continue - return is_idle_at(util, start, j) - else: - return False - - names = [x[:15] for x in crop_after.split(",")] - for proc in self.ps_stats.process_map.values(): - if proc.cmd in names or proc.exe in names: - writer.info("selected proc '%s' from list (start %d)" - % (proc.cmd, proc.start_time)) - break - if proc is None: - writer.warn("no selected crop proc '%s' in list" % crop_after) - - - cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats] - disk_util = [(sample.time, sample.util) for sample in self.disk_stats] - - idle = None - for i in range(0, len(cpu_util)): - if cpu_util[i][0] < proc.start_time: - continue - if is_idle_at(cpu_util, cpu_util[i][0], i) \ - and is_idle(disk_util, cpu_util[i][0]): - idle = cpu_util[i][0] - break - - if idle is None: - writer.warn ("not idle after proc '%s'" % crop_after) - return None - - crop_at = idle + 300 - writer.info ("cropping at time %d" % crop_at) - while len (self.cpu_stats) \ - and self.cpu_stats[-1].time > crop_at: - self.cpu_stats.pop() - while len (self.disk_stats) \ - and self.disk_stats[-1].time > crop_at: - self.disk_stats.pop() - - self.ps_stats.end_time = crop_at - - cropped_map = {} - for key, value in self.ps_stats.process_map.items(): - if (value.start_time <= crop_at): - cropped_map[key] = value - - for proc in cropped_map.values(): - proc.duration = min (proc.duration, crop_at - proc.start_time) - while len (proc.samples) \ - and proc.samples[-1].time > crop_at: - proc.samples.pop() - - self.ps_stats.process_map = cropped_map - - return idle - - - -class ParseError(Exception): - """Represents errors during parse of the bootchart.""" - def __init__(self, value): - self.value = value - - def __str__(self): - return self.value - -def _parse_headers(file): - """Parses the headers of the bootchart.""" - def parse(acc, line): - (headers, last) = acc - if '=' in line: - last, value = map (lambda x: x.strip(), line.split('=', 1)) - else: - value = line.strip() - headers[last] += value - return headers, last - return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0] - -def _parse_timed_blocks(file): - """Parses (ie., splits) a file into so-called timed-blocks. A - timed-block consists of a timestamp on a line by itself followed - by zero or more lines of data for that point in time.""" - def parse(block): - lines = block.split('\n') - if not lines: - raise ParseError('expected a timed-block consisting a timestamp followed by data lines') - try: - return (int(lines[0]), lines[1:]) - except ValueError: - raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) - blocks = file.read().decode('utf-8').split('\n\n') - return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')] - -def _parse_proc_ps_log(writer, file): - """ - * See proc(5) for details. - * - * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime, - * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack, - * kstkesp, kstkeip} - """ - processMap = {} - ltime = 0 - timed_blocks = _parse_timed_blocks(file) - for time, lines in timed_blocks: - for line in lines: - if not line: continue - tokens = line.split(' ') - if len(tokens) < 21: - continue - - offset = [index for index, token in enumerate(tokens[1:]) if token[-1] == ')'][0] - pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset]) - userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset]) - - # magic fixed point-ness ... - pid *= 1000 - ppid *= 1000 - if pid in processMap: - process = processMap[pid] - process.cmd = cmd.strip('()') # why rename after latest name?? - else: - process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime)) - processMap[pid] = process - - if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None: - userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime)) - cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0) - process.samples.append(ProcessSample(time, state, cpuSample)) - - process.last_user_cpu_time = userCpu - process.last_sys_cpu_time = sysCpu - ltime = time - - if len (timed_blocks) < 2: - return None - - startTime = timed_blocks[0][0] - avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1) - - return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime) - -def _parse_taskstats_log(writer, file): - """ - * See bootchart-collector.c for details. - * - * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total } - * - """ - processMap = {} - pidRewrites = {} - ltime = None - timed_blocks = _parse_timed_blocks(file) - for time, lines in timed_blocks: - # we have no 'stime' from taskstats, so prep 'init' - if ltime is None: - process = Process(writer, 1, '[init]', 0, 0) - processMap[1000] = process - ltime = time -# continue - for line in lines: - if not line: continue - tokens = line.split(' ') - if len(tokens) != 6: - continue - - opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2] - cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1]), - - # make space for trees of pids - opid *= 1000 - ppid *= 1000 - - # when the process name changes, we re-write the pid. - if opid in pidRewrites: - pid = pidRewrites[opid] - else: - pid = opid - - cmd = cmd.strip('(').strip(')') - if pid in processMap: - process = processMap[pid] - if process.cmd != cmd: - pid += 1 - pidRewrites[opid] = pid -# print "process mutation ! '%s' vs '%s' pid %s -> pid %s\n" % (process.cmd, cmd, opid, pid) - process = process.split (writer, pid, cmd, ppid, time) - processMap[pid] = process - else: - process.cmd = cmd; - else: - process = Process(writer, pid, cmd, ppid, time) - processMap[pid] = process - - delta_cpu_ns = (float) (cpu_ns - process.last_cpu_ns) - delta_blkio_delay_ns = (float) (blkio_delay_ns - process.last_blkio_delay_ns) - delta_swapin_delay_ns = (float) (swapin_delay_ns - process.last_swapin_delay_ns) - - # make up some state data ... - if delta_cpu_ns > 0: - state = "R" - elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0: - state = "D" - else: - state = "S" - - # retain the ns timing information into a CPUSample - that tries - # with the old-style to be a %age of CPU used in this time-slice. - if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0: -# print "proc %s cpu_ns %g delta_cpu %g" % (cmd, cpu_ns, delta_cpu_ns) - cpuSample = CPUSample('null', delta_cpu_ns, 0.0, - delta_blkio_delay_ns, - delta_swapin_delay_ns) - process.samples.append(ProcessSample(time, state, cpuSample)) - - process.last_cpu_ns = cpu_ns - process.last_blkio_delay_ns = blkio_delay_ns - process.last_swapin_delay_ns = swapin_delay_ns - ltime = time - - if len (timed_blocks) < 2: - return None - - startTime = timed_blocks[0][0] - avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1) - - return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime) - -def _parse_proc_stat_log(file): - samples = [] - ltimes = None - for time, lines in _parse_timed_blocks(file): - # skip emtpy lines - if not lines: - continue - # CPU times {user, nice, system, idle, io_wait, irq, softirq} - tokens = lines[0].split() - times = [ int(token) for token in tokens[1:] ] - if ltimes: - user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1])) - system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6])) - idle = float(times[3] - ltimes[3]) - iowait = float(times[4] - ltimes[4]) - - aSum = max(user + system + idle + iowait, 1) - samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) ) - - ltimes = times - # skip the rest of statistics lines - return samples - -def _parse_proc_disk_stat_log(file, numCpu): - """ - Parse file for disk stats, but only look at the whole device, eg. sda, - not sda1, sda2 etc. The format of relevant lines should be: - {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq} - """ - disk_regex_re = re.compile ('^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$') - - # this gets called an awful lot. - def is_relevant_line(linetokens): - if len(linetokens) != 14: - return False - disk = linetokens[2] - return disk_regex_re.match(disk) - - disk_stat_samples = [] - - for time, lines in _parse_timed_blocks(file): - sample = DiskStatSample(time) - relevant_tokens = [linetokens for linetokens in map (lambda x: x.split(),lines) if is_relevant_line(linetokens)] - - for tokens in relevant_tokens: - disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12]) - sample.add_diskdata([rsect, wsect, use]) - - disk_stat_samples.append(sample) - - disk_stats = [] - for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]): - interval = sample1.time - sample2.time - if interval == 0: - interval = 1 - sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ] - readTput = sums[0] / 2.0 * 100.0 / interval - writeTput = sums[1] / 2.0 * 100.0 / interval - util = float( sums[2] ) / 10 / interval / numCpu - util = max(0.0, min(1.0, util)) - disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util)) - - return disk_stats - -def _parse_proc_meminfo_log(file): - """ - Parse file for global memory statistics. - The format of relevant lines should be: ^key: value( unit)? - """ - used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',) - - mem_stats = [] - meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*') - - for time, lines in _parse_timed_blocks(file): - sample = MemSample(time) - - for line in lines: - match = meminfo_re.match(line) - if not match: - raise ParseError("Invalid meminfo line \"%s\"" % match.groups(0)) - sample.add_value(match.group(1), int(match.group(2))) - - if sample.valid(): - mem_stats.append(sample) - - return mem_stats - -# if we boot the kernel with: initcall_debug printk.time=1 we can -# get all manner of interesting data from the dmesg output -# We turn this into a pseudo-process tree: each event is -# characterised by a -# we don't try to detect a "kernel finished" state - since the kernel -# continues to do interesting things after init is called. -# -# sample input: -# [ 0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL Napa 00000001 MSFT 01000013) -# ... -# [ 0.039993] calling migration_init+0x0/0x6b @ 1 -# [ 0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs -def _parse_dmesg(writer, file): - timestamp_re = re.compile ("^\[\s*(\d+\.\d+)\s*]\s+(.*)$") - split_re = re.compile ("^(\S+)\s+([\S\+_-]+) (.*)$") - processMap = {} - idx = 0 - inc = 1.0 / 1000000 - kernel = Process(writer, idx, "k-boot", 0, 0.1) - processMap['k-boot'] = kernel - base_ts = False - max_ts = 0 - for line in file.read().decode('utf-8').split('\n'): - t = timestamp_re.match (line) - if t is None: -# print "duff timestamp " + line - continue - - time_ms = float (t.group(1)) * 1000 - # looks like we may have a huge diff after the clock - # has been set up. This could lead to huge graph: - # so huge we will be killed by the OOM. - # So instead of using the plain timestamp we will - # use a delta to first one and skip the first one - # for convenience - if max_ts == 0 and not base_ts and time_ms > 1000: - base_ts = time_ms - continue - max_ts = max(time_ms, max_ts) - if base_ts: -# print "fscked clock: used %f instead of %f" % (time_ms - base_ts, time_ms) - time_ms -= base_ts - m = split_re.match (t.group(2)) - - if m is None: - continue -# print "match: '%s'" % (m.group(1)) - type = m.group(1) - func = m.group(2) - rest = m.group(3) - - if t.group(2).startswith ('Write protecting the') or \ - t.group(2).startswith ('Freeing unused kernel memory'): - kernel.duration = time_ms / 10 - continue - -# print "foo: '%s' '%s' '%s'" % (type, func, rest) - if type == "calling": - ppid = kernel.pid - p = re.match ("\@ (\d+)", rest) - if p is not None: - ppid = float (p.group(1)) // 1000 -# print "match: '%s' ('%g') at '%s'" % (func, ppid, time_ms) - name = func.split ('+', 1) [0] - idx += inc - processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10) - elif type == "initcall": -# print "finished: '%s' at '%s'" % (func, time_ms) - if func in processMap: - process = processMap[func] - process.duration = (time_ms / 10) - process.start_time - else: - print("corrupted init call for %s" % (func)) - - elif type == "async_waiting" or type == "async_continuing": - continue # ignore - - return processMap.values() - -# -# Parse binary pacct accounting file output if we have one -# cf. /usr/include/linux/acct.h -# -def _parse_pacct(writer, file): - # read LE int32 - def _read_le_int32(file): - byts = file.read(4) - return (ord(byts[0])) | (ord(byts[1]) << 8) | \ - (ord(byts[2]) << 16) | (ord(byts[3]) << 24) - - parent_map = {} - parent_map[0] = 0 - while file.read(1) != "": # ignore flags - ver = file.read(1) - if ord(ver) < 3: - print("Invalid version 0x%x" % (ord(ver))) - return None - - file.seek (14, 1) # user, group etc. - pid = _read_le_int32 (file) - ppid = _read_le_int32 (file) -# print "Parent of %d is %d" % (pid, ppid) - parent_map[pid] = ppid - file.seek (4 + 4 + 16, 1) # timings - file.seek (16, 1) # acct_comm - return parent_map - -def _parse_paternity_log(writer, file): - parent_map = {} - parent_map[0] = 0 - for line in file.read().decode('utf-8').split('\n'): - if not line: - continue - elems = line.split(' ') # <Child> <Parent> - if len (elems) >= 2: -# print "paternity of %d is %d" % (int(elems[0]), int(elems[1])) - parent_map[int(elems[0])] = int(elems[1]) - else: - print("Odd paternity line '%s'" % (line)) - return parent_map - -def _parse_cmdline_log(writer, file): - cmdLines = {} - for block in file.read().decode('utf-8').split('\n\n'): - lines = block.split('\n') - if len (lines) >= 3: -# print "Lines '%s'" % (lines[0]) - pid = int (lines[0]) - values = {} - values['exe'] = lines[1].lstrip(':') - args = lines[2].lstrip(':').split('\0') - args.pop() - values['args'] = args - cmdLines[pid] = values - return cmdLines - -def get_num_cpus(headers): - """Get the number of CPUs from the system.cpu header property. As the - CPU utilization graphs are relative, the number of CPUs currently makes - no difference.""" - if headers is None: - return 1 - if headers.get("system.cpu.num"): - return max (int (headers.get("system.cpu.num")), 1) - cpu_model = headers.get("system.cpu") - if cpu_model is None: - return 1 - mat = re.match(".*\\((\\d+)\\)", cpu_model) - if mat is None: - return 1 - return max (int(mat.group(1)), 1) - -def _do_parse(writer, state, filename, file): - writer.info("parsing '%s'" % filename) - t1 = clock() - paths = filename.split("/") - task = paths[-1] - pn = paths[-2] - start = None - end = None - for line in file: - if line.startswith("Started:"): - start = int(float(line.split()[-1])) - elif line.startswith("Ended:"): - end = int(float(line.split()[-1])) - if start and end: - state.add_process(pn + ":" + task, start, end) - t2 = clock() - writer.info(" %s seconds" % str(t2-t1)) - return state - -def parse_file(writer, state, filename): - if state.filename is None: - state.filename = filename - basename = os.path.basename(filename) - with open(filename, "rb") as file: - return _do_parse(writer, state, filename, file) - -def parse_paths(writer, state, paths): - for path in paths: - if state.filename is None: - state.filename = path - root, extension = os.path.splitext(path) - if not(os.path.exists(path)): - writer.warn("warning: path '%s' does not exist, ignoring." % path) - continue - #state.filename = path - if os.path.isdir(path): - files = sorted([os.path.join(path, f) for f in os.listdir(path)]) - state = parse_paths(writer, state, files) - elif extension in [".tar", ".tgz", ".gz"]: - if extension == ".gz": - root, extension = os.path.splitext(root) - if extension != ".tar": - writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension) - continue - tf = None - try: - writer.status("parsing '%s'" % path) - tf = tarfile.open(path, 'r:*') - for name in tf.getnames(): - state = _do_parse(writer, state, name, tf.extractfile(name)) - except tarfile.ReadError as error: - raise ParseError("error: could not read tarfile '%s': %s." % (path, error)) - finally: - if tf != None: - tf.close() - else: - state = parse_file(writer, state, path) - return state - -def split_res(res, options): - """ Split the res into n pieces """ - res_list = [] - if options.num > 1: - s_list = sorted(res.start.keys()) - frag_size = len(s_list) / float(options.num) - # Need the top value - if frag_size > int(frag_size): - frag_size = int(frag_size + 1) - else: - frag_size = int(frag_size) - - start = 0 - end = frag_size - while start < end: - state = Trace(None, [], None) - if options.full_time: - state.min = min(res.start.keys()) - state.max = max(res.end.keys()) - for i in range(start, end): - # Add this line for reference - #state.add_process(pn + ":" + task, start, end) - for p in res.start[s_list[i]]: - state.add_process(p, s_list[i], res.processes[p][1]) - start = end - end = end + frag_size - if end > len(s_list): - end = len(s_list) - res_list.append(state) - else: - res_list.append(res) - return res_list |