stats.py (6638B)
#!/usr/bin/env python3
"""Create statistics and histograms out of a process records file."""

import argparse
from bisect import bisect_left
from decimal import Decimal
from enum import Enum
from statistics import mean, median, stdev


# Columns parsed as Decimal by retype().
_DECIMAL_HEADERS = frozenset({"TIME", "age", "utime", "stime", "cutime", "cstime"})

# Columns for which mean/median/min/max/stdev are reported.
_AVERAGEABLE_HEADERS = frozenset({
    "age",
    "utime",
    "stime",
    "nvcsw",
    "nivcsw",
    "cutime",
    "cstime",
    "inblk",
    "oublk",
    "cinblk",
    "coublk",
})

# Columns whose statistics are printed with an 's' unit suffix.
_TIME_UNIT_HEADERS = frozenset({"age", "utime", "stime", "cutime", "cstime"})


def get_parser():
    """Build the command line argument parser of the script."""
    parser = argparse.ArgumentParser(
        description="Create statistics out of process records file",
    )
    parser.add_argument("file", help="process records file")
    parser.add_argument("-i", "--increment",
                        default=1,
                        type=Decimal,
                        help="size of a single histogram interval")
    parser.add_argument("-r", "--rate",
                        default="linear",
                        choices=["linear", "exponential"],
                        help="set growth rate of histogram intervals")
    parser.add_argument("-R", "--raw",
                        action="store_true",
                        default=False,
                        help="output just histograms, in csv format for further processing")
    parser.add_argument("-f", "--filter",
                        default="all",
                        help="comma separated list of histograms to show (age,stime,...)")
    return parser


class HistogramRate(Enum):
    """How the width of consecutive histogram intervals evolves."""

    EXPONENTIAL = 1
    LINEAR = 2


def histogram(data, /, start=0.001, step=2, rate=HistogramRate.EXPONENTIAL):
    """Count values of *data* in consecutive half-open intervals.

    The first interval spans between 0 and *start* (whichever is lower comes
    first).  Each following interval begins where the previous one ended; its
    end is moved by *step* (linear rate) or by *step* times the width of the
    previous interval (exponential rate).

    Values lower than the first interval are counted together in a single
    leading interval ``[min(data), lower_bound)`` instead of being searched
    for by a window that only ever moves to the right.

    Returns a dict mapping ``(left, right)`` to the number of values with
    ``left <= value < right``, in ascending order of intervals.  Intervals
    containing no value are not present in the result.

    Raises ValueError for an unknown *rate* and when the next interval would
    be empty (e.g. ``step <= 0``, or a zero-width first interval with the
    exponential rate), which would otherwise never terminate.
    """
    values = sorted(data)
    left, right = sorted([0, start])
    histogram_data = {}

    # Everything sorted before `left` can never be reached by advancing.
    below = bisect_left(values, left)
    if below:
        histogram_data[(values[0], left)] = below

    i = below
    while i < len(values):
        # values[i] >= left holds here: values are sorted and `left` only
        # moves up to the previous `right`, which values[i] already reached.
        if values[i] < right:
            histogram_data[(left, right)] = histogram_data.get((left, right), 0) + 1
            i += 1
            continue

        if rate == HistogramRate.EXPONENTIAL:
            size = right - left
            left = right
            right += step * size
        elif rate == HistogramRate.LINEAR:
            left = right
            right += step
        else:
            raise ValueError(f"Invalid rate type: {rate}")

        if right <= left:
            raise ValueError(
                f"Histogram intervals do not grow (start={start}, step={step}, rate={rate})"
            )

    return histogram_data


def visualize_histogram(histogram):
    """Print *histogram* (as returned by ``histogram()``) as a bar chart.

    The tallest bar is 60 characters wide, the others are scaled to it.
    Nothing is printed for an empty histogram.
    """
    if not histogram:
        return

    highest = max(histogram.values())
    max_width = 60
    labels = {bounds: f"[{bounds[0]}, {bounds[1]})" for bounds in histogram}
    pad = max(map(len, labels.values()))

    for bounds, v in histogram.items():
        bar_width = int(max_width * v / highest)
        print(f"{labels[bounds]:{pad}} {bar_width * '█'} {v}")


def retype(data, header):
    """Convert the text *data* to the type used for column *header*.

    PCOMM stays a string, time-like columns become Decimal and every other
    column is parsed as int.
    """
    if header == "PCOMM":
        return data

    if header in _DECIMAL_HEADERS:
        return Decimal(data)

    return int(data)


def averageable(header):
    """Return True when summary statistics are reported for *header*."""
    return header in _AVERAGEABLE_HEADERS


def unit(header):
    """Return the unit suffix printed after statistics of *header*."""
    if header in _TIME_UNIT_HEADERS:
        return 's'

    return ''


def medianable(header):
    """Return True when the median is reported for *header*."""
    return averageable(header)


def histogramable(header):
    """Return True when a histogram can be produced for *header*."""
    return averageable(header) or header in {"UID", "exit", "exitsig"}


def print_pcomm_stats(data):
    """Print per-program record counts with an (intensive/passive) split.

    Programs are listed from the most to the least frequent; ties are broken
    by program name in descending order.  Nothing is printed for empty *data*.
    """
    if not data:
        return

    pcomm_stats = {}
    pcomm_intensive = {}
    for d in data:
        program = d["PCOMM"]
        pcomm_stats[program] = pcomm_stats.get(program, 0) + 1

        intensive, passive = pcomm_intensive.get(program, (0, 0))
        if is_process_intensive(d):
            intensive += 1
        else:
            passive += 1
        pcomm_intensive[program] = (intensive, passive)

    m = max(map(len, pcomm_stats))
    for program, count in sorted(pcomm_stats.items(), key=lambda p: p[::-1], reverse=True):
        intensive, passive = pcomm_intensive[program]
        print(f"{f'{program}':{m}} {count}\t({intensive}/{passive})")


def is_process_intensive(data):
    """Return True when the record spent over 30 % of its age on the CPU.

    Records with an age of at most 0.1 are never considered intensive, which
    also keeps the division away from a zero age.
    """
    age = data["age"]
    utime = data["utime"]
    stime = data["stime"]
    return age > 0.1 and (utime + stime) / age > 0.3


def _parse_records(lines):
    """Parse the column header line and the record lines of the file.

    The third line holds the column names; records follow it and the last
    three lines of the file are not records.  Columns are separated by
    whitespace, or by commas when the header line has no whitespace in it.
    The first column is split off from the left and the remaining ones from
    the right, so the second column may itself contain the separator.

    Returns ``(headers, data)`` where *data* is a list of dicts keyed by
    header, with values converted by ``retype()``.
    """
    header_line = lines[2].strip()
    headers = header_line.split()
    delim = None
    if len(headers) == 1:
        delim = ','
        headers = [name.strip() for name in header_line.split(delim)]

    data = []
    for line in lines[3:-3]:
        time, rest = line.strip().split(delim, maxsplit=1)
        line_tokens = [time] + rest.rsplit(delim, maxsplit=len(headers) - 2)

        process_data = {}
        for i, v in enumerate(headers):
            process_data[v] = retype(line_tokens[i].strip(), v)

        data.append(process_data)

    return headers, data


def _header_histogram(data, header, increment, rate):
    """Build the histogram of column *header* using the command line options."""
    return histogram([d[header] for d in data],
                     start=increment if averageable(header) else -1,
                     step=increment,
                     rate=HistogramRate[rate.upper()])


def main():
    """Parse the command line, read the records file and print the report."""
    parser = get_parser()
    args = parser.parse_args()

    # A non-positive increment can never move the histogram window forward.
    if args.increment <= 0:
        print("Increment must be greater than 0\n")
        parser.print_help()
        return

    if args.rate == "exponential" and args.increment <= 1:
        print("Exponential rate can't have increment <= 1\n")
        parser.print_help()
        return

    log_filename = args.file

    with open(log_filename, 'r') as f:
        lines = f.readlines()

    if len(lines) < 3:
        print(f"{log_filename}: not a process records file (column header line is missing)")
        return

    headers, data = _parse_records(lines)

    filtered = {name.strip().lower() for name in args.filter.split(',')}
    show_all = args.filter == "all"

    def selected(header):
        # The filter is matched case-insensitively in both output modes.
        return show_all or header.lower() in filtered

    if args.raw:
        for header in headers:
            if histogramable(header) and selected(header):
                print(f"{header}_from,{header}_to,count")
                h = _header_histogram(data, header, args.increment, args.rate)
                for (start, end), count in h.items():
                    print(f"{start},{end},{count}")
                print()
        return

    print(f"Number of processes handled: {len(data)}")
    print(lines[0].strip())
    print(lines[-1].strip())
    print(lines[1].strip())

    if not data:
        # No statistic is defined on an empty sample.
        return

    # (applies to header, function, label, minimal number of samples)
    operations = [
        (averageable, mean, "Average", 1),
        (medianable, median, "Median", 1),
        (averageable, min, "Minimal", 1),
        (averageable, max, "Maximal", 1),
        (averageable, stdev, "Standard deviation for", 2),
    ]

    for condition, operation, label, min_samples in operations:
        if len(data) < min_samples:
            continue
        print()
        for header in headers:
            if condition(header):
                print(f"{label} {header}: {operation([d[header] for d in data]): .3f}{unit(header)}")

    # histograms
    for header in headers:
        if histogramable(header) and selected(header):
            print()
            print(f"{header.capitalize()} histogram:")
            visualize_histogram(_header_histogram(data, header, args.increment, args.rate))

    # pcomm stats
    if selected("pcomm"):
        print()
        print("PCOMM stats:")
        print_pcomm_stats(data)


if __name__ == "__main__":
    main()