projekt

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

stats.py (6638B)


      1 #!/usr/bin/env python3
      2 
      3 import argparse
      4 from enum import Enum
      5 from decimal import Decimal
      6 from statistics import mean, median, stdev
      7 
      8 
      9 def get_parser():
     10     parser = argparse.ArgumentParser(
     11         description="Create statistics out of process records file",
     12     )
     13     parser.add_argument("file", help="process records file")
     14     parser.add_argument("-i", "--increment",
     15                         default=1,
     16                         type=Decimal,
     17                         help="size of a single histogram interval")
     18     parser.add_argument("-r", "--rate",
     19                         default="linear",
     20                         choices=["linear", "exponential"],
     21                         help="set growth rate of histogram intervals")
     22     parser.add_argument("-R", "--raw",
     23                         action="store_true",
     24                         default=False,
     25                         help="output just histograms, in csv format for further processing")
     26     parser.add_argument("-f", "--filter",
     27                         default="all",
     28                         help="comma separated list of histograms to show (age,stime,...)")
     29     return parser
     30 
     31 
     32 class HistogramRate(Enum):
     33     EXPONENTIAL = 1
     34     LINEAR = 2
     35 
     36 
     37 def histogram(data, /, start=0.001, step=2, rate=HistogramRate.EXPONENTIAL):
     38     data = sorted(data)
     39     left, right = sorted([0, start])
     40     histogram_data = {}
     41 
     42     i = 0
     43     while i < len(data):
     44         if left <= data[i] < right:
     45             histogram_data[(left, right)] = histogram_data.get((left, right), 0) + 1
     46             i += 1
     47         else:
     48             if rate == HistogramRate.EXPONENTIAL:
     49                 size = right - left
     50                 left = right
     51                 right += step * size
     52             elif rate == HistogramRate.LINEAR:
     53                 left = right
     54                 right += step
     55             else:
     56                 raise ValueError(f"Invalid rate type: {rate}")
     57 
     58     return histogram_data
     59 
     60 
     61 def visualize_histogram(histogram):
     62     m = max(histogram.values())
     63     max_width = 60
     64     pad = max(map(lambda s: 1 + len(str(s[0])) + 2 + len(str(s[1])) + 1, histogram.keys()))
     65 
     66     for (left, right), v in histogram.items():
     67         bar_width = int(max_width * v / m)
     68         print(f"{f'[{left}, {right})':{pad}} {bar_width * '█'} {v}")
     69 
     70 
     71 def retype(data, header):
     72     if header == "PCOMM":
     73         return data
     74 
     75     if header in {"TIME", "age", "utime", "stime", "cutime", "cstime"}:
     76         return Decimal(data)
     77 
     78     return int(data)
     79 
     80 
     81 def averageable(header):
     82     return header in {
     83         "age",
     84         "utime",
     85         "stime",
     86         "nvcsw",
     87         "nivcsw",
     88         "cutime",
     89         "cstime",
     90         "inblk",
     91         "oublk",
     92         "cinblk",
     93         "coublk",
     94     }
     95 
     96 
     97 def unit(header):
     98     times = {
     99         "age",
    100         "utime",
    101         "stime",
    102         "cutime",
    103         "cstime",
    104     }
    105 
    106     if header in times:
    107         return 's'
    108 
    109     return ''
    110 
    111 
    112 def medianable(header):
    113     return averageable(header)
    114 
    115 
    116 def histogramable(header):
    117     return averageable(header) or header in {"UID", "exit", "exitsig"}
    118 
    119 
    120 def print_pcomm_stats(data):
    121     pcomm_stats = {}
    122     for d in data:
    123         pcomm_stats[d["PCOMM"]] = pcomm_stats.get(d["PCOMM"], 0) + 1
    124 
    125     pcomm_intensive = {}
    126     for d in data:
    127         prev_i, prev_p = pcomm_intensive.get(d["PCOMM"], (0, 0))
    128         if is_process_intensive(d):
    129             prev_i += 1
    130         else:
    131             prev_p += 1
    132         pcomm_intensive[d["PCOMM"]] = (prev_i, prev_p)
    133 
    134     m = max(map(len, pcomm_stats.keys()))
    135     for program, count in sorted(pcomm_stats.items(), key=lambda p: p[::-1], reverse=True):
    136         intensive, passive = pcomm_intensive[program]
    137         print(f"{f'{program}':{m}} {count}\t({intensive}/{passive})")
    138 
    139 
    140 def is_process_intensive(data):
    141     age = data["age"]
    142     utime = data["utime"]
    143     stime = data["stime"]
    144     return age > 0.1 and (utime + stime) / age > 0.3
    145 
    146 
    147 def main():
    148     parser = get_parser()
    149     args = parser.parse_args()
    150 
    151     if args.rate == "exponential" and args.increment <= 1:
    152         print("Exponential rate can't have increment <= 1\n")
    153         parser.print_help()
    154         return
    155 
    156     log_filename = args.file
    157 
    158     lines = []
    159     with open(log_filename, 'r') as f:
    160         lines = f.readlines()
    161 
    162     data = []
    163 
    164     headers = lines[2].strip().split()
    165     delim = None
    166     if len(headers) == 1:
    167         delim = ','
    168         headers = lines[2].strip(delim).split()
    169 
    170     for line in lines[3:-3]:
    171         time, line = line.split(maxsplit=1)
    172         line_tokens = line.strip(delim).rsplit(maxsplit=len(headers) - 2)
    173         line_tokens = [time] + line_tokens
    174 
    175         process_data = {}
    176         for i, v in enumerate(headers):
    177             process_data[v] = retype(line_tokens[i], v)
    178 
    179         data.append(process_data)
    180 
    181     filtered = list(map(str.lower, args.filter.split(',')))
    182     if args.raw:
    183         for header in headers:
    184             if histogramable(header) and (header.lower() in filtered or args.filter == "all"):
    185                 print(f"{header}_from,{header}_to,count")
    186                 h = histogram([d[header] for d in data], start=-1 if not averageable(header) else args.increment,
    187                               step=args.increment,
    188                               rate=HistogramRate[args.rate.upper()])
    189                 for (start, end), count in h.items():
    190                     print(f"{start},{end},{count}")
    191                 print()
    192         return
    193 
    194     print(f"Number of processes handled: {len(data)}")
    195     print(lines[0].strip())
    196     print(lines[-1].strip())
    197     print(lines[1].strip())
    198 
    199     operations = [
    200         (averageable, mean, "Average"),
    201         (medianable, median, "Median"),
    202         (averageable, min, "Minimal"),
    203         (averageable, max, "Maximal"),
    204         (averageable, stdev, "Standard deviation for"),
    205     ]
    206 
    207     for condition, operation, label in operations:
    208         print()
    209         for header in headers:
    210             if condition(header):
    211                 print(f"{label} {header}: {operation([d[header] for d in data]): .3f}{unit(header)}")
    212 
    213     # histograms
    214     for header in headers:
    215         if histogramable(header) and (header in filtered or args.filter == "all"):
    216             print()
    217             print(f"{header.capitalize()} histogram:")
    218             h = histogram([d[header] for d in data], start=-1 if not averageable(header) else args.increment,
    219                           step=args.increment,
    220                           rate=HistogramRate[args.rate.upper()])
    221             visualize_histogram(h)
    222 
    223     # pcomm stats
    224     if "pcomm" in filtered or args.filter == "all":
    225         print()
    226         print("PCOMM stats:")
    227         print_pcomm_stats(data)
    228 
    229 
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()