#!/usr/bin/env python

import argparse
from enum import Enum
from decimal import Decimal
from statistics import mean, median, stdev


def get_parser():
    """Build the command-line argument parser for the statistics script.

    Returns:
        argparse.ArgumentParser: parser with one positional ``file``
        argument and the ``--increment``, ``--rate``, ``--raw`` and
        ``--filter`` options.
    """
    # Each entry is (name-or-flags, keyword arguments for add_argument);
    # the order here is the order shown in the generated help text.
    argument_specs = (
        (("file",), {
            "help": "process records file",
        }),
        (("-i", "--increment"), {
            "default": 1,
            "type": Decimal,
            "help": "size of a single histogram interval",
        }),
        (("-r", "--rate"), {
            "default": "linear",
            "choices": ["linear", "exponential"],
            "help": "set growth rate of histogram intervals",
        }),
        (("-R", "--raw"), {
            "action": "store_true",
            "default": False,
            "help": "output just histograms, in csv format for further processing",
        }),
        (("-f", "--filter"), {
            "default": "all",
            "help": "comma separated list of histograms to show (age,stime,...)",
        }),
    )

    cli = argparse.ArgumentParser(
        description="Create statistics out of process records file",
    )
    for names, options in argument_specs:
        cli.add_argument(*names, **options)
    return cli


class HistogramRate(Enum):
    """Growth modes for the interval bounds built by ``histogram``."""
    # Each new interval is ``step`` times as wide as the previous one.
    EXPONENTIAL = 1
    # Each new interval ends ``step`` past the end of the previous one.
    LINEAR = 2


def histogram(data, /, start=0.001, step=2, rate=HistogramRate.EXPONENTIAL):
    """Count how many values fall into consecutive half-open intervals.

    The first interval is bounded by 0 and ``start`` (in ascending order).
    Every following interval begins where the previous one ended:

    * ``HistogramRate.LINEAR``: the end moves forward by ``step``.
    * ``HistogramRate.EXPONENTIAL``: the new interval is ``step`` times as
      wide as the previous one.

    Args:
        data: Iterable of mutually comparable numbers.
        start: The non-zero bound of the first interval.
        step: Increment (linear) or width multiplier (exponential).
        rate: A ``HistogramRate`` member selecting how intervals grow.

    Returns:
        dict: Maps ``(left, right)`` to the number of values with
        ``left <= value < right``. Intervals without values are omitted;
        keys are inserted in ascending order.

    Raises:
        ValueError: If a value lies below the first interval, if ``rate``
            is not a known ``HistogramRate`` (checked only when the
            interval has to advance), or if the intervals stop advancing
            (for example a non-positive ``step``), which would otherwise
            loop forever.
    """
    values = sorted(data)
    left, right = sorted([0, start])

    # Intervals only ever move to the right, so a value below the first
    # lower bound could never be placed.
    if values and values[0] < left:
        raise ValueError(
            f"Value {values[0]} lies below the first interval [{left}, {right})"
        )

    histogram_data = {}
    for value in values:
        # Values are sorted, so the current interval only needs to move
        # forward until it covers the value.
        while not left <= value < right:
            if rate == HistogramRate.EXPONENTIAL:
                width = right - left
                left = right
                right += step * width
            elif rate == HistogramRate.LINEAR:
                left = right
                right += step
            else:
                raise ValueError(f"Invalid rate type: {rate}")

            # An empty or inverted interval means the bounds no longer
            # advance and the value would never be reached.
            if not right > left:
                raise ValueError(
                    f"Histogram intervals do not advance (start={start}, step={step})"
                )

        bounds = (left, right)
        histogram_data[bounds] = histogram_data.get(bounds, 0) + 1

    return histogram_data


def visualize_histogram(histogram):
    """Print a histogram as a horizontal bar chart, one interval per line.

    Each line shows the interval label ``[left, right)`` padded to the
    width of the longest label, a bar whose length is proportional to the
    count (the largest count gets 60 characters), and the count itself.

    Args:
        histogram: Mapping of ``(left, right)`` tuples to counts. An empty
            mapping prints nothing.
    """
    if not histogram:
        # Nothing to draw; max() below would fail on an empty mapping.
        return

    max_width = 60
    peak = max(histogram.values())
    labels = {bounds: f"[{bounds[0]}, {bounds[1]})" for bounds in histogram}
    pad = max(map(len, labels.values()))

    for bounds, count in histogram.items():
        # Guard against a mapping whose counts are all zero.
        bar_width = int(max_width * count / peak) if peak else 0
        print(f"{labels[bounds]:{pad}} {bar_width * '█'} {count}")


def retype(data, header):
    """Convert a raw text field to the type used for its column.

    Args:
        data: The field text as read from the records file.
        header: Name of the column the field belongs to.

    Returns:
        ``data`` unchanged for ``PCOMM``, a ``Decimal`` for the time
        columns, and an ``int`` for every other column.
    """
    if header == "PCOMM":
        return data

    decimal_columns = frozenset(
        ("TIME", "age", "utime", "stime", "cutime", "cstime")
    )
    convert = Decimal if header in decimal_columns else int
    return convert(data)


def averageable(header):
    """Return True when ``header`` names a column that summary statistics
    (average, minimum, maximum, ...) are computed for."""
    numeric_columns = frozenset((
        "age", "utime", "stime",
        "nvcsw", "nivcsw",
        "cutime", "cstime",
        "inblk", "oublk",
        "cinblk", "coublk",
    ))
    return header in numeric_columns


def unit(header):
    """Return the unit suffix printed after a value of column ``header``.

    Returns:
        ``'s'`` for the time columns, an empty string otherwise.
    """
    time_columns = frozenset(("age", "utime", "stime", "cutime", "cstime"))
    return 's' if header in time_columns else ''


def medianable(header):
    """Return whether a median is reported for column ``header``.

    Delegates to ``averageable``: the same columns qualify for both.
    """
    return averageable(header)


def histogramable(header):
    """Return whether a histogram can be produced for column ``header``.

    That is every column accepted by ``averageable`` plus the ``UID``,
    ``exit`` and ``exitsig`` columns.
    """
    histogram_only_columns = {"UID", "exit", "exitsig"}
    if header in histogram_only_columns:
        return True
    return averageable(header)


def print_pcomm_stats(data):
    """Print per-program process counts, most frequent program first.

    For every distinct ``PCOMM`` value one line is printed: the program
    name padded to the longest name, the number of records, and in
    parentheses how many of those records ``is_process_intensive``
    classified as intensive versus not. Lines are ordered by descending
    count, ties broken by descending program name.

    Args:
        data: Iterable of record dicts; each must contain ``"PCOMM"`` and
            the keys read by ``is_process_intensive``. Empty input prints
            nothing.
    """
    # program -> [total, intensive], gathered in a single pass.
    tallies = {}
    for record in data:
        tally = tallies.setdefault(record["PCOMM"], [0, 0])
        tally[0] += 1
        if is_process_intensive(record):
            tally[1] += 1

    if not tallies:
        # No records: max() below would fail on an empty sequence.
        return

    name_width = max(map(len, tallies))
    ranking = sorted(
        tallies.items(),
        key=lambda item: (item[1][0], item[0]),
        reverse=True,
    )
    for program, (count, intensive) in ranking:
        passive = count - intensive
        print(f"{program:{name_width}} {count}\t({intensive}/{passive})")


def is_process_intensive(data):
    """Decide whether a process record counts as CPU intensive.

    A record is intensive when its ``age`` exceeds 0.1 and the sum of
    ``utime`` and ``stime`` is more than 0.3 of ``age``.

    Args:
        data: Record dict providing ``"age"``, ``"utime"`` and ``"stime"``.

    Returns:
        bool: True when both conditions hold.
    """
    lifetime, user_time, system_time = (
        data[key] for key in ("age", "utime", "stime")
    )

    # Short-lived processes are never intensive; this also keeps the
    # division below away from a zero age.
    if not lifetime > 0.1:
        return False

    cpu_share = (user_time + system_time) / lifetime
    return cpu_share > 0.3


def _parse_records(lines):
    """Split the raw file lines into column headers and typed records.

    The third line holds the column names. Records are every line after
    it except the final three. Columns are whitespace separated; when the
    header line yields a single whitespace-separated token the file is
    treated as comma separated instead.

    Returns:
        tuple: ``(headers, records)`` where ``records`` is a list of dicts
        mapping each header to the value produced by ``retype``.

    Raises:
        ValueError: If a record line does not have one field per header.
    """
    header_line = lines[2].strip()
    delim = None
    headers = header_line.split()
    if len(headers) == 1:
        delim = ','
        headers = [name.strip() for name in header_line.split(delim)]

    records = []
    for lineno, line in enumerate(lines[3:-3], start=4):
        # The first column is split off from the left and the remaining
        # ones from the right, so the second column may itself contain
        # the delimiter (e.g. a program name with spaces).
        tokens = line.strip().split(delim, 1)
        if len(tokens) == 2:
            tokens = [tokens[0]] + tokens[1].rsplit(delim, len(headers) - 2)
        tokens = [token.strip() for token in tokens]

        if len(tokens) != len(headers):
            raise ValueError(
                f"line {lineno}: expected {len(headers)} fields, got {len(tokens)}"
            )

        records.append({
            header: retype(token, header)
            for header, token in zip(headers, tokens)
        })

    return headers, records


def _build_histogram(data, header, increment, rate):
    """Build the histogram of column ``header`` over all records."""
    # Columns without summary statistics start at -1 so that the first
    # interval is [-1, 0).
    start = increment if averageable(header) else -1
    return histogram([record[header] for record in data],
                     start=start, step=increment, rate=rate)


def main():
    """Parse the command line, read the records file and print the report.

    In ``--raw`` mode only the selected histograms are printed as CSV.
    Otherwise the output contains the record count, the first, last and
    second line of the file, summary statistics per column, the selected
    histograms drawn as bar charts, and the per-program statistics.
    """
    parser = get_parser()
    args = parser.parse_args()

    if args.increment <= 0:
        # A non-positive increment never advances the histogram intervals.
        print("Increment must be greater than 0\n")
        parser.print_help()
        return

    if args.rate == "exponential" and args.increment <= 1:
        print("Exponential rate can't have increment <= 1\n")
        parser.print_help()
        return

    log_filename = args.file
    with open(log_filename, 'r') as f:
        lines = f.readlines()

    if len(lines) < 3:
        print(f"{log_filename}: not a process records file (column header line is missing)")
        return

    headers, data = _parse_records(lines)

    filtered = args.filter.split(',')
    show_all = args.filter == "all"
    rate = HistogramRate[args.rate.upper()]
    histogram_headers = [
        header for header in headers
        if histogramable(header) and (show_all or header in filtered)
    ]

    if args.raw:
        for header in histogram_headers:
            print(f"{header}_from,{header}_to,count")
            h = _build_histogram(data, header, args.increment, rate)
            for (start, end), count in h.items():
                print(f"{start},{end},{count}")
            print()
        return

    print(f"Number of processes handled: {len(data)}")
    print(lines[0].strip())
    print(lines[-1].strip())
    print(lines[1].strip())

    if not data:
        # No records: there is nothing to summarise or draw.
        return

    operations = [
        (averageable, mean, "Average"),
        (medianable, median, "Median"),
        (averageable, min, "Minimal"),
        (averageable, max, "Maximal"),
        (averageable, stdev, "Standard deviation for"),
    ]

    for condition, operation, label in operations:
        print()
        for header in headers:
            if not condition(header):
                continue
            try:
                value = operation([d[header] for d in data])
            except ValueError:
                # statistics.StatisticsError is a ValueError; stdev, for
                # instance, needs at least two records.
                print(f"{label} {header}: n/a")
                continue
            print(f"{label} {header}: {value: .3f}{unit(header)}")

    # histograms
    for header in histogram_headers:
        print()
        print(f"{header.capitalize()} histogram:")
        visualize_histogram(_build_histogram(data, header, args.increment, rate))

    # pcomm stats
    if "PCOMM" in headers and (show_all or "PCOMM" in filtered):
        print()
        print("PCOMM stats:")
        print_pcomm_stats(data)


# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
