#!/usr/bin/python3.9
#
#------------------------------------
# Part of HPCToolkit (hpctoolkit.org)
#------------------------------------
#
# Copyright (c) 2002-2023, Rice University.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
#
# * Neither the name of Rice University (RICE) nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# This software is provided by RICE and contributors "as is" and any
# express or implied warranties, including, but not limited to, the
# implied warranties of merchantability and fitness for a particular
# purpose are disclaimed. In no event shall RICE or contributors be
# liable for any direct, indirect, incidental, special, exemplary, or
# consequential damages (including, but not limited to, procurement of
# substitute goods or services; loss of use, data, or profits; or
# business interruption) however caused and on any theory of liability,
# whether in contract, strict liability, or tort (including negligence
# or otherwise) arising in any way out of the use of this software, even
# if advised of the possibility of such damage.
#
# $HeadURL$
# $Id$
#
# hpcsummary -- summarize the .log and .hpcrun files in an HPCToolkit
# measurements directory.
#
# Usage: ./hpcsummary [-ahV] [directory ...]
#
#  -a adds summary info for blocks, errors and trolls
#  -h displays the help message
#  -V displays hpctoolkit version
#

# HPCToolkit version string; printed by the -V / --version option.
VERSION = '2024.01.1-release'

import getopt
import os
import re
import sys

# Global variables
# DATA:       list of per-process dicts; do_log_file() appends one entry
#             for every .log file whose SUMMARY line could be parsed.
# BASE2INDEX: maps a file base name (as returned by parse_name()) to the
#             index of the matching entry in DATA.
# MAX_TID:    largest thread id seen among the .hpcrun files.
# HAS_TROLLS: True once a SUMMARY line in the new format (with a
#             'trolled:' count) has been seen.
# NUM_ERRORS: number of warnings issued so far; reported at exit.
DATA = []
BASE2INDEX = {}
MAX_TID = 0
HAS_TROLLS = False
NUM_ERRORS = 0

# Constants
# Keys used in the DATA dicts.  BLOCKS, ERRORS, SAMPLES, TROLLS and BASE
# are used directly as keys; FILESIZE and NAME are used as the second
# element of a (thread-id, key) tuple key.
BLOCKS = 'blocks'
ERRORS = 'errors'
SAMPLES = 'samples'
TROLLS = 'trolls'
BASE = 'base'
FILESIZE = 'filesize'
NAME = 'name'
# warn() prints at most this many messages; later ones are only counted.
WARNING_LIMIT = 50

# Regular expressions
# Both match the counts on a log file's SUMMARY line.  The new format
# additionally carries a 'trolled:' count, so it must be tried first:
# the old pattern also matches a new-format line.
old_sum_re = re.compile(r'recorded:\s*(\d+).+blocked:\s*(\d+).+errant:\s*(\d+)')
new_sum_re = re.compile(r'recorded:\s*(\d+).+blocked:\s*(\d+).+errant:\s*(\d+).+trolled:\s*(\d+)')


def warn(mesg):
    """Count a warning and print it to stderr unless the limit is exceeded.

    Every call increments NUM_ERRORS.  Only the first WARNING_LIMIT
    messages are actually written, so a badly corrupted directory does
    not flood the terminal; the final count is still exact.

    mesg -- the message to report (converted with str()).
    """
    global NUM_ERRORS
    NUM_ERRORS += 1
    if NUM_ERRORS <= WARNING_LIMIT:
        # sys.stderr.write() instead of the Python 2 only
        # 'print >>sys.stderr' statement, which does not work on Python 3.
        sys.stderr.write(str(mesg) + '\n')


def usage():
    """Print the command-line help text to stdout and exit with status 0.

    Never returns: always raises SystemExit via sys.exit(0).
    """
    # Parenthesized single-argument print is valid on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print('''usage: %s [-ahV] [directory ...]

  -a, --all
    include summary info for blocked, errant and trolled samples

  -h, --help
    display this message

  -V, --version
    display HPCToolkit version

The directories are HPCToolkit measurements directories,
or else the current directory if omitted.''' % sys.argv[0])
    sys.exit(0)


#
# Hpcrun and log file names:
#   <program-name>-<mpi-rank>-<thread-id>-<machine-id>-<pid>-<gen>.hpcrun
#   <program-name>-<mpi-rank>-<thread-id>-<machine-id>-<pid>-<gen>.log
#
# The '-<gen>' part is optional, and we accept both for backward
# compatibility.  If present, <gen> should be a single digit number.
#
# Note: need to be careful if <program-name> contains dashes.
#
def parse_name(fname, ext):
    """Split a measurement file name into (base, thread id).

    fname -- file name of the form
             <program>-<rank>-<tid>-<machine>-<pid>[-<gen>]<ext>
    ext   -- extension to strip from the end of fname (its length is
             removed; the text itself is not checked)

    Returns a tuple (base, tid): base is the file name without the
    extension and without the thread-id field, and tid is the thread id
    as an int.

    Raises ValueError if the name has fewer than five dash-separated
    fields or if the thread-id field is not an integer.
    """
    stem = fname[0 : len(fname) - len(ext)]
    # Split from the right so that dashes inside <program> stay in the
    # first field whenever possible.
    fields = stem.rsplit('-', 5)
    nfields = len(fields)

    if nfields < 5:
        # A real exception class: raising a plain string is a TypeError
        # on Python 2.6+ and Python 3.
        raise ValueError('parse-name-error')

    if nfields == 5:
        # <program>-<rank>-<tid>-<machine>-<pid>, no generation field.
        tid_pos = 2
    elif len(fields[5]) == 1:
        # Six fields ending in a single-character <gen>.
        tid_pos = 2
    else:
        # Six fields but the last one is not a <gen>: the program name
        # contained a dash, so every field is shifted right by one.
        tid_pos = 3

    tid = int(fields[tid_pos])
    base = '-'.join(fields[:tid_pos] + fields[tid_pos + 1:])
    return (base, tid)


def do_log_file(fname):
    """Read the SUMMARY line of one .log file and record it in DATA.

    fname -- name of the .log file, relative to the current directory

    On success a dict holding the recorded, blocked and errant sample
    counts (plus the trolled count for the new format) and the file's
    base name is appended to DATA, and BASE2INDEX maps the base name to
    that entry.  HAS_TROLLS is set when the new format is seen.

    Any problem (bad file name, unreadable file, missing or malformed
    SUMMARY line) is reported through warn() and the file is skipped.
    """
    global HAS_TROLLS

    try:
        base, _ = parse_name(fname, '.log')
    except Exception:
        # Any parse failure means the name is malformed.  Not a bare
        # 'except:', so KeyboardInterrupt / SystemExit still propagate.
        warn('bad .log file name: %s' % fname)
        return
    try:
        logfile = open(fname, 'r')
    except IOError:
        warn('unable to open: %s' % fname)
        return

    # Find the SUMMARY line.  The 'with' block guarantees the file is
    # closed on every path.
    summary = None
    with logfile:
        for line in logfile:
            if line.startswith('SUMMARY'):
                summary = line
                break
    if summary is None:
        warn('unable to find SUMMARY line in: %s' % fname)
        return

    # New summary format with trolls.  It must be tried first because
    # the old pattern also matches a new-format line.
    match = new_sum_re.search(summary)
    if match:
        samples, blocks, errors, trolls = map(int, match.groups())
        entry = {
            BLOCKS: blocks,
            ERRORS: errors,
            SAMPLES: samples,
            TROLLS: trolls,
            BASE: base,
        }
        HAS_TROLLS = True
    else:
        # Old summary format without trolls.
        match = old_sum_re.search(summary)
        if not match:
            warn('bad SUMMARY line in: %s' % fname)
            return
        samples, blocks, errors = map(int, match.groups())
        entry = {
            BLOCKS: blocks,
            ERRORS: errors,
            SAMPLES: samples,
            BASE: base,
        }

    DATA.append(entry)
    BASE2INDEX[base] = len(DATA) - 1


def do_hpcrun_file(fname):
    """Record the size and name of one .hpcrun file in its DATA entry.

    fname -- name of the .hpcrun file, relative to the current directory

    The file is matched to the DATA entry of its process through
    BASE2INDEX.  The size is stored under the key (tid, FILESIZE) and the
    file name under (tid, NAME); MAX_TID is raised to tid if larger.

    Any problem (bad file name, no matching .log entry, inconsistent
    index, stat failure) is reported through warn() and the file is
    skipped.
    """
    global MAX_TID

    try:
        base, tid = parse_name(fname, '.hpcrun')
    except Exception:
        # Any parse failure means the name is malformed.  Not a bare
        # 'except:', so KeyboardInterrupt / SystemExit still propagate.
        warn('bad .hpcrun file name: %s' % fname)
        return

    try:
        index = BASE2INDEX[base]
    except KeyError:
        warn('missing or corrupt .log file for: %s' % fname)
        return

    # The index must refer to an existing entry for the same base name.
    # The bounds test turns an out-of-range index into a warning instead
    # of an IndexError.
    if index >= len(DATA) or DATA[index][BASE] != base:
        warn('internal DATA corruption for: %s' % fname)
        return

    try:
        size = os.stat(fname).st_size
    except OSError:
        warn('unable to stat: %s' % fname)
        return

    MAX_TID = max(tid, MAX_TID)
    entry = DATA[index]
    entry[(tid, FILESIZE)] = size
    entry[(tid, NAME)] = fname


def display_metric(dir, title, label, metric):
    """Print a sorted table and a totals line for one metric.

    dir    -- directory name shown in the output ('.' suppresses the
              'Directory:' line)
    title  -- heading printed above the table
    label  -- column header and prefix of the totals line
    metric -- key into the DATA dicts: either a plain key such as SAMPLES
              or a (thread-id, FILESIZE) tuple

    Processes without a value for the metric are left out and counted as
    an error.  With 16 or fewer processes every one is listed in
    ascending order of the metric; otherwise only the 0%, 10%, ..., 100%
    entries of the sorted list are printed.
    """
    global NUM_ERRORS

    print('=' * 72)
    if dir != '.':
        print('Directory: %s' % dir)
    if isinstance(metric, tuple):
        tid = metric[0]
        print('%s, thread %d' % (title, tid))
    else:
        # Process-wide metrics are shown next to the thread 0 file name.
        tid = 0
        print(title)

    # Keep only the processes that have a value for this metric, so a
    # corrupt or incomplete directory cannot crash the report.
    total_nproc = len(DATA)
    procs = [proc for proc in DATA if metric in proc]
    nproc = len(procs)

    # Warn if some or all processes are missing this metric.
    if nproc == 0:
        print('Warning: no processes have a value for this metric')
        NUM_ERRORS += 1
        return
    if nproc < total_nproc:
        print('Warning: %d processes are missing a value for this metric' % (
            total_nproc - nproc))
        NUM_ERRORS += 1

    # key= replaces the cmp-function form of sort(), which Python 3
    # removed; the resulting order is the same.
    procs.sort(key=lambda proc: proc[metric])
    total = sum([proc[metric] for proc in procs])

    # A process may have a .log entry but no .hpcrun file for this
    # thread; show a placeholder rather than raising KeyError.
    name_key = (tid, NAME)
    no_file = '(no .hpcrun file)'

    print('')
    print('%7s  %14s  %s' % ('rank', label, 'file'))
    if nproc <= 16:
        for rank, proc in enumerate(procs):
            print('%7d  %14d  %s' % (
                rank, proc[metric], proc.get(name_key, no_file)))
    else:
        for dec in range(11):
            # Floor division: '/' yields a float on Python 3, which is
            # not a valid list index.
            proc = procs[(dec * (nproc - 1)) // 10]
            print('%6d%%  %14d  %s' % (
                (10 * dec), proc[metric], proc.get(name_key, no_file)))
    print('')
    print('%s: procs: %d, total: %d, avg: %0.1f, dir: %s' % (
        label, nproc, total, total / float(nproc), dir))


def do_dir(dir):
    """Summarize one HPCToolkit measurements directory.

    dir -- directory path, interpreted relative to origdir (the working
           directory at program start)

    Changes into the directory, resets the per-directory globals, reads
    all .log files and then all .hpcrun files, and prints the metric
    tables.  Blocked, errant and trolled samples are shown only when
    opt_display_all is set.  Problems are reported through warn().
    """
    # BASE2INDEX must be listed here: without it the assignment below
    # creates a local, and stale base -> index entries from a previous
    # directory would be matched against the fresh DATA list.
    global DATA, BASE2INDEX, MAX_TID, HAS_TROLLS

    # Return to the starting directory first so that relative paths on
    # the command line are resolved consistently.
    os.chdir(origdir)
    try:
        os.chdir(dir)
    except OSError:
        warn('unable to cd to %s' % (dir))
        return

    DATA = []
    BASE2INDEX = {}
    MAX_TID = 0
    HAS_TROLLS = False

    # os.listdir() returns names in arbitrary order; sort them so the
    # report is reproducible when metric values tie.
    entries = sorted(os.listdir('.'))

    # All .log files must be processed before any .hpcrun file, because
    # do_hpcrun_file() attaches its data to entries created from the logs.
    for entry in entries:
        if entry.endswith('.log'):
            do_log_file(entry)
    if not DATA:
        warn('no valid .log files in directory %s' % (dir))
        return

    for entry in entries:
        if entry.endswith('.hpcrun'):
            do_hpcrun_file(entry)

    # Print the summaries.
    if opt_display_all:
        display_metric(dir, 'Blocked Samples', 'blocks', BLOCKS)
        display_metric(dir, 'Errant Samples', 'errors', ERRORS)
        if HAS_TROLLS:
            display_metric(dir, 'Samples with Trolls', 'trolls', TROLLS)
    display_metric(dir, 'Recorded Samples', 'samples', SAMPLES)
    for tid in range(MAX_TID + 1):
        display_metric(dir, 'File Size', 'size', (tid, FILESIZE))


#
# Main program.
#
if sys.version_info < (2, 4):
    sys.exit('This script requires Python version 2.4 or later.')

# usage() exits, so opts and args are always bound past this point.
try:
    opts, args = getopt.getopt(sys.argv[1:], 'ahV', ['all', 'help', 'version'])
except getopt.GetoptError:
    usage()

# Read by do_dir() as a module-level global.
opt_display_all = False
for opt, arg in opts:
    if opt in ('-a', '--all'):
        opt_display_all = True
    elif opt in ('-h', '--help'):
        usage()
    elif opt in ('-V', '--version'):
        # Parenthesized single-argument print works on Python 2 and 3.
        print('hpcsummary: A member of HPCToolkit, version %s' % VERSION)
        sys.exit(0)
    else:
        sys.exit('unknown option: %s' % opt)

# Process each directory, or else '.'.
if not args:
    args = ['.']

# Read by do_dir() to get back to the starting directory before each cd.
origdir = os.getcwd()
# 'dirname' rather than 'dir' so the builtin is not shadowed at module
# scope.
for dirname in args:
    do_dir(dirname)

print('=' * 72)

# Warn if there were any errors.
if NUM_ERRORS > 0:
    # print('') emits a blank line on both Python 2 and Python 3.
    print('')
    print('!! Warning: summary finished with %d errors !!' % NUM_ERRORS)
    print('')
