# Source code for stosim.analysis.harvester

#!/usr/bin/python

'''
harvester
==========

This module helps to collect certain files and values from the whole data set,
based on search criteria.
Main functions to use are collect_files and collect_values.
'''

import os
from subprocess import Popen


# ---- main functions ----

def collect_files(searches, filepath, target_dir):
    ''' For each search, collect all .dat-files from the data directories that match the criteria.
        Create a subdir for each search in target_dir where the files go,
        named log1.dat to log<n>.dat.

        Matching entries of filepath and the files inside them are processed in
        sorted name order, so the numbering is reproducible between runs.
        Entries of filepath that match a search but are not directories are ignored.

        :param dict searches: a dict, where the keys are names of searches and values
                  are lists of key-value tuples which dirnames should match
        :param string filepath: path to data directories
        :param string target_dir: directory where the subdirs should go
        :returns: a list with searches that didn't match any folders
        :raises OSError: if a directory cannot be created or a file cannot be copied
    '''
    # Local import keeps this function self-contained; copying with shutil
    # instead of a shell "cp" command is safe for file names that contain
    # spaces or shell metacharacters and reports failures instead of hiding them.
    import shutil

    failed = []

    for name, criteria in searches.items():
        search_dir = os.path.join(target_dir, name)
        if not os.path.isdir(search_dir):
            os.makedirs(search_dir)

        foldernames = sorted(
            fn for fn in os.listdir(filepath)
            if matches(fn, criteria) and os.path.isdir(os.path.join(filepath, fn))
        )
        if not foldernames:
            failed.append(name)

        # One running counter per search, across all matching folders.
        copied_files = 0
        for fon in foldernames:
            folder = os.path.join(filepath, fon)
            for fin in sorted(os.listdir(folder)):
                if not fin.endswith('.dat'):
                    continue
                copied_files += 1
                shutil.copy(os.path.join(folder, fin),
                            os.path.join(search_dir, 'log%d.dat' % copied_files))
    return failed


def collect_values(filepath, delim, outfile_name, cols=None, selector='all'):
    '''
    Collect specific x/y values from a bunch of .dat files (to be found via the filepath)
    and write them into a new file.

    The files are processed in sorted name order and the text returned by the
    selector for each file is written to the output file in that order.

    :param string filepath: path to the directory containing the .dat files
    :param string delim: delimiter in the data files
    :param string outfile_name: path and name of file to write into
    :param list cols: columns to select (1-based), at least one,
                      we consider the first as x, the second as y
    :param selector: method to specify how to choose lines from the data files,
                     one of ['all', 'last', 'max_x', 'max_y', 'min_x', 'min_y'],
                     or a custom callable taking (filep, cols, delim) and
                     returning the selected text as a string
    :raises ValueError: if selector is neither callable nor one of the known names
    '''
    known = ('all', 'last', 'max_x', 'max_y', 'min_x', 'min_y')
    if callable(selector):
        select = selector
    elif selector in known:
        # Look the selector function up in this module's namespace; this works
        # no matter whether the module was imported from a package or run directly.
        select = globals()['select_%s' % selector]
    else:
        raise ValueError('selector must be callable or one of %s, got %r'
                         % (list(known), selector))

    if cols is None:
        cols = []

    chunks = []
    for name in sorted(os.listdir(filepath)):
        if not name.endswith('.dat'):
            continue
        with open(os.path.join(filepath, name), 'r') as tmp:
            chunks.append(select(tmp, cols, delim))

    with open(outfile_name, 'w') as out:
        out.write(''.join(chunks))

# ---- selectors ----

def select_all(filep, cols, delim):
    ''' select column values of all lines

    Lines starting with '#' are skipped. For every other line the requested
    columns are stripped of surrounding whitespace and joined by single spaces.

    :param file filep: pointer to data file
    :param list cols: list of (1-based) column indices from which to collect
    :param string delim: delimiter in the data files
    :returns: string with one output line per data line ('' if there are none)
    :raises IndexError: if a data line has fewer columns than requested
    '''
    out = []
    for line in filep:
        if line.startswith('#'):
            continue
        fields = line.split(delim)  # split once, not once per column
        out.append('%s\n' % ' '.join(fields[c - 1].strip() for c in cols))
    return ''.join(out)


def select_last(filep, cols, delim):
    ''' select column values of the last data line

    Lines starting with '#' are skipped. If the file contains no data lines
    at all, an empty string is returned (as select_all does).

    :param file filep: pointer to data file
    :param list cols: list of (1-based) column indices from which to collect
    :param string delim: delimiter in the data files
    :returns: string with line, or '' if there is no data line
    :raises IndexError: if the last data line has fewer columns than requested
    '''
    lines = [l for l in filep if not l.startswith('#')]
    if not lines:
        return ''
    last_line = lines[-1].split(delim)
    return '%s\n' % ' '.join(last_line[c - 1].strip() for c in cols)


def select_max_x(filep, cols, delim):
    ''':returns: a string with the values from the line with maximal x-value

    The x-value is taken from the first column listed in cols; the work is
    delegated to extreme with sel=max and by=0.

    :param file filep: pointer to data file
    :param list cols: list of column indices from which to collect
    :param string delim: delimiter in the data files
    '''
    return extreme(filep, cols, delim, sel=max, by=0)


def select_max_y(filep, cols, delim):
    ''':returns: a string with the values from the line with maximal y-value

    The y-value is taken from the second column listed in cols; the work is
    delegated to extreme with sel=max and by=1.

    :param file filep: pointer to data file
    :param list cols: list of column indices from which to collect
    :param string delim: delimiter in the data files
    '''
    return extreme(filep, cols, delim, sel=max, by=1)


def select_min_x(filep, cols, delim):
    ''':returns: a string with the values from the line with minimal x-value

    The x-value is taken from the first column listed in cols; the work is
    delegated to extreme with sel=min and by=0.

    :param file filep: pointer to data file
    :param list cols: list of column indices from which to collect
    :param string delim: delimiter in the data files
    '''
    return extreme(filep, cols, delim, sel=min, by=0)


def select_min_y(filep, cols, delim):
    ''':returns: a string with the values from the line with minimal y-value

    The y-value is taken from the second column listed in cols; the work is
    delegated to extreme with sel=min and by=1.

    :param file filep: pointer to data file
    :param list cols: list of column indices from which to collect
    :param string delim: delimiter in the data files
    '''
    return extreme(filep, cols, delim, sel=min, by=1)


# ---- helpers ----

def matches(string, search):
    ''':returns: True if string contains all key/value pairs in search, False otherwise

       A pair counts as contained when the key immediately followed by the
       value (both formatted with %s) occurs anywhere in string. This is a
       plain substring test, so the pair ('n', 1) is also found in 'n10'.
       An empty search matches every string.

       :param string string: string to search in
       :param search: the key/value pairs to look for, either an iterable of
                      (key, value) tuples or a dict
    '''
    # A dict iterates over its keys only, so take its items explicitly.
    pairs = search.items() if isinstance(search, dict) else search
    return all('%s%s' % (k, v) in string for k, v in pairs)


def extreme(filep, cols, delim, sel=max, by=0):
    ''' Helper for selectors. Gets the line with maximal or minimal value,
        looking for that value in a column of choice.

    Lines starting with '#' are skipped. If several lines share the extreme
    value, the first of them is used. If the file contains no data lines,
    an empty string is returned.

    :param file filep: pointer to data file
    :param list cols: list of (1-based) column indices from which to collect
    :param string delim: delimiter in the data files
    :param function sel: function to select value from a list, max or min
    :param int by: position in cols of the column to compare
                   (0 for x, 1 for y), default 0

    :returns: a string with the values of the requested columns from the line
              whose compared column is extreme, or '' if there is no data line
    :raises ValueError: if a compared value cannot be converted to float
    :raises IndexError: if a line has fewer columns than requested
    '''
    # Internal preconditions: only called by the select_* wrappers.
    assert sel in (max, min)
    assert by in (0, 1)
    rows = [l.split(delim) for l in filep if not l.startswith('#')]
    if not rows:
        return ''
    key_col = cols[by] - 1
    vals = [float(row[key_col]) for row in rows]
    line = rows[vals.index(sel(vals))]
    return '%s\n' % ' '.join(line[c - 1].strip() for c in cols)