#!/usr/bin/python
'''
harvester
==========
This module helps to collect certain files and values from the whole data set,
based on search criteria.
Main functions to use are collect_files and collect_values.
'''
import os
import shutil
from subprocess import Popen
# ---- main functions ----
def collect_files(searches, filepath, target_dir):
    ''' For each search, collect all .dat files from the data directories
    whose names match the search criteria.

    A subdirectory named after the search is created in target_dir (unless it
    already exists) and every collected file is copied into it, renamed to
    log1.dat, log2.dat, ... logN.dat (numbering restarts for each search).

    :param dict searches: a dict, where the keys are names of searches and values
                          are lists of key-value tuples which dirnames should match
    :param string filepath: path to data directories
    :param string target_dir: directory where the subdirs should go
    :returns: a list with searches that didn't match any folders
    :raises OSError: if a directory cannot be listed or created, or a file
                     cannot be copied
    '''
    failed = []
    for name, criteria in searches.items():
        search_dir = os.path.join(target_dir, str(name))
        if not os.path.exists(search_dir):
            os.makedirs(search_dir)
        folders = [fn for fn in os.listdir(filepath) if matches(fn, criteria)]
        if not folders:
            failed.append(name)
        copied_files = 0
        for folder in folders:
            folder_path = os.path.join(filepath, folder)
            for fin in os.listdir(folder_path):
                if not fin.endswith('.dat'):
                    continue
                copied_files += 1
                # Copy in-process rather than through a shell command line:
                # names containing spaces or shell metacharacters are handled
                # correctly and a failed copy is reported instead of ignored.
                shutil.copy(os.path.join(folder_path, fin),
                            os.path.join(search_dir, 'log%d.dat' % copied_files))
    return failed
def collect_values(filepath, delim, outfile_name, cols=(), selector='all'):
    '''
    Collect specific x/y values from a bunch of .dat files (to be found via the filepath)
    and write them into a new file.

    All data files are read before the output file is opened for writing.

    TODO: how to pass custom selectors?

    :param string filepath: path to the directory containing the .dat files
    :param string delim: delimiter in the data files
    :param string outfile_name: path and name of file to write into
    :param list cols: columns to select (1-based), at least one,
                      we consider the first as x, the second as y
    :param selector: method to specify how to choose lines from the data files
                     one of ['all', 'last', 'max_x', 'max_y', 'min_x', 'min_y']
    :raises AssertionError: if selector is not one of the names listed above
    '''
    # Explicit dispatch table instead of importing this module by name and
    # looking the function up dynamically: works no matter how (or under
    # which name) the module was loaded.
    selectors = {
        'all': select_all,
        'last': select_last,
        'max_x': select_max_x,
        'max_y': select_max_y,
        'min_x': select_min_x,
        'min_y': select_min_y,
    }
    if selector not in selectors:
        # Same exception type as the former ``assert``, but raised explicitly
        # so the check is not stripped when Python runs with -O.
        raise AssertionError('unknown selector %r, expected one of %s'
                             % (selector, sorted(selectors)))
    select = selectors[selector]

    chunks = []
    for name in os.listdir(filepath):
        if not name.endswith('.dat'):
            continue
        with open(os.path.join(filepath, name), 'r') as data_file:
            # Each selector returns one string (possibly several lines);
            # keep it whole rather than splitting it into characters.
            chunks.append(select(data_file, cols, delim))

    with open(outfile_name, 'w') as out:
        out.writelines(chunks)
# ---- selectors ----
def select_all(filep, cols, delim):
    ''' Select the column values of all data lines.

    Lines starting with '#' and lines containing only whitespace are skipped.
    Each remaining line contributes one output line holding the stripped
    values of the requested columns, separated by single spaces.

    :param file filep: pointer to data file
    :param list cols: list of column indices (1-based) from which to collect
    :param string delim: delimiter in the data files
    :returns: string with lines (empty string if there are no data lines)
    :raises IndexError: if a data line has fewer columns than requested
    '''
    out_lines = []
    for line in filep:
        if line.startswith('#') or not line.strip():
            continue
        fields = line.split(delim)  # split once per line, not once per column
        out_lines.append('%s\n' % ' '.join(fields[c - 1].strip() for c in cols))
    return ''.join(out_lines)
def select_last(filep, cols, delim):
    ''' Select the column values of the last data line.

    Lines starting with '#' and lines containing only whitespace are not
    considered data lines, so a trailing blank line does not hide the last
    record.

    :param file filep: pointer to data file
    :param list cols: list of column indices (1-based) from which to collect
    :param string delim: delimiter in the data files
    :returns: string with line (empty string if the file has no data lines)
    :raises IndexError: if the last data line has fewer columns than requested
    '''
    last = None
    for line in filep:
        if line.strip() and not line.startswith('#'):
            last = line
    if last is None:
        # Nothing to report for a file without data lines.
        return ''
    fields = last.split(delim)
    return '%s\n' % ' '.join(fields[c - 1].strip() for c in cols)
def select_max_x(filep, cols, delim):
    ''' Pick the line whose x-value (the first entry of cols) is maximal.

    :param file filep: pointer to data file
    :param list cols: list of column indices from which to collect
    :param string delim: delimiter in the data files
    :returns: a string with the values from the line with maximal x-value
    '''
    chooser, axis = max, 0
    return extreme(filep, cols, delim, sel=chooser, by=axis)
def select_max_y(filep, cols, delim):
    ''' Pick the line whose y-value (the second entry of cols) is maximal.

    :param file filep: pointer to data file
    :param list cols: list of column indices from which to collect
    :param string delim: delimiter in the data files
    :returns: a string with the values from the line with maximal y-value
    '''
    chooser, axis = max, 1
    return extreme(filep, cols, delim, sel=chooser, by=axis)
def select_min_x(filep, cols, delim):
    ''' Pick the line whose x-value (the first entry of cols) is minimal.

    :param file filep: pointer to data file
    :param list cols: list of column indices from which to collect
    :param string delim: delimiter in the data files
    :returns: a string with the values from the line with minimal x-value
    '''
    chooser, axis = min, 0
    return extreme(filep, cols, delim, sel=chooser, by=axis)
def select_min_y(filep, cols, delim):
    ''' Pick the line whose y-value (the second entry of cols) is minimal.

    :param file filep: pointer to data file
    :param list cols: list of column indices from which to collect
    :param string delim: delimiter in the data files
    :returns: a string with the values from the line with minimal y-value
    '''
    chooser, axis = min, 1
    return extreme(filep, cols, delim, sel=chooser, by=axis)
# ---- helpers ----
def matches(string, search):
    ''' Check whether string contains every key/value pair of a search.

    A pair matches when the key immediately followed by the value (both
    formatted with %s) occurs anywhere in string.

    :param string string: string to search in
    :param search: the key/value pairs to look for, either a dict or an
                   iterable of (key, value) tuples
    :returns: True if string contains all key/value pairs in search
              (trivially True for an empty search), False otherwise
    '''
    # Iterating a dict directly would yield only its keys, so go through
    # items() when a mapping is passed.
    pairs = search.items() if isinstance(search, dict) else search
    return all('%s%s' % (k, v) in string for k, v in pairs)
def extreme(filep, cols, delim, sel=max, by=0):
    ''' Helper for selectors. Gets the line with the maximal or minimal value,
    looking for that value in a column of choice.

    Lines starting with '#' and lines containing only whitespace are skipped.
    If several lines share the extreme value, the first of them is used.

    :param file filep: pointer to data file
    :param list cols: list of column indices (1-based) from which to collect
    :param string delim: delimiter in the data files
    :param function sel: function to select value from a list, max or min
    :param int by: the column of choice (0 for x, 1 for y), default 0;
                   used as an index into cols
    :returns: a string with the values of the requested columns from the
              selected line (empty string if the file has no data lines)
    :raises AssertionError: if sel is neither max nor min, or by is not 0 or 1
    :raises ValueError: if a value in the chosen column is not a number
    :raises IndexError: if cols has no entry for by, or a line has fewer
                        columns than requested
    '''
    # Raised explicitly (same exception type as the former ``assert``) so the
    # checks are not stripped when Python runs with -O.
    if sel not in (max, min):
        raise AssertionError('sel must be the builtin max or min')
    if by not in (0, 1):
        raise AssertionError('by must be 0 (x) or 1 (y)')

    rows = [line.split(delim) for line in filep
            if line.strip() and not line.startswith('#')]
    if not rows:
        # max()/min() of nothing would raise; there is simply no line to report.
        return ''

    key_col = cols[by] - 1
    # max/min with a key return the first extreme element, matching a
    # "compute values, then index() of the extreme" lookup.
    best = sel(rows, key=lambda row: float(row[key_col]))
    return '%s\n' % ' '.join(best[c - 1].strip() for c in cols)