# Source code for stosim.analysis.compressor
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
compressor
==========
Compresses data (e.g. averaging)
'''
import os
import math
def avg_stats(xCol, yCol, numFiles, filePrefix='', fileSuffix='', filePath='.', delim=',', outName=None):
    '''
    Average one column over several data files and write the result in a
    format that is ready to be plotted by gnuplot: each output line is
    "<x> <mean of y vals> <std of y vals>".

    Data files should be named using an index starting at 1 and all have the
    same prefix and/or suffix around that index, i.e. the files read are
    <filePath>/<filePrefix><i><fileSuffix> for i = 1..numFiles.
    In gnuplot, you could then say "plot outName smooth unique with yerrorlines"

    Lines whose first field starts with '#', blank lines and lines with fewer
    columns than xCol/yCol are skipped silently. Lines with an empty x value
    or a non-numeric y value are skipped and "ERROR" is printed for each.
    x values are grouped by their (stripped) text and written in string sort
    order. The deviation written is the sample standard deviation (divisor
    n-1); for an x with a single y value it is 0.

    :param int xCol: x column (1-based)
    :param int yCol: y column (1-based)
    :param int numFiles: the number of files to average over; if None, the
                         number of entries in filePath that match
                         filePrefix/fileSuffix is used
    :param string filePrefix: prefix in filenames
    :param string fileSuffix: suffix in filenames
    :param string filePath: path to files
    :param string delim: delimiter used between columns
    :param string outName: Name of the result file, defaults to
                           <filePrefix><yCol><fileSuffix>.out inside filePath
    :raises AssertionError: if filePath does not exist or numFiles, xCol or
                            yCol is unset/zero
    '''
    assert os.path.exists(filePath), 'File path %s does not exist' % filePath
    if numFiles is None:
        numFiles = len([f for f in os.listdir(filePath)
                        if f.endswith(fileSuffix) and f.startswith(filePrefix)])
    assert numFiles, 'numFiles is zero or not set'
    assert xCol, 'xCol is not set'
    assert yCol, 'yCol is not set'
    # Column numbers may arrive as strings (e.g. read from a config file).
    xCol = int(xCol)
    yCol = int(yCol)

    # ---- First, we'll collect all y values from all files
    d = {}  # this will store a list of y values for each x
    for i in range(1, numFiles + 1):
        # os.path.join also handles one-character paths such as the default
        # '.', which plain string concatenation turned into '.<prefix>1...'.
        fileName = os.path.join(filePath, filePrefix + str(i) + fileSuffix)
        with open(fileName, 'r') as f:
            for line in f:
                fields = line.strip().split(delim)
                # A blank line is just skipped; it does not end the file.
                if not fields or fields == ['']:
                    continue
                # disregard comments and unsuitable lines
                if fields[0].startswith('#') or len(fields) < xCol or len(fields) < yCol:
                    continue
                x = fields[xCol - 1].strip()
                # we assume that y values are numeric!
                try:
                    y = float(fields[yCol - 1])
                except ValueError:
                    y = None
                if x == '' or y is None:
                    print("ERROR")
                    continue
                # Register x only once a valid y exists, so no x ever ends up
                # with an empty list (which would break the mean below).
                d.setdefault(x, []).append(y)

    # ---- Then, we compute mean and standard deviation for the y values for
    #      each x and write them to target file
    if outName is None:
        outName = os.path.join(filePath, '%s%s%s.out' % (filePrefix, str(yCol), fileSuffix))
    with open(outName, 'w') as out:
        for x in sorted(d):
            ys = d[x]
            n = len(ys)
            # -- mean
            mean = sum(ys) / float(n)
            # -- standard deviation (std)
            # The standard error (std / sqrt(n)) is not written; #40 should
            # give a configurable choice. On the difference between them, see
            # http://ww1.cpa-apc.org:8080/publications/archives/PDF/1996/Oct/strein2.pdf
            squares = sum(math.pow(y - mean, 2) for y in ys)
            divisor = n - 1 if n > 1 else 1
            std = math.sqrt(squares / divisor)
            out.write('%s %f %f\n' % (str(x), mean, std))