00001
00002 """Performs unweighted linear regression. Can be invoked from command line by
00003 providing data pairs through stdin.
00004
00005 Nathan Baker, 2003"""
00006
00007 import math
00008 from sys import stdin, stdout, stderr
00009
00010 """ Accepts list of x,y pairs as input. Returns dictionary of results """
def fit(data):
    """Fit the line y = a + b*x to data by unweighted least squares.

    Parameters:
        data: sequence of points (it is measured with len() and iterated
            more than once). Each point is indexable, with the x value at
            index 0 and the y value at index 1; further items are ignored.

    Returns:
        Dictionary with these entries:
            "x mean", "y mean": arithmetic means of x and y.
            "xx covariance", "xy covariance", "yy covariance": sums of
                products of deviations from the means, divided by N.
            "slope", "intercept": b and a of the fitted line.
            "correlation coefficient (r^2)": sxy^2 / (sxx * syy); NaN when
                every y value is identical, because the ratio is then 0/0.
            "residual variance (s^2)": (syy - b*sxy) / (N - 2), clamped at
                zero.
            "slope error", "intercept error": s/sqrt(sxx) and
                s*sqrt(1/N + xmean^2/sxx), where s = sqrt(s^2).

    Raises:
        ZeroDivisionError: if data is empty, if the x values have exactly
            zero spread (which includes a single point), or if there are
            fewer than 3 points so that N - 2 is not positive.
    """
    outdict = {}

    ndata = len(data)
    if ndata == 0:
        raise ZeroDivisionError("cannot fit a line to an empty data set")
    n = float(ndata)

    xs = [pair[0] for pair in data]
    ys = [pair[1] for pair in data]

    # Means
    xmean = sum(xs)/n
    ymean = sum(ys)/n
    outdict["x mean"] = xmean
    outdict["y mean"] = ymean

    # Sums of squares and cross products of the deviations from the means
    dx = [x - xmean for x in xs]
    dy = [y - ymean for y in ys]
    sxx = sum(u*u for u in dx)
    syy = sum(v*v for v in dy)
    sxy = sum(u*v for u, v in zip(dx, dy))
    outdict["xx covariance"] = sxx/n
    outdict["xy covariance"] = sxy/n
    outdict["yy covariance"] = syy/n

    # Slope
    if sxx == 0:
        raise ZeroDivisionError(
            "all x values are identical; the slope is undefined")
    b = sxy/sxx
    outdict["slope"] = b

    # Intercept
    a = ymean - b*xmean
    outdict["intercept"] = a

    # Squared correlation. Constant y is a valid horizontal-line fit, but
    # the ratio is 0/0 there, so report NaN instead of dividing by zero.
    if syy == 0:
        r2 = float("nan")
    else:
        r2 = sxy*sxy/sxx/syy
    outdict["correlation coefficient (r^2)"] = r2

    # Residual variance
    if ndata < 3:
        raise ZeroDivisionError(
            "at least 3 data points are needed to estimate the residual "
            "variance (got %d)" % ndata)
    s2 = (syy - b*sxy)/(n - 2)
    if s2 < 0:
        # syy - b*sxy cannot be negative mathematically (Cauchy-Schwarz),
        # but rounding can leave a tiny negative value for an exact fit,
        # which would make math.sqrt raise a domain error.
        s2 = 0.0
    s = math.sqrt(s2)
    outdict["residual variance (s^2)"] = s2

    # Standard error of the slope
    eb = s/math.sqrt(sxx)
    outdict["slope error"] = eb

    # Standard error of the intercept
    ea = s*math.sqrt((1/n) + xmean*xmean/sxx)
    outdict["intercept error"] = ea

    return outdict
00071
00072 """Main driver; reads from stdin"""
def main():
    """Command-line driver: read x,y pairs from stdin, fit them, print results.

    Each input line is split on whitespace. The first two fields are parsed
    as floats (x, then y); any further fields are ignored. A line that does
    not yield two floats is reported on stderr and skipped. The collected
    pairs are passed to fit(), and every entry of the returned dictionary is
    written to stdout in sorted key order.
    """
    infile = stdin
    stdout.write("Reading data from %s...\n" % infile.name)

    data = []
    # iter() with a sentinel keeps calling readline() until it returns ""
    # (end of file).
    for line in iter(infile.readline, ""):
        line = line.strip()
        words = line.split()
        try:
            data.append((float(words[0]), float(words[1])))
        except (IndexError, ValueError):
            # IndexError: fewer than two fields on the line.
            # ValueError: a field is not a valid number.
            stderr.write("Ignoring unparseable line: %s\n" % line)
    stdout.write("Read %d data points.\n" % len(data))

    fitdict = fit(data)
    stdout.write("\nRESULTS:\n")
    for key in sorted(fitdict):
        stdout.write("%s: %g\n" % (key, fitdict[key]))
00097
00098
00099
# Run the stdin driver only when this file is executed as a script, so that
# importing the module for fit() has no side effects.
if __name__ == "__main__":
    main()