Changeset 164
- Timestamp:
- 04/29/08 15:35:34 (7 months ago)
- Files:
-
- projects/AsynCluster/trunk/svpmc/model.py (modified) (3 diffs)
- projects/AsynCluster/trunk/svpmc/params.py (modified) (5 diffs)
- projects/AsynCluster/trunk/svpmc/pmc.py (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/project.py (modified) (3 diffs)
- projects/AsynCluster/trunk/svpmc/sample.py (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/test/project-spec.txt (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/test/project.py (deleted)
- projects/AsynCluster/trunk/svpmc/test/test_project.py (added)
- projects/AsynCluster/trunk/svpmc/test/test_sample.py (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/test/test_tseries.py (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/test/test_weave.py (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/tseries.py (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/weave.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
projects/AsynCluster/trunk/svpmc/model.py
r163 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com … … 49 49 50 50 51 class SpecParser(object):52 """53 Instantiate me with the path of a file that defines a project54 specification. Then you can call my two public methods to get what a model55 needs to run the project.56 """57 re_dashes = re.compile("^\-{10,}$")58 re_xy = re.compile("([0-9]+),([0-9]+)")59 60 def __init__(self, specFile):61 self.specDir = os.path.dirname(specFile)62 self.tables = {}63 Nf = None64 linePrev = ""65 fh = open(specFile)66 for line in fh:67 line = line.strip()68 if line.startswith('#'):69 continue70 if not line:71 Nf = None72 elif Nf:73 fieldList = line.split()74 fieldList += [''] * (Nf - len(fieldList))75 fieldList[Nf-1] = " ".join(fieldList[Nf-1:])76 rows.append(fieldList[:Nf])77 elif self.re_dashes.match(line) and linePrev:78 fields = linePrev.split()79 Nf = len(fields)80 rows = self.tables[fields[0].lower()] = []81 else:82 linePrev = line83 fh.close()84 85 def paramNames(self):86 """87 Returns the names of the parameter sequence for my project.88 """89 return [row[0] for row in self.tables['parameter']]90 91 def timeSeries(self):92 """93 Returns a list of L{tseries.TimeSeries} objects loaded and transformed94 as defined in my specification along with a dict of descriptions for95 the time series, keyed by name.96 """97 tsList = []98 descriptionDict = {}99 for name, filePath, transform, description in self.tables['series']:100 filePath = os.path.join(self.specDir, filePath) + ".dat"101 tsObject = tseries.TimeSeries(name, filePath)102 for methodName in transform.split(','):103 getattr(tsObject, methodName)()104 tsList.append(tsObject)105 descriptionDict[name] = description106 return tsList, descriptionDict107 108 def priorContainer(self):109 """110 Returns an instance of L{params.PriorContainer} with111 L{params.FlexArray} objects loaded with a L{params.Prior} object for112 each array element of each of my parameters, along with a dict of113 descriptions for the parameters, keyed by name.114 """115 descriptionDict = {}116 p = len(self.tables['series'])117 container = params.PriorContainer()118 for name, dims, description in self.tables['parameter']:119 shape = eval(dims)120 if not isinstance(shape, tuple):121 shape = (shape,)122 shape = [int(x) for x in shape]123 pa = container.priorArray(name, *shape)124 for priorSpec in self.tables[name]:125 index = priorSpec[0]126 kw = {'dname':priorSpec[1]}127 for k, name in enumerate(('loc', 'scale', 'a', 'b')):128 stringValue = priorSpec[k+2]129 if stringValue != '':130 kw[name] = float(stringValue)131 for j in xrange(shape[0]):132 if len(shape) == 1:133 if index.isdigit() and j != int(index):134 continue135 pa[j] = params.Prior(**kw)136 else:137 for k in xrange(shape[1]):138 if index == "I" and j != k:139 continue140 if index != ":":141 match = self.re_xy.match(index)142 if match and match.group(0) != "%d,%d" % (j,k):143 continue144 pa[j,k] = params.Prior(**kw)145 descriptionDict[name] = description146 return container, descriptionDict147 148 149 51 class ModelManager(object): 150 52 """ … … 177 79 ########################################################################### 178 80 """ 179 def __init__(self, p rojectManager):81 def __init__(self, paramNames, tsList): 180 82 self.mgr = projectManager 181 self.modelObj = Model( 182 paramNames=projectManager.paramNames, 183 tsList=projectManager.timeSeries) 83 self.modelObj = Model(paramNames=paramNames, tsList=tsList) 184 84 self.queue = NullQueue(1) 185 85 reactor.addSystemEventTrigger( projects/AsynCluster/trunk/svpmc/params.py
r161 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com … … 62 62 63 63 Call an instance of me with a hash array and you'll get a new instance with 64 just the subset of objects referenced by the array. 65 66 Call the instance with a single object ID and you'll get that object, just67 like you'd get a numeric scalar by addressing a single element of a SciPy68 array.64 just the subset of objects referenced by the array. Call the instance with 65 a single object ID and you'll get that object, just like you'd get a 66 numeric scalar by addressing a single element of a SciPy array. In either 67 case, you can supply a raveled list of the new values (objects, numeric 68 scalars, whatever) to be used instead of the instance's current objects. 69 69 70 70 You can add two like-dimensioned instances of me. Each element of the sum … … 98 98 self._I = s.arange(self._N).reshape(*self._shape) 99 99 100 def __call__(self, I): 100 def __call__(self, I, valueList=None): 101 if valueList is None: 102 valueList = self._O 101 103 if isinstance(I, int): 102 return self._O[I]104 return valueList[I] 103 105 newVersion = self.__class__(*I.shape) 104 106 for j, k in enumerate(I.ravel()): 105 newVersion._O[j] = self._O[k]107 newVersion._O[j] = valueList[k] 106 108 return newVersion 107 109 … … 152 154 For each of my objects, get the specified numeric attribute. 153 155 154 Returns the result (must be a numeric scalar) of each attribute value 155 in an array of the same shape as my object array. 156 Returns the result of each attribute value in an array of the same 157 shape as my object array. 158 159 If any value is not a numeric scalar, the result will be another 160 instance of me with the results (of whatever type) as its objects. 156 161 """ 157 162 if name == 'shape': 158 163 return self._shape 159 x = s.empty(self.shape) 164 x = [] 165 allNumeric = True 160 166 for k in self._I.ravel(): 161 167 thisValue = getattr(self._O[k], name) 162 if not isinstance(thisValue, (int, long, float)): 163 raise ValueError( 164 "Attribute must have a numeric scalar value") 165 x.ravel()[k] = thisValue 166 return x 168 if allNumeric and not isinstance(thisValue, (int, long, float)): 169 allNumeric = False 170 x.append(thisValue) 171 if allNumeric: 172 return s.array(x).reshape(*self._shape) 173 return self(self._I, x) 167 174 168 175 def __setattr__(self, name, value): … … 196 203 object array. The results must all be numeric scalars. 197 204 198 TODO: If the result of each call is not a numeric scalar, the result199 will be another instance of me with the results (of whatever type) as200 itsobjects.205 If the result of any call is not a numeric scalar, the result will be 206 another instance of me with the results (of whatever type) as its 207 objects. 201 208 """ 202 209 indicesOfArrayArgs = [ 203 210 k for k, arg in enumerate(args) if s.shape(arg) == self.shape] 204 211 theseArgs = [arg for arg in args] 205 x = s.empty(self.shape) 212 x = [] 213 allNumeric = True 206 214 for j in self._I.ravel(): 207 215 for k in indicesOfArrayArgs: 208 216 theseArgs[k] = args[k].ravel()[j] 209 217 thisValue = getattr(self._O[j], methodName)(*theseArgs) 210 if not isinstance(thisValue, (int, long, float)): 211 raise ValueError( 212 "Called method must return a numeric scalar value") 213 x.ravel()[j] = thisValue 214 return x 218 if allNumeric and not isinstance(thisValue, (int, long, float)): 219 allNumeric = False 220 x.append(thisValue) 221 if allNumeric: 222 return s.array(x).reshape(*self._shape) 223 return self(self._I, x) 215 224 216 225 projects/AsynCluster/trunk/svpmc/pmc.py
r163 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com projects/AsynCluster/trunk/svpmc/project.py
r163 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2008 by Edwin A. Suominen, http://www.eepatents.com … … 17 17 18 18 """ 19 19 Management of an overall SVpmc run. 20 20 """ 21 21 22 22 import os.path, re 23 23 import scipy as s 24 25 import tseries, params 24 from Scientific.IO import NetCDF 25 26 import tseries, params, model 26 27 27 28 28 29 class ProjectManager(object): 29 30 """ 30 Instantiate me with the path of a file that defines a project 31 specification. Then you can call my two public methods to get what a model 32 needs to run the project. 31 Instantiate me with the path of a file within a project directory that 32 defines a project specification and the name of a NetCDF file to be created 33 in the project directory. If no NetCDF file is specified, one will be 34 created with the name 'svpmc.nc'. 35 36 You can specify the PMC population size with the keyword I{m}. It defaults 37 to 10,000. 38 39 @ivar n: The number of samples. 40 41 @ivar p: The number of time series. 42 43 @ivar xcorrs: The number of cross-correlations between series, computed as 44 C{(p**2 + p)/2} 45 46 @ivar m: The number of population members in each population monte carlo 47 iteration. 48 33 49 """ 34 50 re_dashes = re.compile("^\-{10,}$") 35 51 re_xy = re.compile("([0-9]+),([0-9]+)") 36 37 def __init__(self, specFile, ncFile="params.nc"): 38 self.specDir = os.path.dirname(specFile) 52 dimensions = { 53 'n': 'sample', 54 'p': 'series', 55 'xcorrs': 'cross_correlation', 56 'm': 'member', 57 } 58 59 def __init__(self, specFile, ncFileName="svpmc.nc", m=10000): 60 self.m = m 61 self.iteration = 0 62 specDir = os.path.dirname(specFile) 39 63 self.tables = self._parseSpec(specFile) 40 self.cdf = NetCDF.NetCDFFile(os.path.join(self.specDir, ncFile), 'w') 41 self.tsList, tsDescriptions = self._timeSeries() 42 # TODO 43 44 def _get_paramNames(self): 45 """ 46 Returns the names of the parameter sequence for my project. 47 """ 48 return [row[0] for row in self.tables['parameter']] 49 paramNames = property(_get_paramNames) 64 tsList, seriesTitles = self._setupTimeSeries(specDir) 65 paramTitles, dimensions = self._setupParams() 66 self._setupCDF( 67 os.path.join(specDir, ncFile), tsList, seriesTitles, paramTitles) 68 self.mgr = model.ModelManager(self.paramNames, tsList) 50 69 51 70 def _parseSpec(self, filePath): … … 74 93 return tables 75 94 76 def _timeSeries(): 77 """ 78 Returns a list of L{tseries.TimeSeries} objects loaded and transformed 79 as defined in my specification along with a dict of descriptions for 80 the time series, keyed by name. 81 """ 82 tsList = [] 83 descriptionDict = {} 84 for name, filePath, transform, description in self.tables['series']: 85 filePath = os.path.join(self.specDir, filePath) + ".dat" 86 tsObject = tseries.TimeSeries(name, filePath) 95 def _setupTimeSeries(self, fileDir): 96 """ 97 Given the specification defined in my I{tables} dict, loads a list of 98 L{tseries.TimeSeries} objects from '.dat' files in the specified 99 directory. 100 101 Returns a list of the objects along with a list of the time series 102 titles. 103 """ 104 prevObj = None 105 tsList, titles = [], [] 106 for series, transform, title in self.tables['series']: 107 filePath = os.path.join(fileDir, series) + ".dat" 108 tsObject = tseries.TimeSeries(title, filePath) 87 109 for methodName in transform.split(','): 88 110 getattr(tsObject, methodName)() 111 if prevObj: 112 tsObject = tsObject.intersect(prevObj) 89 113 tsList.append(tsObject) 90 descriptionDict[name] = description 91 return tsList, descriptionDict 92 93 def _priorContainer(self): 94 """ 95 Returns an instance of L{params.PriorContainer} with 96 L{params.FlexArray} objects loaded with a L{params.Prior} object for 97 each array element of each of my parameters, along with a dict of 98 descriptions for the parameters, keyed by name. 99 """ 100 descriptionDict = {} 101 p = len(self.tables['series']) 102 container = params.PriorContainer() 103 for name, dims, description in self.tables['parameter']: 104 shape = eval(dims) 105 if not isinstance(shape, tuple): 106 shape = (shape,) 107 shape = [int(x) for x in shape] 108 pa = container.priorArray(name, *shape) 114 titles.append(title) 115 prevObj = tsObject 116 return tsList, titles 117 118 def _setupParams(self): 119 """ 120 Given the specification defined in my I{tables} dict, constructs an 121 instance of L{params.PriorContainer} with L{params.FlexArray} objects 122 loaded with a L{params.Prior} object for each array element of each of 123 my parameters. 124 125 Sets my I{priorContainer} attribute to the container and returns a list 126 of the parameter titles and lists of dimensions compatible with my 127 NetCDF specification. 128 """ 129 def parseDims(dims): 130 shape = [] 131 dimSequence = [] 132 for dim in dims.split(','): 133 if dim not in self.dimensions: 134 raise ValueError( 135 "Invalid dimension specification '%s'" % dims) 136 shape.append(getattr(self, dim)) 137 dimSequence.append(self.dimensions[dim]) 138 dimensions.append(dimSequence) 139 return tuple(shape) 140 141 def constructPriors(priorSpec, shape): 142 index = priorSpec[0] 143 kw = {'dname':priorSpec[1]} 144 for k, name in enumerate(('loc', 'scale', 'a', 'b')): 145 stringValue = priorSpec[k+2] 146 if stringValue != '': 147 kw[name] = float(stringValue) 148 for j in xrange(shape[0]): 149 if len(shape) == 1: 150 if index.isdigit() and j != int(index): 151 continue 152 pa[j] = params.Prior(**kw) 153 else: 154 for k in xrange(shape[1]): 155 if index == "I" and j != k: 156 continue 157 if index != ":": 158 match = self.re_xy.match(index) 159 if match and match.group(0) != "%d,%d" % (j,k): 160 continue 161 pa[j,k] = params.Prior(**kw) 162 163 self.paramNames = [] 164 titles, dimensions = [], [] 165 p = self.p = len(self.tables['series']) 166 xcorrs = self.xcorrs = int(0.5*(p**2 + p)) 167 self.paramContainer = params.PriorContainer() 168 for name, dims, title in self.tables['parameter']: 169 shape = parseDims(dims) 170 pa = self.paramContainer.priorArray(name, *shape) 109 171 for priorSpec in self.tables[name]: 110 index = priorSpec[0] 111 kw = {'dname':priorSpec[1]} 112 for k, name in enumerate(('loc', 'scale', 'a', 'b')): 113 stringValue = priorSpec[k+2] 114 if stringValue != '': 115 kw[name] = float(stringValue) 116 for j in xrange(shape[0]): 117 if len(shape) == 1: 118 if index.isdigit() and j != int(index): 119 continue 120 pa[j] = params.Prior(**kw) 121 else: 122 for k in xrange(shape[1]): 123 if index == "I" and j != k: 124 continue 125 if index != ":": 126 match = self.re_xy.match(index) 127 if match and match.group(0) != "%d,%d" % (j,k): 128 continue 129 pa[j,k] = params.Prior(**kw) 130 descriptionDict[name] = description 131 return container, descriptionDict 172 constructPriors(priorSpec, shape) 173 titles.append(title) 174 self.paramNames.append(name) 175 return titles, dimensions 176 177 def _setupCDF(self, filePath, tsList, sTitles, pTitles, pDims): 178 """ 179 """ 180 tsData = s.row_stack([ts() for ts in tsList]) 181 self.n = tsData.shape[1] 182 cdf = self.cdf = NetCDF.NetCDFFile(filePath, 'w') 183 cdf.title = self.tables['project'][0][0] 184 # Dimensions 185 for attrName, name in self.dimensions.iteritems(): 186 cdf.createDimension(name, getattr(self, attrName)) 187 cdf.createDimension("iteration", None) 188 # Time Series 189 obs = cdf.createVariable( 190 "observations", 'f', ('series', 'sample')) 191 obs.long_name = "Observations, %d time series " % self.p +\ 192 "with %d samples each" % self.n 193 for k, title in enumerate(sTitles): 194 obs[k,:] = tsData[k,:].astype('f') 195 setattr(obs, "series_%02d" % (k,), title) 196 # Last-Sample Log-Volatility 197 logVol = cdf.createVariable( 198 "log_volatility", 'f', ('iteration', 'member', 'series')) 199 logVol.long_name = "Last Sample of Simulated Log-Volatilities" 200 # Parameters 201 for k, name in enumerate(self.paramNames): 202 pDim = ['iteration', 'member'] + pDims[k] 203 param = cdf.createVariable(name, 'f', tuple(pDim)) 204 param.long_name = pTitles[k] 205 # Log-Likelihood 206 L = cdf.createVariable("log_likelihood", 'f', ('iteration', 'member')) 207 L.long_name = "Log-Likelihood of Observations Given Parameters" 208 # Done setting up, save what we've got thus far 209 cdf.sync() 210 return cdf 211 212 def writeParams(self, parameters): 213 """ 214 Write the supplied population of I{parameters}, housed in a 215 L{param.FlexArray} instance containing L{param.ParameterContainer} 216 objects, as the latest iteration of the open CDF record. 217 """ 218 if len(parameters) != self.m: 219 raise ValueError( 220 "Each iteration record must have %d parameters" % self.m) 221 for name in self.paramNames: 222 var = self.cdf.variables[name] 223 for member, paramArray in enumerate(getattr(parameters, name)): 224 var[self.iteration, member, :] = paramArray.astype('f') 225 self.iteration += 1 226 227 228 projects/AsynCluster/trunk/svpmc/sample.py
r154 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com projects/AsynCluster/trunk/svpmc/test/project-spec.txt
r161 r164 19 19 20 20 21 Project 22 ------------------------------------------------------------------------------- 23 Foreign Currency Returns over U.S. Dollar 21 24 22 Series File Transform Description 25 26 Series Transform Title 23 27 ------------------------------------------------------------------------------- 24 dm dm-us v2r Deutchmark vs. USD25 bpus-bp inv,v2r British Pound vs. USD26 jy jy-us v2r Japanese Yen vs. USD28 dm-us v2r Deutchmark vs. USD 29 us-bp inv,v2r British Pound vs. USD 30 jy-us v2r Japanese Yen vs. USD 27 31 28 32 29 33 30 Parameter Dimensions Description34 Parameter Dimensions Title 31 35 ------------------------------------------------------------------------------- 32 36 a p Drift 33 37 b p,p Lag-1 cross-correlation for VAR of returns 34 cr 0.5*(p**2+p)Innovation shock cross-correlations38 cr xcorrs Innovation shock cross-correlations 35 39 d p Volatility offset 36 40 e p,p Lag-1 cross-correlation for VAR of volatilities 37 41 fs p Volatility shock standard deviations 38 fr 0.5*(p**2+p)Volatility shock cross-correlations42 fr xcorrs Volatility shock cross-correlations 39 43 g p Volatility/innovation shock correlations 40 44 projects/AsynCluster/trunk/svpmc/test/test_sample.py
r151 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com projects/AsynCluster/trunk/svpmc/test/test_tseries.py
r160 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com projects/AsynCluster/trunk/svpmc/test/test_weave.py
r147 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com projects/AsynCluster/trunk/svpmc/tseries.py
r160 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com projects/AsynCluster/trunk/svpmc/weave.py
r154 r164 1 # sv PMC: Stochastic Volatility Inference via Population Monte Carlo1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo 2 2 # 3 3 # Copyright (C) 2008 by Edwin A. Suominen, http://www.eepatents.com
