Changeset 160
- Timestamp:
- 04/26/08 23:27:47 (7 months ago)
- Files:
-
- projects/AsynCluster/trunk/svpmc/model.py (modified) (14 diffs)
- projects/AsynCluster/trunk/svpmc/params.py (modified) (3 diffs)
- projects/AsynCluster/trunk/svpmc/test/project-spec.txt (added)
- projects/AsynCluster/trunk/svpmc/test/test_model.py (modified) (8 diffs)
- projects/AsynCluster/trunk/svpmc/test/test_tseries.py (modified) (3 diffs)
- projects/AsynCluster/trunk/svpmc/test/util.py (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/tseries.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
projects/AsynCluster/trunk/svpmc/model.py
r159 r160 20 20 """ 21 21 22 import os.path 22 import os.path, re 23 23 import scipy as s 24 24 from scipy import linalg … … 31 31 from twisted_goodies.pybywire.pack import Packer, Unpacker 32 32 33 import sample, weave, params33 import tseries, sample, weave, params 34 34 35 35 … … 51 51 class SpecParser(object): 52 52 """ 53 """ 54 def __init__(self, projectSpec): 55 self.spec = projectSpec 53 Instantiate me with the path of a file that defines a project 54 specification. Then you can call my two public methods to get what a model 55 needs to run the project. 56 """ 57 re_dashes = re.compile("^\-{10,}$") 58 re_xy = re.compile("([0-9]+),([0-9]+)") 59 60 def __init__(self, specFile): 61 self.specDir = os.path.dirname(specFile) 62 self.tables = {} 63 Nf = None 64 linePrev = "" 65 fh = open(specFile) 66 for line in fh: 67 line = line.strip() 68 if line.startswith('#'): 69 continue 70 if not line: 71 Nf = None 72 elif Nf: 73 fieldList = line.split() 74 fieldList += [''] * (Nf - len(fieldList)) 75 fieldList[Nf-1] = " ".join(fieldList[Nf-1:]) 76 rows.append(fieldList[:Nf]) 77 elif self.re_dashes.match(line) and linePrev: 78 fields = linePrev.split() 79 Nf = len(fields) 80 rows = self.tables[fields[0].lower()] = [] 81 else: 82 linePrev = line 83 fh.close() 84 85 def paramNames(self): 86 """ 87 Returns the names of the parameter sequence for my project. 88 """ 89 return [row[0] for row in self.tables['parameter']] 90 91 def timeSeries(self): 92 """ 93 Returns a list of L{tseries.TimeSeries} objects loaded and transformed 94 as defined in my specification. 95 """ 96 tsList = [] 97 for name, filePath, transform, description in self.tables['series']: 98 filePath = os.path.join(self.specDir, filePath) + ".dat" 99 tsObject = tseries.TimeSeries(name, filePath) 100 for methodName in transform.split(','): 101 getattr(tsObject, methodName)() 102 tsList.append(tsObject) 103 return tsList 56 104 57 105 def priorContainer(self): 58 106 """ 59 """ 60 61 def timeSeries(self): 62 """ 63 """ 64 107 Returns an instance of L{params.PriorContainer} with 108 L{params.FlexArray} objects loaded with a L{params.Prior} object for 109 each array element of each of my parameters. 110 """ 111 p = len(self.tables['series']) 112 container = params.PriorContainer() 113 for name, dims, description in self.tables['parameter']: 114 shape = eval(dims) 115 if not isinstance(shape, tuple): 116 shape = (shape,) 117 shape = [int(x) for x in shape] 118 pa = container.priorArray(name, *shape) 119 for priorSpec in self.tables[name]: 120 index = priorSpec[0] 121 kw = {'dname':priorSpec[1]} 122 for k, name in enumerate(('loc', 'scale', 'a', 'b')): 123 stringValue = priorSpec[k+2] 124 if stringValue != '': 125 kw[name] = float(stringValue) 126 for j in xrange(shape[0]): 127 if len(shape) == 1: 128 if index.isdigit() and j != int(index): 129 continue 130 pa[j] = params.Prior(**kw) 131 else: 132 for k in xrange(shape[1]): 133 if index == "I" and j != k: 134 continue 135 if index != ":": 136 match = self.re_xy.match(index) 137 if match and match.group(0) != "%d,%d" % (j,k): 138 continue 139 pa[j,k] = params.Prior(**kw) 140 return container 141 65 142 66 143 class ModelManager(object): … … 70 147 I handle calls to the model object in either local or remote mode. Calls in 71 148 local mode are run through an instance of L{NullQueue} in the main thread. 149 150 Instantiate me with the text of a specification for the project and model 151 I'm going to manage. 152 72 153 """ 73 154 remoteMode = False … … 242 323 var = property(_get_var) 243 324 244 def _get_rv(self):245 if 'rv' not in self.cache:246 # Concurrent cross-correlations between volatility shocks247 self.cache['rv'] = s.matrix(linalg.cholesky(248 self.covarMatrix(self.fs, self.fr), lower=True))249 return self.cache['rv']250 rv = property(_get_rv)251 252 325 253 326 #--- Support -------------------------------------------------------------- … … 258 331 standard deviations and I{cr} cross-correlations. 259 332 260 The I{cs} argument is in the form s0, s1,..., sp. The I{cr} argument is 261 in the form r00, r01, r02,..., r0p, r11, r12,..., r1p,..., rpp 333 The I{cs} argument is in the form s0, s1,..., sp. It must have C{p} 334 elements. 335 336 The I{cr} argument must have C{0.5*(p**2 + p)} elements and is in the 337 form r00, r01, r02,..., r0p, r11, r12,..., r1p,..., rpp 262 338 """ 263 339 i = 0 … … 270 346 i += 1 271 347 return cv 272 348 273 349 def decaySamples(self, tol): 274 350 """ … … 345 421 ri = linalg.inv( 346 422 linalg.cholesky( 347 self.covarMatrix(s elf.cs, self.cr), lower=True))423 self.covarMatrix(s.ones(self.p), self.cr), lower=True)) 348 424 self.cache['lv_work'] = y, ri 349 425 # Run the C code where the heavy lifting is done … … 351 427 return L0, L, h[:,1:] 352 428 353 def hybridGibbs(self, z, x, sigma, tol=1E-4):429 def hybridGibbs(self, z, x, sigma, rv, tol=1E-4): 354 430 """ 355 431 Does one iteration of a hybrid Gibbs sampler for the log-volatilities, … … 365 441 """ 366 442 L_total = 0 367 acceptances = 0368 rv = self.rv369 443 N = self.decaySamples(tol) 370 444 # Initialize volatility shocks from cross-correlation of the IID 371 # variates... 372 v = s.array(rv * z) 445 # variates, if positive definite... 446 v = s.array(rv*z) 447 if v is None: 448 # (If not, return worst possible likelihood for this illegal fs, fr 449 # parameter combination) 450 return 373 451 # ...and initial log-volatilites, from the offset alone... 374 452 h0 = self.d … … 376 454 # with their volatility shocks 377 455 zp = self.zProposal(z, sigma) 378 vp = s.array(rv *zp)456 vp = s.array(rv*zp) 379 457 # Do conditional draws of each sample in turn 380 458 for k in xrange(self.n): … … 389 467 dL = LVp[1] - LV[1] 390 468 if dL > 0 or self.logUniform() < dL: 391 acceptances += 1392 469 LV = LVp 393 470 z[:,k] = zp[:,k] … … 396 473 h0 = LV[2][:,0] 397 474 L_total += LV[0] 398 return L_total , float(acceptances)/self.n475 return L_total 399 476 400 477 … … 439 516 # ...including the latent parameter of IID normal variates 440 517 z = paramContainer.z.copy() 518 # Concurrent cross-correlations between volatility shocks, bail out 519 # with worst possible likelihood if invalid 520 try: 521 rv = s.matrix(linalg.cholesky( 522 self.covarMatrix(self.fs, self.fr), lower=True)) 523 except linalg.LinAlgError: 524 return -s.inf, z 441 525 # Innovations as residuals of the reversed VAR(1) 442 526 x = self.var.reverse(self.a, self.b) 527 # Run the hybrid Gibbs rounds, returning the likelihood from the last 528 # one along with the state of the IID variate vector at that point 443 529 for k in xrange(self.Mv-1): 444 self.hybridGibbs(z, x, sigma)445 return self.hybridGibbs(z, x, sigma)[0], z446 530 self.hybridGibbs(z, x, rv, sigma) 531 return self.hybridGibbs(z, x, rv, sigma), z 532 projects/AsynCluster/trunk/svpmc/params.py
r159 r160 32 32 33 33 PARAM_NAMES = [ 34 # Approximately p*(3+2.5) parameters total, e.g., 125 for p=534 # Total parameters: 3*p**2 + 6*p 35 35 #------------------------------------------------------------ 36 36 'a', # Drift (p x 1) 37 37 38 38 'b', # Lag-1 cross-correlation for VAR of returns (p x p) 39 40 'cs', # Innovation shock standard deviations (vector with s0, s1,..., sp)41 39 42 40 'cr', # Innovation shock cross-correlation coefficients (vector with … … 160 158 paramNames = PARAM_NAMES 161 159 162 def __init__(self, paramSpec): 163 # TODO 164 pass 160 def priorArray(self, paramName, *shape): 161 array = getattr(self, paramName, None) 162 if array is None: 163 array = FlexArray(*shape) 164 setattr(self, paramName, array) 165 return array 165 166 166 167 … … 172 173 For all underlying dist objects, you must also specify I{loc} and I{scale} as 173 174 additional parameters. Some of the objects also require I{a} and I{b}. 175 176 The exception is that you can use 'constant' as a distribution name. Then 177 the only pertinent parameter is the constant, unvarying I{loc}. 174 178 """ 175 179 paramNames = ['dname', 'loc', 'scale', 'a', 'b'] 180 181 class Constant(object): 182 def __init__(self, loc): 183 self.loc = loc 184 def pdf(self, x): 185 return 1.0 * (x == self.loc) 186 def rvs(self, N): 187 return self.loc * s.ones(N) 188 def ppf(self, *args): 189 return 0.0 176 190 177 191 def _get_distObj(self): 178 192 if 'dist' not in self.cache: 179 if not hasattr(distributions, self.dname): 193 if self.dname == 'constant': 194 self.cache['dist'] = self.Constant(self.loc) 195 elif not hasattr(distributions, self.dname): 180 196 raise ImportError( 181 197 "No distribution '%s' in scipy.stats.distributions" \ 182 198 % self.dname) 183 distClass = getattr(distributions, self.dname) 184 args = [] 185 for xName in ('a', 'b'): 186 xValue = getattr(self, xName, None) 187 if xValue is not None: 188 args.append(xValue) 189 kw = {'loc':self.loc, 'scale':self.scale} 190 self.cache['dist'] = distClass(*args, **kw) 199 else: 200 distClass = getattr(distributions, self.dname) 201 args = [] 202 for xName in ('a', 'b'): 203 xValue = getattr(self, xName, None) 204 if xValue is not None: 205 args.append(xValue) 206 kw = {'loc':self.loc, 'scale':self.scale} 207 self.cache['dist'] = distClass(*args, **kw) 191 208 return self.cache['dist'] 192 209 distObj = property(_get_distObj) projects/AsynCluster/trunk/svpmc/test/test_model.py
r159 r160 20 20 """ 21 21 22 import os.path 22 23 import scipy as s 23 24 from scipy import stats, random, linalg, signal … … 33 34 34 35 VERBOSE = True 36 37 38 class Test_SpecParser(util.TestCase): 39 def setUp(self): 40 self.sp = model.SpecParser( 41 os.path.join(os.path.dirname(__file__), "project-spec.txt")) 42 43 def test_parse(self): 44 paramNames = [row[0] for row in self.sp.tables['parameter']] 45 for name, rows in self.sp.tables.iteritems(): 46 if name == 'series': 47 self.failUnlessEqual(len(rows), 3) 48 elif name == 'parameter': 49 self.failUnlessEqual(len(rows), 8) 50 else: 51 self.failUnless(name in paramNames) 52 53 def test_timeSeries(self): 54 tsList = self.sp.timeSeries() 55 self.failUnlessEqual(len(tsList), 3) 56 57 def test_priorContainer(self): 58 container = self.sp.priorContainer() 59 self.fail("TODO") 35 60 36 61 … … 134 159 y = s.row_stack([ 135 160 util.TS_LIST[k].intersect(util.TS_LIST[1-k])() for k in (0,1)]) 161 162 def rv(self, fs, fr): 163 return s.matrix(linalg.cholesky( 164 self.model.covarMatrix(fs, fr), lower=True)) 136 165 137 166 def _check_uncorr(self, x, y, plot=True): … … 351 380 self.model.g = s.array([0.0]) 352 381 z = s.randn(self.model.p, self.model.n) 353 rv = s.matrix(linalg.cholesky( 354 self.model.covarMatrix(self.model.fs, self.model.fr), lower=True)) 355 v = s.array(rv * z) 382 v = s.array(self.rv(self.model.fs, self.model.fr) * z) 356 383 return self.model.logVolatilities( 357 384 z, v, s.zeros_like(z), self.model.d.copy())[2] … … 365 392 self.model.d = s.array([0.0]) 366 393 self.model.e = s.array([[e]]) 367 self.model.fs = [1.0]368 self.model.fr = []369 394 self.model.g = s.array([0.0]) 370 395 z = s.randn(1, self.model.n) … … 563 588 'd': [0], 564 589 'e': [[0]], 565 'fs': [ 1.0],590 'fs': [0.5], 566 591 'fr': [], 567 592 'g': [corr], … … 578 603 self.N).transpose() 579 604 self.h = signal.lfilter( 580 [1], [1.0, -self.model.e[0][0]], self. iv[1,:])605 [1], [1.0, -self.model.e[0][0]], self.model.fs[0]*self.iv[1,:]) 581 606 x = s.exp(0.5 * self.h) * self.iv[0,:] 582 607 # Instantiate a model to be tested and set its parameters … … 586 611 587 612 def _runHG(self, N, sigma): 613 L = [] 588 614 z = s.randn(1, self.N) 589 L = []615 rv = self.rv(self.model.fs, self.model.fr) 590 616 for k in xrange(N): 591 L_this , acceptRate = self.model.hybridGibbs(z, self.x, sigma)617 L_this = self.model.hybridGibbs(z, self.x, rv, sigma) 592 618 L.append(L_this) 593 print "%03d: L=%6.1f, %d%% acceptance rate" \ 594 % (k, L_this, 100*acceptRate) 619 print "%03d: L=%6.1f" % (k, L_this) 595 620 h = self.model.logVolatilities( 596 621 z, z.copy(), self.x, self.model.d.copy())[-1] projects/AsynCluster/trunk/svpmc/test/test_tseries.py
r135 r160 96 96 class Test_TimeSeries(util.TestCase): 97 97 def setUp(self): 98 self.prices = tseries.TimeSeries('prices', data=PRICES)98 self.prices = tseries.TimeSeries('prices', text=PRICES) 99 99 100 100 def test_convertInPlace(self): … … 103 103 104 104 def test_v2r_earnings(self): 105 earnings = tseries.TimeSeries('earnings', data=EARNINGS).intersect(self.prices)105 earnings = tseries.TimeSeries('earnings', text=EARNINGS).intersect(self.prices) 106 106 self.prices.v2r(earnings=earnings()) 107 107 self.failUnlessAlmostEqual(self.prices()[1], s.log(1.02), places=3) … … 126 126 client to it. 127 127 """ 128 self.prices = tseries.TimeSeries('prices', data=PRICES)128 self.prices = tseries.TimeSeries('prices', text=PRICES) 129 129 return self.getReferenceToRoot(self.CopyableReturner(self.prices)) 130 130 projects/AsynCluster/trunk/svpmc/test/util.py
r146 r160 38 38 39 39 TS_LIST = [ 40 TimeSeries(os.path.join(os.path.dirname(__file__), "%s-us.dat" % x)).v2r() 40 TimeSeries( 41 x, os.path.join(os.path.dirname(__file__), "%s-us.dat" % x)).v2r() 41 42 for x in ('dm', 'jy')] 42 43 projects/AsynCluster/trunk/svpmc/tseries.py
r135 r160 159 159 160 160 def __init__( 161 self, name OrFilePath, data=None, j=None, k=None):162 self.name = name OrFilePath163 if data is None:164 data = FileLoader( nameOrFilePath)()165 elif isinstance(data, str):166 data = TextLoader( data)()161 self, name, filePath=None, text=None, data=None, j=None, k=None): 162 self.name = name 163 if filePath: 164 data = FileLoader(filePath)() 165 elif text: 166 data = TextLoader(text)() 167 167 params.Parameterized.__init__(self, data=data[slice(j, k),:]) 168 168 … … 264 264 which may be limited to the slice I{j:k}. 265 265 """ 266 return self.__class__(self.name, self.data.copy(), j,k)266 return self.__class__(self.name, data=self.data.copy(), j=j, k=k) 267 267 268 268 @transform
