Changeset 157
- Timestamp:
- 04/24/08 20:15:13 (9 months ago)
- Files:
-
- projects/AsynCluster/trunk/svpmc/model.c (modified) (5 diffs)
- projects/AsynCluster/trunk/svpmc/model.py (modified) (8 diffs)
- projects/AsynCluster/trunk/svpmc/test/test_model.py (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
projects/AsynCluster/trunk/svpmc/model.c
r156 r157 93 93 94 94 int k0, k1, k2; 95 double norm, normp, L, Lp ;95 double norm, normp, L, Lp, dL; 96 96 97 97 … … 103 103 // which we build a multivariate log-volatility proposal for this 104 104 // multivariate sample. 105 105 106 106 // For each time series... 107 107 for(k1=0; k1<Nz[0]; k1++) { … … 113 113 for(k2=0; k2<M; k2++) { 114 114 // Generate a uniform, symmetric, random walk proposal 115 normp = norm + wiggle * (drand48() - 0.5);115 normp = norm + (double)wiggle * (drand48() - 0.5); 116 116 // Do the ratio in log space 117 117 Lp = -0.5 * pow(normp, 2); … … 155 155 // The multivariate scratchpad y is used as follows, by column: 156 156 // ---------------------------------------------------------------------------- 157 // 0 2*(1-sigma**2) for bivariate normal probability density function158 // 1 0.5*log(pi*2*sigma**2) offset for bivariate normal pdf157 // 0 Bivariate norm PDF: sqrt(1-sigma**2) 158 // 1 Bivariate norm PDF, log-scaling: log(sqrt(2*pi)*sqrt(1-sigma**2)) 159 159 // 2 Innovation values after inverse-scaling by log-volatility 160 160 // 3 Inverse-scaled innovation values after decorrelation … … 218 218 // The log-likelihood for this sample is simply the argument of the 219 219 // exponential of the normal pdf, with the reciprocal scaling of the 220 // exponential being done in log space by subtraction. 221 L -= pow(Y2(k1, 3) / Y2(k1, 0), 2) + Y2(k1, 1); 220 // exponential being done in log space by subtraction of the log of the 221 // scaling coefficient 222 L -= 0.5 * pow(Y2(k1, 3) / Y2(k1, 0), 2) + Y2(k1, 1); 222 223 // Since the innovation has been inverse-scaled by the square root of the 223 224 // proposed volatility, the probability density needs to be scaled as projects/AsynCluster/trunk/svpmc/model.py
r156 r157 191 191 _logU_index = -1 192 192 193 keyAttrs = {'tsList':None, 'Mv':100, 'Mz':10 }193 keyAttrs = {'tsList':None, 'Mv':100, 'Mz':100} 194 194 paramNames = params.PARAM_NAMES 195 195 … … 223 223 var = property(_get_var) 224 224 225 def _get_rv(self): 226 if 'rv' not in self.cache: 227 # Concurrent cross-correlations between volatility shocks 228 self.cache['rv'] = s.matrix( 229 linalg.cholesky(self.covarMatrix(self.f), lower=True)) 230 return self.cache['rv'] 231 rv = property(_get_rv) 232 225 233 226 234 #--- Support -------------------------------------------------------------- … … 273 281 z = z.copy() 274 282 self.inline( 275 'z', 'M', 'wiggle', M=self.Mz, wiggle=sigma /s.sqrt(self.Mz))283 'z', 'M', 'wiggle', M=self.Mz, wiggle=sigma) 276 284 return z 277 285 … … 296 304 """ 297 305 # Mostly empty log-volatility array 298 h = s.column_stack([h0, s. empty_like(v)])306 h = s.column_stack([h0, s.zeros_like(v)]) 299 307 # Working arrays with intialized values 300 308 if 'lv_work' in self.cache: … … 303 311 # Scratchpad with some constants 304 312 y = s.empty((self.p, 4)) 305 y[:,0] = 2 *(1 - self.g**2)306 y[:,1] = 0.5 * s.log(s.pi*y[:,0])313 y[:,0] = s.sqrt(1 - self.g**2) 314 y[:,1] = s.log(s.sqrt(2*s.pi) * y[:,0]) 307 315 # Inverse of concurrent cross-correlations between innovation shocks 308 316 ri = linalg.inv( … … 313 321 return L0, L, h[:,1:] 314 322 315 def hybridGibbs(self, z, x, sigma ):323 def hybridGibbs(self, z, x, sigma, N=100): 316 324 """ 317 325 Does one iteration of a hybrid Gibbs sampler for the log-volatilities, … … 322 330 """ 323 331 L_total = 0 324 if 'hg_work' in self.cache: 325 rv, N = self.cache['hg_work'] 326 else: 327 # Concurrent cross-correlations between volatility shocks 328 rv = s.matrix( 329 linalg.cholesky(self.covarMatrix(self.f), lower=True)) 330 N = 100 # TODO: Use decaySamples() 331 self.cache['hg_work'] = rv, N 332 acceptances = 0 333 rv = self.rv 332 334 # Initialize volatility shocks from cross-correlation of the IID 333 335 # variates... 334 v = rv * z336 v = s.array(rv * z) 335 337 # ...and initial log-volatilites, from the offset alone... 336 h0 = self.d .copy()338 h0 = self.d 337 339 # ...and a set of random-walk proposals for the IID variates, along 338 340 # with their volatility shocks 339 341 zp = self.zProposal(z, sigma) 340 vp = rv * zp;342 vp = s.array(rv * zp) 341 343 # Do conditional draws of each sample in turn 342 344 for k in xrange(self.n): 343 # We need to compare the current and proposed log-volatilities for 344 # this sample, given the previous log-volatility sample 345 346 # Current 345 # We need to compare the current log-volatilities for 346 # this sample, given the previous log-volatility sample... 347 347 LV = self.logVolatilities(z[:,k:k+N], v[:,k:k+N], x[:,k:k+N], h0) 348 # Proposal348 # ...to a log-volatility proposal 349 349 zpk = s.column_stack([zp[:,k], z[:,k+1:k+N]]) 350 350 vpk = s.column_stack([vp[:,k], v[:,k+1:k+N]]) 351 351 LVp = self.logVolatilities(zpk, vpk, x[:,k:k+N], h0) 352 352 # Metropolis-Hastings accept/reject of the proposal 353 if LVp[1] > LV[1] or LVp[1] - LV[1] > self.logUniform(): 353 dL = LVp[1] - LV[1] 354 if dL > 0 or self.logUniform() < dL: 355 acceptances += 1 354 356 LV = LVp 355 z[ k] = zp[k]356 v[ k] = vp[k]357 z[:,k] = zp[:,k] 358 v[:,k] = vp[:,k] 357 359 # Update stuff for the next sample 358 h0 = LV[2][ 0]360 h0 = LV[2][:,0] 359 361 L_total += LV[0] 360 return L_total 362 return L_total, float(acceptances)/self.n 361 363 362 364 … … 401 403 # Innovations as residuals of the reversed VAR(1) 402 404 x = self.var.reverse(self.a, self.b) 403 for k in xrange(self.Mv): 404 L = self.hybridGibbs(z, x, sigma) 405 return L, z 405 for k in xrange(self.Mv-1): 406 self.hybridGibbs(z, x, sigma) 407 return self.hybridGibbs(z, x, sigma)[0], z 408 projects/AsynCluster/trunk/svpmc/test/test_model.py
r156 r157 246 246 247 247 248 class Test_Model_misc(util.TestCase): 249 def test_logUniform(self): 250 N = 35000 251 import timeit 252 tObserved = timeit.Timer( 253 "M.logUniform()", 254 "import model\nM = model.Model()").timeit(N) 255 tBaseline = timeit.Timer( 256 "s.log(s.rand())", 257 "import scipy as s").timeit(N) 258 percent = 100 * tObserved/tBaseline 259 if VERBOSE: 260 print "The efficient method took %2d%% as long as the stupid way" \ 261 % percent 262 self.failUnless(percent < 40) 263 264 def _check_normality(self, x): 265 p = stats.normaltest(x)[1] 266 if VERBOSE: 267 fig = self.fig 268 sp = fig.add_subplot(211) 269 sp.plot(x) 270 sp = fig.add_subplot(212) 271 sp.hist(x, bins=20) 272 self.failUnless( 273 p > 0.05, "Deviation from normality with significance p=%f" % p) 274 275 def test_zProposal(self): 276 N = 1000 277 z = s.zeros((2,3)) 278 modelObj = model.Model() 279 z1 = s.empty(N) 280 z2 = s.empty(N) 281 for k in xrange(N): 282 z = modelObj.zProposal(z, 0.5) 283 z1[k] = z[0,0] 284 z2[k] = z[1,1] 285 self._check_normality(z1[0.7*N:]) 286 self._check_normality(z2[0.7*N:]) 287 self.failUnless(stats.pearsonr(z1, z2)[1] > 0.05) 288 if VERBOSE: 289 self.fig.add_subplot(111).plot(z1, z2) 290 291 248 292 class Test_Model_logVolatilities_VAR(Multivariate_BaseCase): 249 293 def setUp(self): … … 252 296 def _draw_h(self): 253 297 self.model.c = s.array([0.0]) 254 self.model.g = s.array([0.0 ])298 self.model.g = s.array([0.0, 0.0]) 255 299 z = s.randn(self.model.p, self.model.n) 256 300 rv = s.matrix( … … 259 303 return self.model.logVolatilities( 260 304 z, v, s.zeros_like(z), self.model.d.copy())[2] 261 305 306 def test_univariate_LPF(self): 307 e = 0.95 308 self.model.tsList = [self.model.tsList[0]] 309 n = self.model.n 310 self.model.c = [] 311 self.model.d = s.array([0.0]) 312 self.model.e = s.array([[e]]) 313 self.model.f = [] 314 self.model.g = s.array([0.0]) 315 z = s.randn(1, self.model.n) 316 h = self.model.logVolatilities( 317 z, z.copy(), s.zeros_like(z), s.array([0.0]))[2] 318 # IIR filtering 319 fig = self.fig 320 sp = fig.add_subplot(211) 321 pxx = sp.psd(h[0,n/2:], NFFT=32)[0] 322 ratio = ( pxx[-1]/pxx[0] ) / ( 2*((1-e)/(1+e))**2 ) 323 self.failUnless(ratio > 0.4, "Too little rolloff in PSD") 324 self.failUnless(ratio < 1.5, "Too much rolloff in PSD") 325 sp = fig.add_subplot(212) 326 sp.plot(h[0,:]) 327 262 328 def test_indep_offset(self): 263 329 n = self.model.n … … 272 338 ratio = h[m,n/2:].mean() / ( 1.0/(1-em) ) 273 339 self.failUnlessAlmostEqual(ratio, 1.0, 0) 274 340 275 341 def test_indep_shocks(self): 276 342 n = self.model.n … … 322 388 323 389 324 class Test_Model_ decorrelate(Multivariate_BaseCase):390 class Test_Model_logVolatilities_decorrelate(Multivariate_BaseCase): 325 391 def setUp(self): 326 392 self.model = model.Model() 327 393 328 def test_basic(self): 329 n = 10000 330 for p in xrange(2, 4): 394 def _pGenerator(self, low, high): 395 for p in xrange(low, high): 331 396 # Tweak the model object to make it testable for this p value 332 397 self.model.tsList = [util.TS_LIST[0]] * p 333 for correlation in s.linspace(0.1, 0.9, 5): 334 # Generate some multivariate normal test data 335 r = s.maximum(s.identity(p), correlation*s.ones((p,p))) 336 print "\n\n", p 337 print r 338 x = random.multivariate_normal(s.zeros(p), r, n).transpose() 339 # Set the model's innovation shock correlations parameter 340 self.model.c = correlation * s.ones( 341 sum([p-k-1 for k in xrange(p)])) 342 # Get the decorrelated values from the model 343 y = self.model.decorrelate(x) 398 self.model.d = s.zeros(p) 399 self.model.e = s.zeros((p, p)) 400 self.model.g = s.zeros(p) 401 # Generate some bogus arrays 402 z = s.zeros((p, 1)) 403 v = s.zeros((p, 1)) 404 h0 = s.zeros((p, 1)) 405 # Yield a container with p and room for one or more 2-tuples 406 # containing a correlation parameter and arrays of innovations to 407 # decorrelate and test using that parameter value 408 container = [p] 409 yield container 410 # Use the logVolatilities method to decorrelate the innovations 411 # that the caller put into the container I just yielded 412 for self.model.c, x in container[1:]: 413 xd = s.zeros_like(x) 414 for k in xrange(x.shape[1]): 415 self.model.logVolatilities(z, v, x[:,k], h0) 416 y = self.model.cache['lv_work'][0] 417 xd[:,k] = y[:,3] 344 418 # Check for non-correlation 345 419 for j in xrange(p): … … 347 421 if j == k: 348 422 continue 349 self._check_uncorr(y[j,:], y[k,:], plot=False) 423 self._check_uncorr(xd[j,:], xd[k,:], plot=False) 424 425 def test_basic(self): 426 n = 1000 427 for pc in self._pGenerator(2, 4): 428 p = pc[0] 429 for correlation in s.linspace(0.1, 0.9, 5): 430 # Generate some multivariate normal test data 431 r = s.maximum(s.identity(p), correlation*s.ones((p,p))) 432 if VERBOSE: 433 print "\n\n", p 434 print r 435 c = correlation * s.ones(sum([p-k-1 for k in xrange(p)])) 436 x = random.multivariate_normal(s.zeros(p), r, n).transpose() 437 pc.append((c, x)) 350 438 351 439 def test_complex(self): 352 n = 10000 353 for p in xrange(2, 7): 440 n = 1000 441 for pc in self._pGenerator(2, 7): 442 p = pc[0] 354 443 # Stuff specific to this number of time series 355 444 N_correlations = sum([p-k-1 for k in xrange(p)]) 356 445 cMax = 0.90 / s.sqrt(p/1.5) 357 # Tweak the model object to make it testable for this p value358 self.model.tsList = [util.TS_LIST[0]] * p359 446 for i in xrange(10): 360 447 # Randomly generate a set of cross-correlations and set the 361 448 # model's innovation shock correlations parameter 362 self.model.c = 0.05 + cMax * s.rand(N_correlations)449 c = 0.05 + cMax * s.rand(N_correlations) 363 450 # Generate some multivariate normal test data having the 364 451 # generated correlations 365 r = self.model.nw.covarMatrix(self.model.c) 366 print "\n\n", p 367 print r 452 r = self.model.covarMatrix(c) 453 if VERBOSE: 454 print "\n\n", p 455 print r 368 456 x = random.multivariate_normal(s.zeros(p), r, n).transpose() 369 # Get the decorrelated values 370 y = self.model.decorrelate(x) 371 # Check for non-correlation 372 for j in xrange(p): 373 for k in xrange(p): 374 if j == k: 375 continue 376 self._check_uncorr(y[j,:], y[k,:], plot=False) 377 378 379 class Test_Model(Multivariate_BaseCase): 457 pc.append((c, x)) 458 459 460 class Model_BC_Mixin(object): 461 def _get_model(self): 462 if not hasattr(self, '_model'): 463 # Instantiate a model to be tested and set its parameters 464 if not hasattr(self, 'x'): 465 self.x = s.zeros(self.N) 466 self._model = model.Model(tsList=[tseries.TimeSeries( 467 'test', data=s.column_stack([s.arange(self.N), self.x]))]) 468 for name, value in self.params.iteritems(): 469 setattr(self._model, name, s.array(value)) 470 return self._model 471 model = property(_get_model) 472 473 474 class Test_Model_logVolatilities(Model_BC_Mixin, util.TestCase): 475 N = 100 476 params = { 477 'a': [0], 478 'b': [[0]], 479 'c': [], 480 'd': [0], 481 'e': [[0]], 482 'f': [], 483 'g': [0], 484 } 485 486 def _check_L(self, distObj, hValue): 487 args = [hValue * s.ones((1, self.N)) for k in (0, 1)] + \ 488 [None] + \ 489 [s.zeros(1)] 490 # Test 100 points from -8 to +8, most of them close to zero 491 for x0 in s.linspace(-2, +2, 100): 492 x0 = x0**3 493 args[2] = x0 * s.ones((1, self.N)) 494 L0, L, h = self.model.logVolatilities(*args) 495 self.failUnlessAlmostEqual(s.exp(L0), distObj.pdf(x0), 6) 496 self.failUnlessAlmostEqual(self.N*L0, L) 497 self.failUnless(s.equal(h, hValue).all()) 498 499 def test_hZeros(self): 500 self._check_L(stats.distributions.norm(0, 1), 0) 501 502 def test_hOnes(self): 503 self._check_L(stats.distributions.norm(0, s.sqrt(s.exp(1))), 1) 504 505 def test_hMinusTwos(self): 506 self._check_L(stats.distributions.norm(0, s.sqrt(s.exp(-2))), -2) 507 508 509 class Test_Model_hybridGibbs(Model_BC_Mixin, Multivariate_BaseCase): 380 510 N = 10000 381 511 corr = 0.0 … … 389 519 'g': [corr], 390 520 } 391 for key, value in params.iteritems(): 392 params[key] = s.array(value) 521 522 523 class Test_Model_likelihood(Model_BC_Mixin, Multivariate_BaseCase): 524 N = 500 525 corr = 0.0 526 params = { 527 'a': [0], 528 'b': [[0]], 529 'c': [], 530 'd': [0.0], 531 'e': [[0.95]], 532 'f': [], 533 'g': [corr], 534 } 393 535 394 536 class Mock_ParameterContainer(object): … … 403 545 return pc 404 546 405 def setUp(self): 406 # Parameters 547 def _setupModel(self, **kw): 548 # Any special model parameters 549 for name, value in kw.iteritems(): 550 setattr(self.model, name, s.array(value)) 407 551 # Simulate a single time series of data per the parameters 408 552 self.iv = random.multivariate_normal( 409 [0, 0], [[1.0, self.corr], [self.corr, 1.0]], self.N).transpose() 410 #--- DEBUG, sinusoidal variates for log-volatility -------------- 411 self.iv[1,:] = s.sin(s.linspace(0, 2*s.pi, self.N)) 412 #----------------------------------------------------------------- 413 h = signaltools.lfilter( 414 [1], [1.0, self.params['e'][0][0]], self.iv[1,:]) 415 self.x = s.exp(0.5*h) * self.iv[0,:] 553 [0, 0], 554 [[1.0, self.model.g[0]], [self.model.g[0], 1.0]], 555 self.N).transpose() 556 self.h = signaltools.lfilter( 557 [1], [1.0, -self.model.e[0][0]], self.iv[1,:]) 558 x = s.exp(0.5 * self.h) * self.iv[0,:] 416 559 # Instantiate a model to be tested and set its parameters 417 self.model = model.Model(tsList=[tseries.TimeSeries( 418 'test', data=s.column_stack([s.arange(self.N), self.x]))]) 419 for name, value in self.params.iteritems(): 420 setattr(self.model, name, value) 421 422 def test_likelihood(self): 423 N_plot = self.N 424 sigma = 0.05 425 self.model.n0 = 1000 426 paramContainer = self.pcFactory() 427 # DEBUG 428 paramContainer.latent = 0.0*self.iv[1,:] + 0.0*s.randn(1, self.N) 429 L, z, p = self.model.likelihood(paramContainer, sigma) 560 self.model.tsList=[tseries.TimeSeries( 561 'test', data=s.column_stack([s.arange(self.N), x]))] 562 self.x = s.row_stack([x]) 563 564 def _runHG(self, N, sigma): 565 z = s.randn(1, self.N) 566 L = [] 567 for k in xrange(N): 568 L_this, acceptRate = self.model.hybridGibbs(z, self.x, sigma) 569 L.append(L_this) 570 print "%03d: L=%6.1f, %d%% acceptance rate" \ 571 % (k, L_this, 100*acceptRate) 572 h = self.model.logVolatilities( 573 z, z.copy(), self.x, self.model.d.copy())[-1] 430 574 if VERBOSE: 431 575 fig = self.fig 432 vectors = (self. iv[1,:], self.x, p, z[0])576 vectors = (self.x[0,:], self.h, h[0,:]) 433 577 for k, vector in enumerate(vectors): 434 sp = fig.add_subplot(100*len(vectors) + k + 11) 435 sp.plot(vector[:N_plot]) 436 #self._check_corr(self.iv[1,:], z[0]) 578 spNumber = 100*(len(vectors)+1) + k + 11 579 sp = fig.add_subplot(spNumber) 580 sp.plot(vector) 581 fig.add_subplot(spNumber+1).plot(L) 582 self._check_corr(self.h, h[0,:]) 437 583 584 def test_hybridGibbs_highCorrelation(self): 585 self._setupModel(e=[[0.95]]) 586 # Sigma=0.5 seems to give us the supposedly ideal ~44% acceptance rate 587 self._runHG(50, 0.5) 588 589 def test_hybridGibbs_lowCorrelation(self): 590 self._setupModel(e=[[0.4]]) 591 self._runHG(50, 0.5) 592 593 def test_hybridGibbs_noCorrelation(self): 594 self._setupModel(e=[[0.0]]) 595 self._runHG(50, 0.5)
