Changeset 199
- Timestamp:
- 05/27/08 15:37:01 (8 months ago)
- Files:
-
- projects/AsynCluster/branches/metropolis-within-gibbs/svpmc/model.c (modified) (10 diffs)
- projects/AsynCluster/branches/metropolis-within-gibbs/svpmc/model.py (modified) (3 diffs)
- projects/AsynCluster/branches/metropolis-within-gibbs/svpmc/pmc.py (modified) (1 diff)
- projects/AsynCluster/branches/metropolis-within-gibbs/svpmc/test/test_model.py (modified) (5 diffs)
- projects/AsynCluster/trunk/doc/svpmc/example/svpmc.conf (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/model.c (modified) (11 diffs)
- projects/AsynCluster/trunk/svpmc/model.py (modified) (2 diffs)
- projects/AsynCluster/trunk/svpmc/project.py (modified) (1 diff)
- projects/AsynCluster/trunk/svpmc/test/test_model.py (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
projects/AsynCluster/branches/metropolis-within-gibbs/svpmc/model.c
r198 r199 273 273 // sc Constants for multivariate normal PDF 274 274 275 //--- Return value s (tuple) ---276 // 0 Lx: Log-likelihood of innovations given simulated log-volatilities277 // 1 Lh: Log-likelihood of simulated log-volatilities275 //--- Return value (double float) --- 276 // The linear likelihood P(x|w) of the innovations given the parameters, 277 // integrated over the ni log-volatility simulations 278 278 279 279 … … 290 290 int k0, k1, k2, k3; 291 291 double val; 292 double Lx = 0;293 double Lh = 0;294 292 295 293 // Multivariate innovation for one current time-series sample … … 301 299 double he[2][Nt]; 302 300 // Log probability density of the value of z for current and proposal 303 // evaluations 304 double Lz [2];301 // evaluations, working values 302 double Lzk[2]; 305 303 // Log-likelihood of the innovation for the first time-series sample in current 306 // and proposal evalutions 307 double Lxk[2] ;304 // and proposal evalutions, working values and accumulated for each iteration 305 double Lxk[2], Lx[ni]; 308 306 309 307 // A set of evaluation sample structs … … 311 309 // A struct for the model parameters 312 310 struct parameters P; 313 314 // The scalar results go here315 py::tuple result(2);316 311 317 312 // Generate parameter struct … … 328 323 } 329 324 330 331 325 // For each iteration... 332 326 for(ki=0; ki<ni; ki++) { 327 328 // Initialize this iteration's accumulator 329 Lx[ki] = 0; 333 330 334 331 // The "previous" log-volatility of the first time-series sample is set to … … 354 351 // evaluation struct for the next pair of evaluations. 355 352 for(k2=0; k2<2; k2++) { 356 Lz[k2] = 0;357 353 se[k2].Lx = 0; 354 Lzk[k2] = 0; 355 // Lxk[.] will be set to a value in se[.].Lx that has already been 356 // multivariate-accumulated, so it doesn't need clearing here 358 357 } 359 358 … … 379 378 // Store the log-density of z for the first time-series sample to 380 379 // compare current vs. proposal 381 Lz [k2] += normLogDensity(val);380 Lzk[k2] += normLogDensity(val); 382 381 } 383 382 PA1(se[k2].z, k3) = val; … … 411 410 // Metropolis-hastings selection of current or proposal using Lz+Lx for 412 411 // each evaluation 413 val = Lz [1] + se[1].Lx - Lz[0] - se[0].Lx;414 if(val > 0 ) {415 // Proposal accepted with probability p=1412 val = Lzk[1] + se[1].Lx - Lzk[0] - se[0].Lx; 413 if(val > 0 || log(uniform(0, 1)) < val) { 414 // Proposal accepted... 416 415 ke = 1; 417 } else if(log(uniform(0, 1)) < val) { 418 // Proposal accepted with probability Pr(zp) / Pr(z) 419 ke = 1; 420 Lh += val; 416 // ...overwrite z for this sample with the proposal on z 417 for(k1=0; k1<Nt; k1++) 418 Z2(k1, k0) = zp[k1]; 421 419 } else { 422 420 // Proposal rejected 423 421 ke = 0; 424 }425 426 // If the proposal was accepted, overwrite z for this sample with the427 // proposal on z428 if(ke == 1) {429 for(k1=0; k1<Nt; k1++)430 Z2(k1, k0) = zp[k1];431 422 } 432 423 … … 440 431 } 441 432 442 // Accumulate the log-likelihood of the innovation given the selected 443 // log-volatility evaluation 444 Lx += Lxk[ke]; 433 // The joint probability of the innovation and the variates underlying the 434 // log-volatility simulation, conditional on the parameters, is equal to 435 // the likelihood of the innovation, conditional on the simulated 436 // variates and the parameters, times the probability of the 437 // simulated variates, conditional on the parameters: 438 // 439 // P(x,z|w) = P(x|z,w) * P(z|w) 440 // 441 // P(z|w) is conditional on w because of the correlation between sequential 442 // z values due to the VAR(1) in volatility shocks. Fortunately, the way 443 // that we've simulated z allows us to just evaluate P(z) using the joint 444 // normal distribution. 445 446 // Accumulate Log(P(xk,zk|w) = Lxk + Lzk) to eventually get Log(P(x,z|w) 447 Lx[ki] += Lxk[ke] + Lzk[ke]; 445 448 446 449 // Done with log-volatility draw for this time-series sample … … 464 467 } 465 468 466 // The pair of results is the integrated log-likelihood of the innovations 467 // given the log-volatilities simulated over the iterations... 468 result[0] = Lx / ni; 469 // ...and the integrated log-density of the log-volatilities simulated over the 470 // iterations, conditional on the starting z 471 result[1] = Lh; 472 return_val = result; 469 // Compute the result, the integrated P(x|w) over the simulations on z 470 471 // Compute the maximum log density // and save it to subtract from everybody 472 // that the maximum log density is normalized to zero 473 val = -HUGE_VAL; 474 for(ki=0; ki<ni; ki++) { 475 if(Lx[ki] > val) 476 val = Lx[ki]; 477 } 478 // Accumulate the linearized normalized densities, borrowing Lzk[0] to do so 479 Lzk[0] = 0; 480 for(ki=0; ki<ni; ki++) 481 Lzk[0] += exp(Lx[ki] - val); 482 // ...and return the denormalized, log mean of the linear densities 483 return_val = log(Lzk[0]) + val - log(ni); projects/AsynCluster/branches/metropolis-within-gibbs/svpmc/model.py
r198 r199 129 129 # Unpack string result from remote call 130 130 result = list(pack.Unpacker(result)) 131 for k, name in enumerate(('Lx', ' Lh', 'z', 'h')):131 for k, name in enumerate(('Lx', 'z', 'h')): 132 132 setattr(paramContainer, name, result[k]) 133 133 return paramContainer … … 255 255 returns a tuple containing the following: 256 256 257 1. The log-likelihood of my observations given the simulated 258 log-volatility values, integrated over the simulation rounds. 259 260 2. The log-probability of the simulated log-volatilities, 261 integrated over the simulation rounds. 262 263 3. The IID normal variates underlying the simulated 257 1. The log-likelihood of my observations given the parameters, from 258 an integration of the join probability of the observations and 259 simulated log-volatilities over the simulation rounds. 260 261 2. The IID normal variates underlying the simulated 264 262 log-volatilities after the last simulation round. 265 263 266 4. The multivariate log-volatility value for the last time-series264 3. The multivariate log-volatility value for the last time-series 267 265 sample after the last simulation round. (Useful for 268 266 extrapolating from the fitted model.) … … 300 298 301 299 # Run the hybrid Gibbs rounds 302 Lx , Lh= self.inline(300 Lx = self.inline( 303 301 'z', 'h', 'x', 'w', 'ni', 304 302 'd', 'e', 'g', 'rz', 'ri', 'sc', 305 303 w=self.wiggle, ni=self.Ni) 306 return Lx, Lh,z, h304 return Lx, z, h projects/AsynCluster/branches/metropolis-within-gibbs/svpmc/pmc.py
r198 r199 198 198 def weight(paramContainer): 199 199 if s.isfinite(paramContainer.Lx): 200 L = paramContainer.Lx + paramContainer.Lp \ 201 - paramContainer.Lj - paramContainer.Lh 200 L = paramContainer.Lx + paramContainer.Lp - paramContainer.Lj 202 201 info = " ".join([ 203 202 "%+12.2f" % (getattr(paramContainer, x),) 204 for x in ('Lx', 'Lp', 'Lj' , 'Lh')])203 for x in ('Lx', 'Lp', 'Lj')]) 205 204 else: 206 205 info = "" projects/AsynCluster/branches/metropolis-within-gibbs/svpmc/test/test_model.py
r198 r199 609 609 inlineVars.update({'x':2*s.ones((1,Ns)), 'z':s.zeros((1, Ns))}) 610 610 modelObj = model.Model() 611 print "\n k Lx LhSimulated Volatility Shocks"611 print "\n k Lx Simulated Volatility Shocks" 612 612 print "-"*100 613 613 for k in xrange(Nr): 614 Lx , Lh= modelObj.inlineDirect(614 Lx = modelObj.inlineDirect( 615 615 modelObj.code['likelihood'], **inlineVars) 616 616 zLine = " ".join(["%+4.2f" % xx for xx in inlineVars['z'][0,:]]) 617 print "%3d: %+6.1f % +7.1f%s%s" % (k, Lx, Lh, " "*5, zLine)617 print "%3d: %+6.1f %s%s" % (k, Lx, " "*5, zLine) 618 618 619 619 def _simRounds(self, Nr, Ns, inlineVars, name, values): … … 636 636 """ 637 637 With univariate, Ni=20 is significantly better than Ni=10, but not 638 significantly worse than Ni=40. 638 significantly worse than Ni=40. Tested with wiggles of 0.5 and 1.0, and 639 eValue=0.95. 639 640 """ 640 641 Nr = 1000 641 wiggle = 1.0642 wiggle = 0.5 642 643 eValue = 0.95 643 644 Ns = 1000 … … 646 647 647 648 def test_highCorrelation_c(self): 648 wiggle = 1.0649 wiggle = 0.5 649 650 eValue = 0.97 650 651 Ns, Ni = 200, 20 … … 654 655 655 656 def test_highCorrelation_model(self): 656 wiggle = 1.0657 wiggle = 0.5 657 658 eValue = 0.97 658 659 Ns, Ni = 1000, 20 … … 661 662 pc.z = s.zeros((1, Ns)) 662 663 modelObj.y = self._x_to_y(x) 663 Lx, Lh,z, hn = modelObj.likelihood(pc)664 Lx, z, hn = modelObj.likelihood(pc) 664 665 self._plot(x, (iv[1,:], z[0,:]), (h, self._z_to_h(z[0,:], eValue))) projects/AsynCluster/trunk/doc/svpmc/example/svpmc.conf
r196 r199 27 27 Variable Value Description 28 28 ------------------------------------------------------------------------------- 29 D 6 Number of jump deviations in the D-kernel sim 30 Df 0.2 Fractional value of each dev from prev (or 1.0) 29 D 5 Number of jump deviations in the D-kernel sim 30 Ds 0.1 Starting jump deviation value 31 Df 0.2 Fractional value of each dev from prev (or start) 31 32 wiggle 0.5 Range +/- of uniform random walks on z for h sim 32 N1 30 Proposals on z for each importance-sampling of h33 N1 10 Proposals on z for each importance-sampling of h 33 34 N2 20 Hybrid-Gibbs iterations for h sim 34 35 projects/AsynCluster/trunk/svpmc/model.c
r196 r199 86 86 // ---------------------------------------------------------------------------- 87 87 88 // Run evaluations until odds of error in selection between proposals null 89 // proposal on z will be less than 1/100. 90 #define MIN_DIFF 0.01 91 88 92 89 93 // Model parameters … … 107 111 struct sample 108 112 { 113 // Log-likelihood of the time-series sample in the current evaluation, given 114 // h at this point 115 double Lxk; 109 116 // Log-likelihood of the innovations up to the current one, given the history 110 117 // of h to this point … … 223 230 // multivariate innovation for this sample, conditional on the sample's 224 231 // log-volatility. 225 226 // Now the log-likelihoood. For each time series... 232 233 S->Lxk = 0; 234 235 // For each time series... 227 236 for(k=0; k<N; k++) { 228 237 // Offset the decorrelated innovation by the mean, which is the correlation … … 237 246 val = SPA1(S, x0, k) / SPP2(P, sc, k, 0); 238 247 // Fast squaring instead of pow() 239 S->Lx -= 0.5 * val*val + SPP2(P, sc, k, 1);248 S->Lxk -= 0.5 * val*val + SPP2(P, sc, k, 1); 240 249 // Since the innovation has been inverse-scaled by the square root of the 241 250 // proposed volatility, the probability density needs to be scaled as 242 251 // well. In log space, this is a subtraction of half the log-volatility. 243 S->Lx -= 0.5 * SPA1(S, h, k); 244 } 245 } 246 247 248 // Importance-samples one of the proposals based on a weight computed from the 249 // log-density of z and the log-likelihood of the innovation given the 250 // proposal's log-volatility. 252 S->Lxk -= 0.5 * SPA1(S, h, k); 253 } 254 S->Lx += S->Lxk; 255 } 256 257 258 // Returns with zero only if the non-null proposals have converged close enough 259 // to the baseline value (the null proposal) to finish likelihood computation 260 int notCloseEnough(struct sample sp[], int n) 261 { 262 int k; 263 double L; 264 for(k=1; k<n; k++) { 265 L = sp[k].Lxk - sp[0].Lxk; 266 if(L > MIN_DIFF || L < -MIN_DIFF) 267 return 1; 268 } 269 return 0; 270 } 271 272 273 // Importance-samples one of the non-null proposals based on a weight computed 274 // from the log-density of z and the log-likelihood of the innovation given the 275 // proposal's log-volatility, conditional on the null proposal. 251 276 // 252 277 // Returns a struct with an integer index to the sampled proposal's struct and … … 262 287 struct selection result; 263 288 264 // Normalize log-densities to prevent overflow 265 for(k=0; k<n; k++) { 266 val = sp[k].Lx + Lz[k]; 289 // Log-densities densities are relative to the null-proposal, and with a 290 // maximum for normalization in selection 291 for(k=1; k<n; k++) { 292 val = sp[k].Lx + Lz[k] - sp[0].Lx - Lz[0]; 267 293 if(val > L_max) 268 294 L_max = val; … … 272 298 // Compute linear probability density for each proposal, relative to the 273 299 // maximum one 274 for(k= 0; k<n; k++) {300 for(k=1; k<n; k++) { 275 301 val = exp(Lp[k] - L_max); 276 302 Dp[k] = val; 277 303 Dp_sum += val; 278 304 } 279 305 280 306 // Accept-reject sampling with uniform random proposal selection 281 307 do { 282 308 // Randomly select one of the proposals... 283 result.kp = (int) uniform( 0, n);309 result.kp = (int) uniform(1, n); 284 310 // ...until the selected proposal is accepted, with probability 285 311 // proportional to its relative weight … … 305 331 // ni Number of hybrid-Gibbs iterations (int) 306 332 // np Number of proposals to try per log-volatility simulation (int) 307 // ne Max number of time-series samples to evaluate per proposal (int)308 333 309 334 //--- Model parameters, direct --- … … 385 410 // the log-likelihood of the innovation for that sample becomes substantially 386 411 // the same for all proposals. 387 412 388 413 k1 = k0; 389 414 … … 405 430 for(k3=0; k3<Nt; k3++) { 406 431 val = Z2(k3, k1); 407 if(k1==k0) { 408 // First time-series sample, use a proposal on z instead of z itself 409 val += uniform(-w, +w); 432 if(k1 == k0) { 433 // First time-series sample, use a proposal on z instead of z 434 // itself... 435 if(k2 != 0) 436 // ...but only actually change z if this isn't the null proposal 437 val += uniform(-w, +w); 410 438 zp[k2] = val; 411 439 Lz[k2] = normLogDensity(val); … … 432 460 k1++; 433 461 434 } while (k1<Ns && k1<k0+ne);435 462 } while (k1<Ns && notCloseEnough(sp, np)); 463 436 464 // Importance sample proposal using pProb=exp(Lz+Lx) for each... 437 465 spSelection = importanceSample(sp, Lz, np); … … 480 508 // simulated log-volatility and Lh, the total log-density of all of the 481 509 // simulated log-volatilities 482 result[0] = Lx ;483 result[1] = Lh ;510 result[0] = Lx / ni; 511 result[1] = Lh / ni; 484 512 return_val = result; projects/AsynCluster/trunk/svpmc/model.py
r196 r199 193 193 exploration of multivariate space needs more iterations. 194 194 195 @ivar Ne: The number of successive time-series samples to include in the 196 evaluation of each log-volatility proposal. For some weird reason, Ne=3 197 is optimal, at least for univariate. 198 199 """ 200 keyAttrs = {'y':None, 'wiggle':1.0, 'N1': 30, 'N2':10, 'Ne':3} 195 """ 196 keyAttrs = {'y':None, 'wiggle':1.0, 'N1': 30, 'N2':10} 201 197 202 198 #--- Properties ----------------------------------------------------------- … … 309 305 sc[:,1] = s.log(s.sqrt(2*s.pi) * sc[:,0]) 310 306 311 print self.wiggle, self.N1, self.N2, self.Ne312 307 # Run the hybrid Gibbs rounds, returning the likelihood from the last 313 308 # one along with the state of the IID variate vector at that point. 314 309 Lx, Lh = self.inline( 315 310 'z', 'h', 'x', 316 'w', 'ni', 'np', 'ne',311 'w', 'ni', 'np', 317 312 'd', 'e', 'g', 'rz', 'ri', 'sc', 318 w=self.wiggle, np=self.N1 , ni=self.N2, ne=self.Ne)319 return Lx /self.N2, Lh/(self.N2*self.Ne), z, h313 w=self.wiggle, np=self.N1+1, ni=self.N2) 314 return Lx, Lh, z, h projects/AsynCluster/trunk/svpmc/project.py
r196 r199 102 102 value = int(value) 103 103 setattr(self, name, value) 104 V = [ 1.0]104 V = [self.Ds] 105 105 for k in xrange(self.D-1): 106 106 V.append(self.Df * V[-1]) projects/AsynCluster/trunk/svpmc/test/test_model.py
r192 r199 602 602 return iv, h, x 603 603 604 def _inlineVars(self, wiggle, eValue, N1, N2=1 , Ne=3):604 def _inlineVars(self, wiggle, eValue, N1, N2=1): 605 605 return { 606 606 'h':s.empty(1), 607 607 'sc':s.array([1, s.log(s.sqrt(2*s.pi))]), 608 608 'w':wiggle, 609 'np':N1 ,609 'np':N1+1, 610 610 'ni':N2, 611 'ne':Ne,612 611 'd':s.array([0.0]), 613 612 'e':s.array([[float(eValue)]]), … … 669 668 Ns, N1, N2 = 10, 5, 20 670 669 inlineVars = self._inlineVars(wiggle, eValue, N1, N2) 671 inlineVars.update({'x':2*s.ones( Ns), 'z':s.zeros((1, Ns))})670 inlineVars.update({'x':2*s.ones((1, Ns)), 'z':s.zeros((1, Ns))}) 672 671 modelObj = model.Model() 673 672 print "\n k Lx Lh Simulated Volatility Shocks" … … 683 682 bins = s.linspace(0.65, 0.95, 20) 684 683 for k, value in enumerate(values): 684 print k, value 685 685 inlineVars[name] = value 686 686 correlations = [] … … 694 694 sp.set_title("%s=%d" % (name, value)) 695 695 696 def test_optimize_Ne(self):697 # Empirically, it looks like Ne=3 is best, just slightly better than698 # Ne=2 and Ne=4. Beyond Ne=4 things decline significantly.699 Nr = 1000700 wiggle = 3.0701 eValue = 0.95702 Ns, N1, N2 = 1000, 10, 2703 inlineVars = self._inlineVars(wiggle, eValue, N1, N2)704 self._simRounds(Nr, Ns, inlineVars, 'ne', (2,3,4,5,6))705 706 696 def test_optimize_N1(self): 707 697 # Empirically, it looks like np=10 is best. Going to np=20 barely gives 708 698 # any improvement, and obviously doubles the amount of computation 709 699 # time. Dropping to np=5 is significantly worse. 710 Nr = 1000711 wiggle = 3.0700 Nr = 500 701 wiggle = 0.5 712 702 eValue = 0.95 713 Ns, N2 = 1000, 2703 Ns, N2 = 500, 10 714 704 inlineVars = self._inlineVars(wiggle, eValue, 1, N2) 715 705 self._simRounds(Nr, Ns, inlineVars, 'np', (2,5,10,20)) 716 706 717 707 def test_optimize_N2(self): 718 # Empirically, it looks like ni=2 is best. It's noticeably better than719 # ni=1 (though not dramatically so), but going to anything larger720 # doesn't showany improvement.721 Nr = 1000722 wiggle = 2.0708 # Empirically, it looks like ni=20 is best. It's noticeably better than 709 # ni=10 (though not dramatically so), but going to ni=40 doesn't show 710 # any improvement. 711 Nr = 500 712 wiggle = 0.5 723 713 eValue = 0.95 724 Ns, N1 = 1000, 10714 Ns, N1 = 500, 10 725 715 inlineVars = self._inlineVars(wiggle, eValue, N1, 1) 726 self._simRounds(Nr, Ns, inlineVars, 'ni', ( 1,2,3,4))716 self._simRounds(Nr, Ns, inlineVars, 'ni', (5,10,20,40)) 727 717 728 718 def test_highCorrelation_c(self): 729 Ne = 3 730 wiggle = 2.0 719 wiggle = 0.5 731 720 eValue = 0.97 732 Ns, N1, N2 = 200, 10, 2 733 inlineVars = self._inlineVars(wiggle, eValue, N1, N2 , Ne)721 Ns, N1, N2 = 200, 10, 20 722 inlineVars = self._inlineVars(wiggle, eValue, N1, N2) 734 723 for hReal, hSim in self._likelihood(Ns, 1, inlineVars): 735 724 self._plot((hReal, hSim)) 736 725 737 726 def test_highCorrelation_model(self): 738 wiggle = 2.0739 eValue = 0.9 5740 Ns, N1, N2 = 1000, 10, 2 727 wiggle = 0.5 728 eValue = 0.97 729 Ns, N1, N2 = 1000, 10, 20 741 730 iv, h, x = self._simData(Ns, eValue) 742 731 pc, modelObj = self._modelAndParamFactory(wiggle, N1, N2, e=[[eValue]])
