Changeset 190

Show
Ignore:
Timestamp:
05/21/08 23:13:52 (7 months ago)
Author:
edsuom
Message:

Got C-based hybrid Gibbs working; Three-sample Lh evaluation appears optimal, to my surprise

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • projects/AsynCluster/trunk/svpmc/model.c

    r189 r190  
    250250 
    251251 
    252 // Returns with zero only if the proposals are close enough to finish 
    253 // likelihood computation 
    254 int notCloseEnough(struct sample sp[], int n) 
    255 { 
    256   int k; 
    257   double L; 
    258   double Lmin = +HUGE_VAL;   // Anything will be less positive 
    259   double Lmax = -HUGE_VAL;   // Anything will be more positive 
    260   for(k=0; k<n; k++) { 
    261     L = sp[k].L_diff; 
    262     if(L < Lmin) 
    263       Lmin = L; 
    264     if(L > Lmax) 
    265       Lmax = L; 
    266   } 
    267   return (Lmax - Lmin > 0.001); 
    268 } 
    269  
    270  
    271252// Importance-samples one of the proposals based on a weight computed from the 
    272253// log-density of z and the log-likelihood of the innovation given the 
     
    323304// h    Simulated last-sample multivariate log-volatility, [p] (overwritten) 
    324305// w    Wiggle value for +/- proposals (float) 
     306// ni   Number of hybrid-Gibbs iterations (int) 
    325307// np   Number of proposals to try per log-volatility simulation (int) 
    326308// ne   Max number of time-series samples to evaluate per proposal (int) 
     
    346328const int Ns = Nz[1]; 
    347329 
    348 int k0, k1, k2, k3; 
     330int ki, k0, k1, k2, k3; 
    349331double val, L_diff; 
    350332double Lx = 0; 
     
    383365  sp[k0].x0 = (double *) malloc(sizeof(double) * Nt); 
    384366  sp[k0].x1 = (double *) malloc(sizeof(double) * Nt); 
     367} 
     368 
     369 
     370// For each iteration... 
     371for(ki=0; ki<ni; ki++) { 
     372 
    385373  // The "previous" log-volatility of the first time-series sample is set to 
    386374  // the log-volatility offset. 
    387375  // 
    388376  // TODO: Treat it as another latent parameter and simulate its value 
    389   for(k1=0; k1<Nt; k1++) 
    390     PA1(sp[k0].h, k1) = D1(k1); 
    391 
    392  
    393 // For each time-series sample... 
    394 for(k0=0; k0<Ns; k0++) { 
    395    
    396   // Simulate np sets of h samples from separate proposals on z from the 
    397   // current time-series sample to some subsequent time-series sample at which 
    398   // the log-likelihood of the innovation for that sample becomes substantially 
    399   // the same for all proposals. 
    400  
    401   k1 = k0; 
    402  
    403   // Clear the log-likelihood accumulator of each proposal struct for the next 
    404   // proposal evalution 
    405   for(k2=0; k2<np; k2++) 
    406     sp[k2].Lx = 0; 
    407  
    408   do { 
    409  
    410     // Prepare current-innovation vector xk 
    411     for(k2=0; k2<Nt; k2++) 
    412       PA1(xk, k2) = X2(k2, k1); 
     377  for(k0=0; k0<np; k0++) { 
     378    for(k1=0; k1<Nt; k1++) 
     379      PA1(sp[k0].h, k1) = D1(k1); 
     380  } 
     381 
     382  // For each time-series sample... 
     383  for(k0=0; k0<Ns; k0++) { 
    413384     
    414     // For each proposal... 
    415     for(k2=0; k2<np; k2++) { 
    416  
    417       // Set z... 
    418       for(k3=0; k3<Nt; k3++) { 
    419         val = Z2(k3, k1); 
     385    // Simulate np sets of h samples from separate proposals on z from the 
     386    // current time-series sample to some subsequent time-series sample at which 
     387    // the log-likelihood of the innovation for that sample becomes substantially 
     388    // the same for all proposals. 
     389   
     390    k1 = k0; 
     391   
     392    // Clear the log-likelihood accumulator of each proposal struct for the next 
     393    // proposal evalution 
     394    for(k2=0; k2<np; k2++) 
     395      sp[k2].Lx = 0; 
     396   
     397    do { 
     398   
     399      // Prepare current-innovation vector xk 
     400      for(k2=0; k2<Nt; k2++) 
     401        PA1(xk, k2) = X2(k2, k1); 
     402       
     403      // For each proposal... 
     404      for(k2=0; k2<np; k2++) { 
     405   
     406        // Set z... 
     407        for(k3=0; k3<Nt; k3++) { 
     408          val = Z2(k3, k1); 
     409          if(k1==k0) { 
     410            // First time-series sample, use a proposal on z instead of z itself 
     411            val += uniform(-w, +w); 
     412            zp[k2] = val; 
     413            Lz[k2] = normLogDensity(val); 
     414          } 
     415          PA1(sp[k2].z, k3) = val; 
     416        } 
     417        // ...compute the multivariate log-volatility 
     418        logVol(&sp[k2], &P); 
     419        // ...and the log-likelihood of the innovation for this time-series 
     420        // sample, given the log-volatility 
     421        logLikelihood(&sp[k2], &P, xk); 
     422         
     423        // Save the first log-volatility and the log-likelihood based on it (and 
     424        // it alone, at this point) as the log-likelihood of the innovation given 
     425        // the log-volatility of the proposal 
    420426        if(k1==k0) { 
    421           // First time-series sample, use a proposal on z instead of z itself 
    422           val += uniform(-w, +w); 
    423           zp[k2] = val; 
    424           Lz[k2] = normLogDensity(val); 
     427          for(k3=0; k3<Nt; k3++) 
     428            hp[k2][k3] = PA1(sp[k2].h, k3); 
     429          Lxk[k2] = sp[k2].Lx; 
    425430        } 
    426         PA1(sp[k2].z, k3) = val; 
    427431      } 
    428       // ...compute the multivariate log-volatility 
    429       logVol(&sp[k2], &P); 
    430       // ...and the log-likelihood of the innovation for this time-series 
    431       // sample, given the log-volatility 
    432       logLikelihood(&sp[k2], &P, xk); 
    433        
    434       // Save the first log-volatility and the log-likelihood based on it (and 
    435       // it alone, at this point) as the log-likelihood of the innovation given 
    436       // the log-volatility of the proposal 
    437       if(k1==k0) { 
    438         for(k3=0; k3<Nt; k3++) 
    439           hp[k2][k3] = PA1(sp[k2].h, k3); 
    440         Lxk[k2] = sp[k2].Lx; 
    441       } 
     432   
     433      // Ready for the next time series sample, assuming there is one 
     434      k1++; 
     435     
     436    } while (k1<Ns && k1<k0+ne); 
     437     
     438    // Importance sample proposal using pProb=exp(Lz+Lx) for each... 
     439    spSelection = importanceSample(sp, Lz, np); 
     440    // ...add the log-likelihood of the innovation given the selected 
     441    // log-volatility proposal to what will be the total log-likelihood of the 
     442    // innovations given the entire simulated h... 
     443    Lx += Lxk[spSelection.kp]; 
     444    // ...add the log-density of the selected log-volatility proposal to what 
     445    // will be the total log-density of the entire simulated h... 
     446    Lh += spSelection.Lp; 
     447    // ...and update the normal variate underlying the current time-series sample 
     448    // to that on which the selected log-volatility proposal was based. 
     449    for(k1=0; k1<Nt; k1++) 
     450      Z2(k1, k0) = zp[spSelection.kp]; 
     451   
     452    //--- DEBUG ----------------------------------------------------------------- 
     453    //printf("\n%d %d\n---------------------------\n",k0, spSelection.kp); 
     454    //for(k1=0; k1<n; k1++) 
     455    //  printf("%d %f %f\n", k1, Lxk[k1], sp[k1].Lx); 
     456    //--------------------------------------------------------------------------- 
     457   
     458    // Set the initial log-volatility of the proposals for the next time-series 
     459    // sample to the log-volatility of the selected proposal for this time-series 
     460    // sample. 
     461    for(k1=0; k1<Nt; k1++) { 
     462      val = hp[spSelection.kp][k1]; 
     463      for(k2=0; k2<np; k2++) 
     464        PA1(sp[k2].h, k1) = val; 
    442465    } 
    443  
    444     // Ready for the next time series sample, assuming there is one 
    445     k1++; 
    446    
    447   } while (k1<Ns && k1<k0+ne); 
    448    
    449   // Importance sample proposal using pProb=exp(Lz+Lx) for each... 
    450   spSelection = importanceSample(sp, Lz, np); 
    451   // ...add the log-likelihood of the innovation given the selected 
    452   // log-volatility proposal to what will be the total log-likelihood of the 
    453   // innovations given the entire simulated h... 
    454   Lx += Lxk[spSelection.kp]; 
    455   // ...add the log-density of the selected log-volatility proposal to what 
    456   // will be the total log-density of the entire simulated h... 
    457   Lh += spSelection.Lp; 
    458   // ...and update the normal variate underlying the current time-series sample 
    459   // to that on which the selected log-volatility proposal was based. 
    460   for(k1=0; k1<Nt; k1++) 
    461     Z2(k1, k0) = zp[spSelection.kp]; 
    462  
    463   //--- DEBUG ----------------------------------------------------------------- 
    464   //printf("\n%d %d\n---------------------------\n",k0, spSelection.kp); 
    465   //for(k1=0; k1<n; k1++) 
    466   //  printf("%d %f %f\n", k1, Lxk[k1], sp[k1].Lx); 
    467   //--------------------------------------------------------------------------- 
    468  
    469   // Set the initial log-volatility of the proposals for the next time-series 
    470   // sample to the log-volatility of the selected proposal for this time-series 
    471   // sample. 
    472   for(k1=0; k1<Nt; k1++) { 
    473     val = hp[spSelection.kp][k1]; 
    474     for(k2=0; k2<np; k2++) 
    475       PA1(sp[k2].h, k1) = val; 
    476   } 
    477  
    478   // Done with log-volatility draw for this time-series sample 
     466   
     467    // Done with log-volatility draw for this time-series sample 
     468  } 
     469 
     470  // Done with iterations 
    479471} 
    480472 
    481473// Save the multivariate log-volatility simulated for the last time-series 
    482 // sample 
     474// sample in the last iteration 
    483475for(k0=0; k0<Nt; k0++) 
    484476  H1(k0) = PA1(sp[spSelection.kp].h, k0); 
  • projects/AsynCluster/trunk/svpmc/model.py

    r189 r190  
    299299        # Run the hybrid Gibbs rounds, returning the likelihood from the last 
    300300        # one along with the state of the IID variate vector at that point. 
    301         Lx = 0 
    302         Lh = 0 
    303         for name in ('z', 'h', 'x', 
    304                      'w', 'np', 'ne', 
    305                      'd', 'e', 'g', 'rz', 'ri', 'sc'): 
    306             print name, locals().get(name, getattr(self, name, "-")) 
    307  
    308         for k in xrange(self.N2): 
    309             result = self.inline( 
    310                 'z', 'h', 'x', 
    311                 'w', 'np', 'ne', 
    312                 'd', 'e', 'g', 'rz', 'ri', 'sc', 
    313                 w=self.wiggle, np=self.N1, ne=2) 
    314             Lx += result[0] 
    315             Lh += result[1] 
     301        Lx, Lh = self.inline( 
     302            'z', 'h', 'x', 
     303            'w', 'ni', 'np', 'ne', 
     304            'd', 'e', 'g', 'rz', 'ri', 'sc', 
     305            w=self.wiggle, np=self.N1, ni=self.N2, ne=2) 
    316306        return Lx, Lh, z, h 
  • projects/AsynCluster/trunk/svpmc/test/test_model.py

    r189 r190  
    595595        return signal.lfilter([1], [1.0, -eValue], fsValue*z) 
    596596 
    597     def _x_to_y(self, x, aValue=0.0, bValue=1.0): 
     597    def _x_to_y(self, x, aValue=0.0, bValue=0.0): 
    598598        """ 
    599599        Runs the innovations through a VAR(1) to get simulated 
     
    611611        return iv, h, x 
    612612 
    613     def _modelAndParamFactory(self, wiggle, Ns, N1, N2, **kw): 
     613    def _modelAndParamFactory(self, wiggle, N1, N2, **kw): 
    614614        """ 
    615615        Returns a parameter container populated from my I{params} dict, 
     
    627627                s.array(kw.get(name, self.params[name]), dtype='d')) 
    628628        # Return the parameter container along with a model to be tested 
    629         modelObj = model.Model(
     629        modelObj = model.Model(N1=N1, N2=N2, wiggle=wiggle
    630630        modelObj.paramNames = util.PARAM_NAMES 
    631         modelObj.N1=N1 
    632         modelObj.N2=N2 
    633631        return pc, modelObj 
    634632 
     
    644642            sp.grid(True) 
    645643     
    646     def _likelihood(self, eValue, wiggle, Ns, Nr, N1, N2, Ne): 
     644    def _likelihood(self, eValue, wiggle, Ns, Nr, N1, N2, Ne, params): 
    647645        """ 
    648646        Runs C code for L{model.Model.likelihood} independent of the method 
     
    650648        """ 
    651649        z = s.zeros((1, Ns)) 
    652         kw = { 
    653             'h':s.empty(1), 
    654             'sc':s.array([1, s.log(s.sqrt(2*s.pi))]), 
    655             'z':z, 
    656             'w':wiggle, 
    657             'np':N1, 
    658             'ne':Ne, 
    659             'd':s.array([0.0]), 
    660             'e':s.array([[float(eValue)]]), 
    661             'g':s.array([0.0]), 
    662             'rz':s.array([[1.0]]), 
    663             'ri':s.array([[1.0]]) 
    664             } 
     650        params['z'] = z 
    665651        modelObj = model.Model() 
    666652        for j in xrange(Nr): 
    667653            iv, hReal, x = self._simData(Ns, eValue) 
    668             kw['x'] = s.row_stack([x]) 
    669             for k in xrange(N2): 
    670                 modelObj.inlineDirect(modelObj.code['likelihood'], **kw) 
     654            params['x'] = s.row_stack([x]) 
     655            modelObj.inlineDirect(modelObj.code['likelihood'], **params) 
    671656            hSim = self._z_to_h(z[0,:], eValue, 1.0) 
    672657            yield hReal, hSim 
     
    684669            print "%3d: %+5.1f  %+6.1f%s%s" % (k, Lx[k], Lh[k], " "*5, zLine) 
    685670 
    686     def test_highCorrelation_h(self): 
     671    def test_highCorrelation(self): 
     672        # Empirically, it looks like Ne=3 is best, just slightly better than 
     673        # Ne=2 and Ne=4. Beyond Ne=4 things decline significantly. 
     674        Nr = 2000 
     675        wiggle = 3.0 
     676        eValue = 0.95 
     677        Ns, N1, N2 = 1000, 10, 1 
     678        fig = self.fig 
     679        Ne_values = (2,3,4,5,6,) 
     680        params = { 
     681            'h':s.empty(1), 
     682            'sc':s.array([1, s.log(s.sqrt(2*s.pi))]), 
     683            'w':wiggle, 
     684            'np':N1, 
     685            'ni':N2, 
     686            'd':s.array([0.0]), 
     687            'e':s.array([[float(eValue)]]), 
     688            'g':s.array([0.0]), 
     689            'rz':s.array([[1.0]]), 
     690            'ri':s.array([[1.0]]) 
     691            } 
     692        for k, Ne in enumerate(Ne_values): 
     693            params['ne'] = Ne 
     694            correlations = [] 
     695            for hReal, hSim in self._likelihood( 
     696                eValue, wiggle, Ns, Nr, N1, N2, Ne, params): 
     697                corrInfo = stats.spearmanr(hReal, hSim) 
     698                if VERBOSE: 
     699                    print corrInfo[0] 
     700                correlations.append(corrInfo[0]) 
     701            sp = fig.add_subplot(100*len(Ne_values)+11+k) 
     702            sp.hist(s.array(correlations)) 
     703            sp.set_title("Ne=%d" % Ne) 
     704        #self._check(x, iv[1,:], zSim[-1,:], eValue) 
     705     
     706    def test_highCorrelation_c(self): 
    687707        Ne = 2 
    688708        wiggle = 3.0 
     
    692712            self._plot((hReal, hSim)) 
    693713     
    694     def test_highCorrelation_correlation(self): 
    695         # Empirically, it looks like Ne=2 is best (possibly Ne=3, but why?) 
    696         Ne = 2 
    697         Nr = 10000 
    698         wiggle = 3.0 
    699         eValue = 0.99 
    700         Ns, N1, N2 = 200, 20, 1 
    701         correlations = [] 
    702         for hReal, hSim in self._likelihood(eValue, wiggle, Ns, Nr, N1, N2, Ne): 
    703             corrInfo = stats.spearmanr(hReal, hSim) 
    704             if VERBOSE: 
    705                 print corrInfo[0] 
    706             correlations.append(corrInfo[0]) 
    707             #self.failUnless(corrInfo[0] > 0.8) 
    708         sp = self.fig.add_subplot(111) 
    709         sp.hist(s.array(correlations)) 
    710         sp.set_title("Ne=%d" % Ne) 
    711         #self._check(x, iv[1,:], zSim[-1,:], eValue) 
    712  
    713714    def test_highCorrelation_model(self): 
    714715        wiggle = 3.0 
    715716        eValue = 0.95 
    716         Ns, N1, N2 = 300, 20, 1 
     717        Ns, N1, N2 = 1000, 2, 1 
    717718        iv, h, x = self._simData(Ns, eValue) 
    718         pc, modelObj = self._modelAndParamFactory( 
    719             wiggle, Ns, N1, N2, e=[[eValue]]) 
     719        pc, modelObj = self._modelAndParamFactory(wiggle, N1, N2, e=[[eValue]]) 
    720720        pc.z = s.zeros((1, Ns)) 
    721721        modelObj.y = self._x_to_y(x)