Changeset 189

Show
Ignore:
Timestamp:
05/21/08 18:16:16 (7 months ago)
Author:
edsuom
Message:

C code for model.likelihood appears to be working, but unit test for the python method has very slow h-to-z response

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • projects/AsynCluster/trunk/svpmc/model.c

    r188 r189  
    128128{ 
    129129  int kp; 
    130   double Lh
     130  double Lp
    131131}; 
    132132 
     
    250250 
    251251 
    252 // Returns with zero only if the post-first proposals are close enough to 
    253 // finish likelihood computation 
    254 int notCloseEnough(struct sample sp[][2], int n) 
     252// Returns with zero only if the proposals are close enough to finish 
     253// likelihood computation 
     254int notCloseEnough(struct sample sp[], int n) 
    255255{ 
    256256  int k; 
     
    259259  double Lmax = -HUGE_VAL;   // Anything will be more positive 
    260260  for(k=0; k<n; k++) { 
    261     L = sp[k][1].L_diff; 
     261    L = sp[k].L_diff; 
    262262    if(L < Lmin) 
    263263      Lmin = L; 
     
    276276// the log-probability of the proposal's having been selected. 
    277277struct selection \ 
    278 importanceSample(struct sample sp[][2], double Lz[], int n) 
    279 { 
    280   int k, kpMax
     278importanceSample(struct sample sp[], double Lz[], int n) 
     279{ 
     280  int k
    281281  double val; 
    282282  double Lp[n], Dp[n]; 
    283   double L_min = +HUGE_VAL; // Anything will be more negative 
    284   double pMax = -HUGE_VAL;  // Anything will be more positive 
     283  double L_max = -HUGE_VAL; // Anything will be more positive 
    285284  struct selection result; 
    286285 
    287286  // Normalize log-densities to prevent overflow 
    288287  for(k=0; k<n; k++) { 
    289     val = sp[k][1].Lx + Lz[k]; 
    290     if(val < L_min
    291       L_min = val; 
     288    val = sp[k].Lx + Lz[k]; 
     289    if(val > L_max
     290      L_max = val; 
    292291    Lp[k] = val; 
    293292  } 
    294293 
    295   // Compute linear probability density for each proposal and their maximum 
    296   for(k=0; k<n; k++) { 
    297     val = exp(Lp[k] - L_min); 
    298     Dp[k] = val; 
    299     if(val > pMax) { 
    300       pMax = val; 
    301       kpMax = k; 
    302     } 
    303   } 
     294  // Compute linear probability density for each proposal, relative to the 
     295  // maximum one 
     296  for(k=0; k<n; k++) 
     297    Dp[k] = exp(Lp[k] - L_max); 
    304298 
    305299  // Accept-reject sampling with uniform random proposal selection 
     
    309303    // ...until the selected proposal is accepted, with probability 
    310304    // proportional to its relative weight 
    311   } while(result.kp != kpMax && uniform(0, pMax) > Dp[result.kp]); 
     305  } while(uniform(0, 1.0) > Dp[result.kp]); 
    312306 
    313307  // The probability density of the selected log-volatility proposal is just 
    314308  // the linear, unnormalized weight of the selected sample 
    315   result.Lh = Lp[result.kp]; 
     309  result.Lp = Lp[result.kp]; 
    316310  return result; 
    317311} 
     
    329323// h    Simulated last-sample multivariate log-volatility, [p] (overwritten) 
    330324// w    Wiggle value for +/- proposals (float) 
    331 // n    Number of proposals to try per time-series sample (int) 
     325// np   Number of proposals to try per log-volatility simulation (int) 
     326// ne   Max number of time-series samples to evaluate per proposal (int) 
    332327 
    333328//--- Model parameters, direct --- 
     
    351346const int Ns = Nz[1]; 
    352347 
    353 int notFirst; 
    354348int k0, k1, k2, k3; 
    355349double val, L_diff; 
     
    359353// Multivariate innovation for one current time-series sample 
    360354double *xk; 
    361 // Log probability density of each proposal on z 
    362 double Lz[n]; 
     355// The value of z and h for each proposal, at the first time-series sample of 
     356// the evaluation interval 
     357double zp[np], hp[np][Nt]; 
     358// Log probability density of the value of z for each proposal 
     359double Lz[np]; 
     360// Log-likelihood of the innovation for the first time-series sample in 
     361// proposal evalutions, for each proposal 
     362double Lxk[np]; 
    363363 
    364364// A struct for the result of the importance-sampling function 
    365365struct selection spSelection; 
    366 // Two sets of proposal sample structs, one for the first time-series sample in 
    367 // each log-volatility computation (from which the innovation's log-volatility 
    368 // is computed) and another for all time-series samples thereafter. 
    369 struct sample sp[n][2]; 
     366// A set of proposal sample structs 
     367struct sample sp[np]; 
    370368// A struct for the model parameters 
    371369struct parameters P; 
     
    378376P.rz = rz_array; P.ri = ri_array; P.sc = sc_array; 
    379377 
    380 // Initialize various stuff 
    381 xk = (double *) calloc(sizeof(double), Nt); 
    382 for(k0=0; k0<n; k0++) { 
    383   for(k1=0; k1<2; k1++) { 
    384     sp[k0][k1].Lx = 0; 
    385     sp[k0][k1].z = (double *) malloc(sizeof(double) * Nt); 
    386     sp[k0][k1].h = (double *) malloc(sizeof(double) * Nt); 
    387     sp[k0][k1].x0 = (double *) malloc(sizeof(double) * Nt); 
    388     sp[k0][k1].x1 = (double *) malloc(sizeof(double) * Nt); 
    389   } 
     378// Initialize various arrays 
     379xk = (double *) malloc(sizeof(double) * Nt); 
     380for(k0=0; k0<np; k0++) { 
     381  sp[k0].z = (double *) malloc(sizeof(double) * Nt); 
     382  sp[k0].h = (double *) malloc(sizeof(double) * Nt); 
     383  sp[k0].x0 = (double *) malloc(sizeof(double) * Nt); 
     384  sp[k0].x1 = (double *) malloc(sizeof(double) * Nt); 
    390385  // The "previous" log-volatility of the first time-series sample is set to 
    391386  // the log-volatility offset. 
     
    393388  // TODO: Treat it as another latent parameter and simulate its value 
    394389  for(k1=0; k1<Nt; k1++) 
    395     PA1(sp[k0][0].h, k1) = D1(k1); 
     390    PA1(sp[k0].h, k1) = D1(k1); 
    396391} 
    397392 
     
    399394for(k0=0; k0<Ns; k0++) { 
    400395   
    401   // Simulate n sets of h samples from separate proposals on z until a 
    402   // substantially common-valued sample is reached 
     396  // Simulate np sets of h samples from separate proposals on z from the 
     397  // current time-series sample to some subsequent time-series sample at which 
     398  // the log-likelihood of the innovation for that sample becomes substantially 
     399  // the same for all proposals. 
    403400 
    404401  k1 = k0; 
    405   notFirst = 0; 
     402 
     403  // Clear the log-likelihood accumulator of each proposal struct for the next 
     404  // proposal evalution 
     405  for(k2=0; k2<np; k2++) 
     406    sp[k2].Lx = 0; 
    406407 
    407408  do { 
     
    411412      PA1(xk, k2) = X2(k2, k1); 
    412413     
    413     // Set z for the set of sample structs... 
    414     for(k2=0; k2<n; k2++) { 
     414    // For each proposal... 
     415    for(k2=0; k2<np; k2++) { 
     416 
     417      // Set z... 
    415418      for(k3=0; k3<Nt; k3++) { 
    416419        val = Z2(k3, k1); 
    417         if(!notFirst) { 
     420        if(k1==k0) { 
    418421          // First time-series sample, use a proposal on z instead of z itself 
    419422          val += uniform(-w, +w); 
     423          zp[k2] = val; 
    420424          Lz[k2] = normLogDensity(val); 
    421425        } 
    422         PA1(sp[k2][notFirst].z, k3) = val; 
     426        PA1(sp[k2].z, k3) = val; 
    423427      } 
    424     } 
    425     // ...compute their log-volatilities 
    426     for(k2=0; k2<n; k2++) 
    427       logVol(&sp[k2][notFirst], &P); 
    428     // ...and the log-likelihood of the innovation for this time-series sample, 
    429     // for each proposal on z and its log-volatility 
    430     val = 0; 
    431     for(k2=0; k2<n; k2++) { 
    432       logLikelihood(&sp[k2][notFirst], &P, xk); 
    433     } 
    434      
    435     if(k1==k0) { 
    436       // Next will be the second time-series sample... 
    437       notFirst = 1; 
    438       // ...and will be starting with copy of h arrays from proposal structs 
    439       // for the just-completed first time-series sample 
    440       for(k2=0; k2<n; k2++) { 
     428      // ...compute the multivariate log-volatility 
     429      logVol(&sp[k2], &P); 
     430      // ...and the log-likelihood of the innovation for this time-series 
     431      // sample, given the log-volatility 
     432      logLikelihood(&sp[k2], &P, xk); 
     433       
     434      // Save the first log-volatility and the log-likelihood based on it (and 
     435      // it alone, at this point) as the log-likelihood of the innovation given 
     436      // the log-volatility of the proposal 
     437      if(k1==k0) { 
    441438        for(k3=0; k3<Nt; k3++) 
    442           PA1(sp[k2][1].h, k3) = PA1(sp[k2][0].h, k3); 
     439          hp[k2][k3] = PA1(sp[k2].h, k3); 
     440        Lxk[k2] = sp[k2].Lx; 
    443441      } 
    444442    } 
     
    447445    k1++; 
    448446   
    449   } while (k1<Ns && (k1==k0+1 || notCloseEnough(sp, n))); 
     447  } while (k1<Ns && k1<k0+ne); 
    450448   
    451449  // Importance sample proposal using pProb=exp(Lz+Lx) for each... 
    452   spSelection = importanceSample(sp, Lz, n); 
     450  spSelection = importanceSample(sp, Lz, np); 
     451  // ...add the log-likelihood of the innovation given the selected 
     452  // log-volatility proposal to what will be the total log-likelihood of the 
     453  // innovations given the entire simulated h... 
     454  Lx += Lxk[spSelection.kp]; 
    453455  // ...add the log-density of the selected log-volatility proposal to what 
    454456  // will be the total log-density of the entire simulated h... 
    455   Lh += spSelection.Lh
    456   // ...and update the normal variate underlying the curent time-series sample 
     457  Lh += spSelection.Lp
     458  // ...and update the normal variate underlying the current time-series sample 
    457459  // to that on which the selected log-volatility proposal was based. 
    458460  for(k1=0; k1<Nt; k1++) 
    459     Z2(k1,k0) = PA1(sp[spSelection.kp][0].z, k1)
    460    
     461    Z2(k1, k0) = zp[spSelection.kp]
     462 
    461463  //--- DEBUG ----------------------------------------------------------------- 
    462   printf("\n%d %d\n---------------------------\n",k0, spSelection.kp); 
    463   for(k1=0; k1<n; k1++) 
    464     printf("%d %f %f\n", k1, sp[k1][1].Lx, Lz[k1]); 
     464  //printf("\n%d %d\n---------------------------\n",k0, spSelection.kp); 
     465  //for(k1=0; k1<n; k1++) 
     466  //  printf("%d %f %f\n", k1, Lxk[k1], sp[k1].Lx); 
    465467  //--------------------------------------------------------------------------- 
    466468 
     
    469471  // sample. 
    470472  for(k1=0; k1<Nt; k1++) { 
    471     val = PA1(sp[spSelection.kp][1].h, k1); 
    472     for(k2=0; k2<n; k2++) 
    473       PA1(sp[k2][0].h, k1) = val; 
    474   } 
    475  
    476   // Add the log-likelihood of this time-series sample's innovation to the 
    477   // running total and clear the Lx accumulators for the next time-series 
    478   // sample 
    479   for(k1=0; k1<Nt; k1++) { 
    480     Lx += sp[k1][0].Lx; 
    481     sp[k1][0].Lx = 0; 
    482     sp[k1][1].Lx = 0; 
     473    val = hp[spSelection.kp][k1]; 
     474    for(k2=0; k2<np; k2++) 
     475      PA1(sp[k2].h, k1) = val; 
    483476  } 
    484477 
     
    489482// sample 
    490483for(k0=0; k0<Nt; k0++) 
    491   H1(k0) = PA1(sp[spSelection.kp][1].h, k0); 
     484  H1(k0) = PA1(sp[spSelection.kp].h, k0); 
    492485 
    493486// Free array memory 
    494487free(xk); 
    495 for(k0=0; k0<n; k0++) { 
    496   for(k1=0; k1<2; k1++) { 
    497     free(sp[k0][k1].z); 
    498     free(sp[k0][k1].h); 
    499     free(sp[k0][k1].x0); 
    500     free(sp[k0][k1].x1); 
    501   } 
     488for(k0=0; k0<np; k0++) { 
     489    free(sp[k0].z); 
     490    free(sp[k0].h); 
     491    free(sp[k0].x0); 
     492    free(sp[k0].x1); 
    502493} 
    503494 
  • projects/AsynCluster/trunk/svpmc/model.py

    r188 r189  
    294294        # ...scaling constants for multivariate normal pdf 
    295295        sc = s.empty((self.p, 2)) 
    296         sc[:,0] = s.sqrt(1 - self.g**2) 
     296        sc[:,0] = s.sqrt(1.0 - self.g**2) 
    297297        sc[:,1] = s.log(s.sqrt(2*s.pi) * sc[:,0]) 
    298298 
     
    301301        Lx = 0 
    302302        Lh = 0 
     303        for name in ('z', 'h', 'x', 
     304                     'w', 'np', 'ne', 
     305                     'd', 'e', 'g', 'rz', 'ri', 'sc'): 
     306            print name, locals().get(name, getattr(self, name, "-")) 
     307 
    303308        for k in xrange(self.N2): 
    304309            result = self.inline( 
    305                 'z', 'h', 'x', 'w', 'n', 'd', 'e', 'g', 'rz', 'ri', 'sc', 
    306                 w=self.wiggle, n=self.N1) 
    307             print k, result 
     310                'z', 'h', 'x', 
     311                'w', 'np', 'ne', 
     312                'd', 'e', 'g', 'rz', 'ri', 'sc', 
     313                w=self.wiggle, np=self.N1, ne=2) 
    308314            Lx += result[0] 
    309315            Lh += result[1] 
  • projects/AsynCluster/trunk/svpmc/test/test_model.py

    r188 r189  
    510510    int k; 
    511511    const int n = NX[0]; 
    512     struct sample sp[n][2]
     512    struct sample sp[n]
    513513    for(k=0; k<n; k++) { 
    514       sp[k][1].L_diff = X1(k); 
    515       sp[k][1].Lx = X1(k); 
     514      sp[k].L_diff = X1(k); 
     515      sp[k].Lx = X1(k); 
    516516    } 
    517517    """ 
     
    529529    spSelection = importanceSample(sp, Lz, n); 
    530530    result[0] = spSelection.kp; 
    531     result[1] = spSelection.Lh
     531    result[1] = spSelection.Lp
    532532    return_val = result; 
    533533    """ 
     
    578578    corr = 0.0 
    579579    params = { 
    580         'a': [0], 
    581         'b': [[0]], 
     580        'a': [0.0], 
     581        'b': [[0.0]], 
    582582        'cr': [], 
    583         'd': [0], 
    584         'e': [[0]], 
     583        'd': [0.0], 
     584        'e': [[0.0]], 
    585585        'fs': [1.0], 
    586586        'fr': [], 
     
    588588        } 
    589589 
    590     def _z_to_h(self, z, eValue, fsValue): 
     590    def _z_to_h(self, z, eValue, fsValue=1.0): 
    591591        """ 
    592592        Runs the volatility shocks through a VAR(1) to get simulated 
     
    595595        return signal.lfilter([1], [1.0, -eValue], fsValue*z) 
    596596 
    597     def _x_to_y(self, x, aValue, bValue): 
     597    def _x_to_y(self, x, aValue=0.0, bValue=1.0): 
    598598        """ 
    599599        Runs the innovations through a VAR(1) to get simulated 
    600600        observations. 
    601601        """ 
    602         return signal.lfilter([1], [1.0, -bValue], x) + aValue * s.ones_like(x) 
     602        return s.row_stack([ 
     603            signal.lfilter([1], [1.0, -bValue], x) + aValue * s.ones_like(x)]) 
    603604 
    604605    def _simData(self, Ns, eValue, gValue=0, fsValue=1): 
     
    626627                s.array(kw.get(name, self.params[name]), dtype='d')) 
    627628        # Return the parameter container along with a model to be tested 
    628         modelObj = model.Model(N1=N1, N2=N2
     629        modelObj = model.Model(
    629630        modelObj.paramNames = util.PARAM_NAMES 
    630         modelObj.y = s.row_stack([y]) 
     631        modelObj.N1=N1 
     632        modelObj.N2=N2 
    631633        return pc, modelObj 
    632634 
    633     def _check(self, x, zReal, zSim, eValue, fsValue=1.0): 
    634         # Reconstruct simulated h from z 
    635         hSim = self._z_to_h(zSim, e, fs) 
    636         hReal = self._z_to_h(zReal, e, fs) 
    637         if VERBOSE: 
    638             fig = self.fig 
    639             vectors = (x, (zReal, zSim), (hReal, hSim)) 
    640             for k, vector in enumerate(vectors): 
    641                 spNumber = 100*len(vectors) + k + 11 
    642                 sp = fig.add_subplot(spNumber) 
    643                 if not isinstance(vector, (tuple, list)): 
    644                     vector = [vector] 
    645                 for v in vector: 
    646                     sp.plot(v) 
    647         self._check_corr(hReal, hSim) 
    648      
    649     def _likelihood(self, x, eValue, wiggle, N1, N2): 
     635    def _plot(self, *vectors): 
     636        fig = self.fig 
     637        for k, vector in enumerate(vectors): 
     638            spNumber = 100*len(vectors) + k + 11 
     639            sp = fig.add_subplot(spNumber) 
     640            if not isinstance(vector, (tuple, list)): 
     641                vector = [vector] 
     642            for v in vector: 
     643                sp.plot(v) 
     644            sp.grid(True) 
     645     
     646    def _likelihood(self, eValue, wiggle, Ns, Nr, N1, N2, Ne): 
    650647        """ 
    651648        Runs C code for L{model.Model.likelihood} independent of the method 
    652649        itself. 
    653650        """ 
    654         z = s.zeros((1, len(x))) 
    655         zAll = s.empty((N2, len(x))) 
     651        z = s.zeros((1, Ns)) 
    656652        kw = { 
    657653            'h':s.empty(1), 
    658654            'sc':s.array([1, s.log(s.sqrt(2*s.pi))]), 
    659655            'z':z, 
    660             'x':s.row_stack([x]), 
    661656            'w':wiggle, 
    662             'n':N1, 
     657            'np':N1, 
     658            'ne':Ne, 
    663659            'd':s.array([0.0]), 
    664660            'e':s.array([[float(eValue)]]), 
     
    668664            } 
    669665        modelObj = model.Model() 
    670         Lx = s.empty(N2) 
    671         Lh = s.empty(N2) 
    672         for k in xrange(N2): 
    673             result = modelObj.inlineDirect(modelObj.code['likelihood'], **kw) 
    674             Lx[k] = result[0] 
    675             Lh[k] = result[1] 
    676             zAll[k,:] = z.copy() 
    677         return Lx, Lh, zAll 
     666        for j in xrange(Nr): 
     667            iv, hReal, x = self._simData(Ns, eValue) 
     668            kw['x'] = s.row_stack([x]) 
     669            for k in xrange(N2): 
     670                modelObj.inlineDirect(modelObj.code['likelihood'], **kw) 
     671            hSim = self._z_to_h(z[0,:], eValue, 1.0) 
     672            yield hReal, hSim 
    678673     
    679674    def test_basic(self): 
    680675        wiggle = 0.1 
    681676        eValue = 0.90 
    682         Ns, N1, N2 = 3, 5, 1
    683         x = 2.0*s.ones(Ns) 
     677        Ns, N1, N2 = 10, 5, 2
     678        x = 2*s.ones(Ns) 
    684679        Lx, Lh, zSim = self._likelihood(x, eValue, wiggle, N1, N2) 
    685         #fig = self.fig 
    686         #fig.add_subplot(211).plot(Lx) 
    687         #fig.add_subplot(212).plot(Lh) 
    688680        print "\nk   Lx      Lh       Simulated Volatility Shocks" 
    689681        print "-"*100 
    690682        for k, z in enumerate(zSim): 
    691683            zLine = "   ".join(["%+4.2f" % xx for xx in z]) 
    692             print "%d: %+5.1f  %+6.1f%s%s" % (k, Lx[k], Lh[k], " "*5, zLine) 
    693      
     684            print "%3d: %+5.1f  %+6.1f%s%s" % (k, Lx[k], Lh[k], " "*5, zLine) 
     685 
     686    def test_highCorrelation_h(self): 
     687        Ne = 2 
     688        wiggle = 3.0 
     689        eValue = 0.99 
     690        Ns, N1, N2 = 200, 20, 1 
     691        for hReal, hSim in self._likelihood(eValue, wiggle, Ns, 1, N1, N2, Ne): 
     692            self._plot((hReal, hSim)) 
     693     
     694    def test_highCorrelation_correlation(self): 
     695        # Empirically, it looks like Ne=2 is best (possibly Ne=3, but why?) 
     696        Ne = 2 
     697        Nr = 10000 
     698        wiggle = 3.0 
     699        eValue = 0.99 
     700        Ns, N1, N2 = 200, 20, 1 
     701        correlations = [] 
     702        for hReal, hSim in self._likelihood(eValue, wiggle, Ns, Nr, N1, N2, Ne): 
     703            corrInfo = stats.spearmanr(hReal, hSim) 
     704            if VERBOSE: 
     705                print corrInfo[0] 
     706            correlations.append(corrInfo[0]) 
     707            #self.failUnless(corrInfo[0] > 0.8) 
     708        sp = self.fig.add_subplot(111) 
     709        sp.hist(s.array(correlations)) 
     710        sp.set_title("Ne=%d" % Ne) 
     711        #self._check(x, iv[1,:], zSim[-1,:], eValue) 
     712 
    694713    def test_highCorrelation_model(self): 
    695         wiggle = 0.1 
     714        wiggle = 3.0 
    696715        eValue = 0.95 
    697         Ns, N1, N2 = 1000, 10, 100 
     716        Ns, N1, N2 = 300, 20, 1 
    698717        iv, h, x = self._simData(Ns, eValue) 
    699         pc, modelObj = self._modelAndParamFactory(wiggle, Ns, N1, N2, e=[[0.95]]) 
     718        pc, modelObj = self._modelAndParamFactory( 
     719            wiggle, Ns, N1, N2, e=[[eValue]]) 
     720        pc.z = s.zeros((1, Ns)) 
    700721        modelObj.y = self._x_to_y(x) 
    701722        Lx, Lh, z, hn = modelObj.likelihood(pc) 
    702         self._check(x, iv[1,:], z[0,:], eValue) 
    703          
    704     def test_hybridGibbs_lowCorrelation(self): 
    705         self._setupModel(e=[[0.4]]) 
    706         self._runHG(20, 0.5) 
    707  
    708     def test_hybridGibbs_noCorrelation(self): 
    709         self._setupModel(e=[[0.0]]) 
    710         self._runHG(20, 0.5) 
    711  
    712     def test_hybridGibbs_highSigma(self): 
    713         self._setupModel(e=[[0.5]], fs=[[2.0]]) 
    714         self._runHG(20, 0.5) 
     723        self._plot(x, (iv[1,:], z[0,:]), (h, self._z_to_h(z[0,:], eValue)))