Changeset 184

Show
Ignore:
Timestamp:
05/17/08 01:31:43 (7 months ago)
Author:
edsuom
Message:

Grinding along with C code for new model

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • projects/AsynCluster/trunk/svpmc/model.c

    r183 r184  
    109109  // Log-likelihood of the innovations up to the current one, given the history 
    110110  // of h to this point 
    111   double Lx = 0
     111  double Lx
    112112  // Multivariate normal proposal, which is just z for time-series samples 
    113113  // after the first in each log-volatility computation 
     
    127127  int kp; 
    128128  double Lp; 
    129 } 
     129}; 
    130130 
    131131 
     
    271271// the log-probability of the proposal's having been selected. 
    272272struct selection \ 
    273 sample(struct sample *sp[][2], double Lz[], double Dp[], int n) 
     273importanceSample(struct sample *sp[][2], double Lz[], double Dp[], int n) 
    274274{ 
    275275  int k0, kpMax; 
     
    287287      pMax = val; 
    288288      kpMax = k0; 
     289    } 
    289290  } 
    290291 
     
    304305 
    305306 
    306 // Model.hybridGibbs 
     307// Model.likelihood 
    307308//  
    308309// Supplied variables 
     
    311312//--- Supplied variables --- 
    312313// z    Independent normal variates, [pn] array (updated) 
     314// x    Innovation values, [pn] array 
    313315// h    Simulated last-sample multivariate log-volatility, [p] (overwritten) 
    314 // x    Innovation values, [pn] array 
    315316// w    Wiggle value for +/- proposals (float) 
    316317// n    Number of proposals to try per time-series sample (int) 
     
    366367P.rz = rz_array; P.ri = ri_array; P.sc = sc_array; 
    367368 
    368 // Initialize various arrays 
     369// Initialize various stuff 
    369370xk = (double *) calloc(sizeof(double), Nt); 
    370371for(k0=0; k0<n; k0++) { 
    371372  for(k1=0; k1<2; k1++) { 
     373    sp[k0][k1].Lx = 0; 
    372374    sp[k0][k1].h = (double *) calloc(sizeof(double), Nt); 
    373375    sp[k0][k1].x0 = (double *) calloc(sizeof(double), Nt); 
     
    430432 
    431433  // Importance sample proposal using pProb=exp(Lz+Lx) for each 
    432   spSelection = sample(sp, Lz, Dp, n); 
     434  spSelection = importanceSample(sp, Lz, Dp, n); 
    433435  Lp += log(spSelection.Lp); 
    434436  for(k1=0; k1<Nt; k1++) 
     
    453455 
    454456// Free array memory 
    455 free(x0); 
     457free(xk); 
    456458for(k0=0; k0<n; k0++) { 
    457459  for(k1=0; k1<2; k1++) { 
  • projects/AsynCluster/trunk/svpmc/model.py

    r179 r184  
    5656        M = modelObj 
    5757 
    58     def likelihood(paramContainer, sigma): 
    59         result = M.likelihood(paramContainer, sigma
     58    def likelihood(paramContainer): 
     59        result = M.likelihood(paramContainer
    6060        if result is None: 
    6161            return 
     
    106106        return d 
    107107     
    108     def likelihood(self, paramContainer, sigma, remote=False, local=False): 
     108    def likelihood(self, paramContainer, remote=False, local=False): 
    109109        """ 
    110110        Computes the log-likelihood of the parameters in the supplied 
     
    123123                # An error from the remote likelihood method call is treated as 
    124124                # infinitely low likelihood 
    125                 result = (-s.inf, [], [], 0, 0
     125                result = (-s.inf, 0, [], []
    126126            elif isinstance(result, str): 
    127127                # Unpack string result from remote call 
    128128                result = list(pack.Unpacker(result)) 
    129             for k, name in enumerate(('Lx', 'h', 'z', 'Lh', 'acceptanceRate')): 
     129            for k, name in enumerate(('Lx', 'Lp', 'z', 'h')): 
    130130                setattr(paramContainer, name, result[k]) 
    131131            return paramContainer 
    132  
     132         
    133133        if (remote or self.remoteMode) and not local: 
    134134            d = self.client.run( 
    135                 'likelihood', paramContainer, sigma, timeout=self.timeout) 
     135                'likelihood', paramContainer, timeout=self.timeout) 
    136136        else: 
    137137            d = self.queue.call( 
    138                 self.modelObj.likelihood, paramContainer, sigma
     138                self.modelObj.likelihood, paramContainer
    139139        return d.addCallbacks(gotLikelihood, self._oops) 
    140140 
     
    177177    @ivar y: A 2-D array containing my observation data. 
    178178 
    179     @ivar Mv: Number of volatility draws per likelihood evaluation. 
    180  
    181     """ 
    182     keyAttrs = {'y':None, 'Mv':1} 
     179    @ivar wiggle: The +/- range of the random-walk uniform proposal on z for 
     180      each hybrid Gibbs proposal. 
     181 
     182    @ivar N1: Number of log-volatility proposals to compute for the importance 
     183      sampling of each hybrid Gibbs step. 
     184 
     185    @ivar N2: Number of hybrid-Gibbs iterations per likelihood evaluation. 
     186 
     187    """ 
     188    keyAttrs = {'y':None, 'wiggle':0.3, 'N1': 8, 'N2':1} 
    183189 
    184190    #--- Properties ----------------------------------------------------------- 
     
    221227                i += 1 
    222228        return cv 
    223  
    224     def logVolatilities(self, z, v, x, h0): 
    225         """ 
    226         Call this method with a 2-D array I{z} of multivariate IID normal 
    227         random variates, a 2-D array I{v} of volatility shocks correlated from 
    228         the IID variates, a 2-D vector I{x} containing one multivariate 
    229         innovation for each random variate in I{z}, and a 1-D vector I{h0} that 
    230         defines an initial multivariate log-volatility value. 
    231  
    232         The method will compute a 2-D array I{h} of log-volatilities for each 
    233         volatility shock by running it through the stochastic-volatility VAR(1) 
    234         process, with the value provided in I{h0} as the VAR component for the 
    235         first sample. Then it will compute the total log-likelihood of the 
    236         innovations in I{x} given each respective log-volatility value in I{h}. 
    237          
    238         The method returns a tuple with the total log-likelihood value and the 
    239         computed log-volatility array I{h}. 
    240  
    241         CALLS TO THIS METHOD IS WHERE MOST OF THE CPU TIME IS SPENT! 
    242         """ 
    243         # Mostly empty log-volatility array 
    244         h = s.column_stack([h0, s.zeros_like(v)]) 
    245         # Working arrays with intialized values 
    246         if 'lv_work' in self.cache: 
    247             y, ri = self.cache['lv_work'] 
    248         else: 
    249             # Scratchpad with some constants 
    250             y = s.empty((self.p, 4)) 
    251             y[:,0] = s.sqrt(1 - self.g**2) 
    252             y[:,1] = s.log(s.sqrt(2*s.pi) * y[:,0]) 
    253             # Inverse of concurrent cross-correlations between innovation 
    254             # shocks, will raise exception if invalid 
    255             ri = linalg.inv(linalg.cholesky( 
    256                 self.covarMatrix(s.ones(self.p), self.cr), lower=True)) 
    257             self.cache['lv_work'] = y, ri 
    258         # Run the C code where the heavy lifting is done 
    259         L0, L = self.inline('z', 'v', 'x', 'h', 'ri', 'y', 'd', 'e', 'g') 
    260         return L0, L, h[:,1:] 
    261  
    262     def hybridGibbs(self, z, x, rv, sigma, tol=1E-3): 
    263         """ 
    264         Does one iteration of a hybrid Gibbs sampler for the log-volatilities, 
    265         where each conditional draw is done with a Metropolis-Hastings step. 
    266          
    267         Updates the IID normal variates in place. Returns the likelihood of the 
    268         innovations in I{x}, given the simulated log-volatilities, along with 
    269         the last sample's multivariate log-volatility value, and the 
    270         log-probability of the simulated result. 
    271          
    272         The method will account for the effect of log-volatility proposals on 
    273         downstream samples. You can specify the maximum fractional effect to 
    274         account for with I{tol}. Smaller tolerances require more post-sample 
    275         log-volatility recomputations and hence more CPU time. 
    276         """ 
    277         L_total = 0 
    278         N = self.decaySamples(tol) 
    279         # Initialize volatility shocks from cross-correlation of the IID 
    280         # variates, if positive definite... 
    281         v = s.array(rv*z) 
    282         # ...and initial log-volatilites, from the offset alone... 
    283         h0 = self.d 
    284         # ...and a set of random-walk proposals for the IID variates, along 
    285         # with their volatility shocks 
    286         zp, L_result = self.zProposal(z, sigma) 
    287         vp = s.array(rv*zp) 
    288         # Do conditional draws of each sample in turn 
    289         for k in xrange(self.n): 
    290             # We need to compare the current log-volatilities for 
    291             # this sample, given the previous log-volatility sample... 
    292             LV = self.logVolatilities(z[:,k:k+N], v[:,k:k+N], x[:,k:k+N], h0) 
    293             # ...to a log-volatility proposal 
    294             zpk = s.column_stack([zp[:,k], z[:,k+1:k+N]]) 
    295             vpk = s.column_stack([vp[:,k], v[:,k+1:k+N]]) 
    296             LVp = self.logVolatilities(zpk, vpk, x[:,k:k+N], h0) 
    297             # Metropolis-Hastings accept/reject of the proposal 
    298             dL = LVp[1] - LV[1] 
    299             if dL < -600 or not s.isfinite(dL): 
    300                 # Bogus likelihood or infintesimally small probability of 
    301                 # acceptance, just reject 
    302                 accept = False 
    303             elif dL > 0: 
    304                 # Always accept if the proposal's likelihood is better 
    305                 accept = True 
    306             elif self.logUniform() < dL: 
    307                 # Proposal likelihood worse, but accepted 
    308                 L_result += dL 
    309                 accept = True 
    310             else: 
    311                 # Proposal likelihood worse and rejected 
    312                 L_result += s.log(1 - s.exp(dL)) 
    313                 accept = False 
    314             if accept: 
    315                 LV = LVp 
    316                 z[:,k] = zp[:,k] 
    317                 v[:,k] = vp[:,k] 
    318             # Update stuff for the next sample 
    319             h0 = LV[2][:,0] 
    320             L_total += LV[0] 
    321             # If the likelihood at this point is so bad that the parameter is 
    322             # guaranteed not to be resampled, just bail out now and save time 
    323             if L_total < -1E6: 
    324                 return -s.inf, s.zeros_like(h0), 0.0, 0.0 
    325         return L_total, h0, L_result 
    326229     
    327230 
    328231    #--- The API -------------------------------------------------------------- 
    329232     
    330     def likelihood(self, paramContainer, sigma): 
     233    def likelihood(self, paramContainer): 
    331234        """ 
    332235        Call this method with a parameter object that defines: 
     
    338241               draw of log volatilities. 
    339242 
    340             3. The log-likelihood I{L} of my observations given the parameters 
    341                and after this method has done a simulation draw of 
    342                log-volatilities conditional on those parameter values and 
    343                starting with the IID variates in I{z}. 
    344  
    345243        A new 2-D array of log-volatilities will be drawn from a hybrid Gibbs 
    346         simulation, with each Gibbs step driven by a short Metropolis-Hastings 
    347         random walk simulation of the IID variates, followed by 
    348         cross-correlation to form volatility shocks, and VAR(1) to form the 
    349         log-volatilities. The Metropolis-Hastings proposals are scaled based on 
    350         the supplied fractional I{sigma}. The simulation is governed by the 
    351         log-likelihood of my observation data given the supplied parameters an
    352         the simulated log-volatilities. 
     244        simulation, with each Gibbs step driven by a short importance-sampling 
     245        simulation of the IID variates, followed by cross-correlation to form 
     246        volatility shocks, and VAR(1) to form the log-volatilities. The 
     247        importance-sampling proposals are scaled based on the supplied 
     248        fractional I{sigma}. The simulation is governed by the log-likelihood 
     249        of my observation data given the supplied parameters and the simulate
     250        log-volatilities. 
    353251 
    354252        If a L{linalg.LinAlgError} is raised due to an invalid correlation 
     
    357255        the log-volatility simulation: 
    358256         
    359             1. The value of the log-likelihood. 
    360  
    361             2. The last sample's multivariate log-volatility value. 
    362  
    363             3. The IID normal variates underlying the log-volatilities. 
    364  
    365             4. The log-probability of the simulated result, normalized to be 
    366                independent of the number of hybrid Gibbs steps. 
    367  
    368             5. The mean acceptance rate of the Metropolis-Hastings proposals
     257            1. The log-likelihood of my observations given the simulated 
     258               log-volatility values. 
     259 
     260            2. The log-probability of the simulated log-volatilities. 
     261 
     262            3. The IID normal variates underlying the simulated 
     263               log-volatilities. 
     264 
     265            4. The multivariate log-volatility value for the last time-series 
     266               sample, useful for extrapolating from the fitted model
    369267         
    370268        The returned likelihood does not consider the prior probability of the 
     
    378276        # ...including the latent parameter of IID normal variates 
    379277        z = paramContainer.z.copy() 
    380         # Concurrent cross-correlations between volatility shocks, bail out 
    381         # with no result if invalid 
     278 
     279        # Set up arary variables for the inline call, including innovations as 
     280        # residuals of my observations run through the reversed VAR(1)... 
     281        x = self.var.reverse(self.a, self.b) 
     282        # ... the log-volatility of the last time-series sample... 
     283        h = s.empty(self.p) 
     284        # ...concurrent cross-correlations between volatility shocks and 
     285        # inverse of concurrent cross-correlations between innovation shocks 
     286        # (we'll bail out with no result if either is invalid), and... 
    382287        try: 
    383             rv = s.matrix(linalg.cholesky( 
    384                 self.covarMatrix(self.fs, self.fr), lower=True)) 
     288            rz = linalg.cholesky( 
     289                self.covarMatrix(self.fs, self.fr), lower=True) 
     290            ri = linalg.inv(linalg.cholesky( 
     291                self.covarMatrix(s.ones(self.p), self.cr), lower=True)) 
    385292        except linalg.LinAlgError: 
    386293            return 
    387         # Innovations as residuals of the reversed VAR(1) 
    388         x = self.var.reverse(self.a, self.b) 
     294        # ...scaling constants for multivariate normal pdf 
     295        sc[:,0] = s.sqrt(1 - self.g**2) 
     296        sc[:,1] = s.log(s.sqrt(2*s.pi) * sc[:,0]) 
     297 
    389298        # Run the hybrid Gibbs rounds, returning the likelihood from the last 
    390         # one along with the state of the IID variate vector at that point, 
    391         # bailing out with no result if invalid parameter raises exception 
    392         L_result = 0 
    393         acceptRates = [] 
    394         try: 
    395             for k in xrange(self.Mv): 
    396                 info = self.hybridGibbs(z, x, rv, sigma) 
    397                 L_result += info[2] 
    398                 acceptRates.append(info[3]) 
    399         except linalg.LinAlgError: 
    400             return 
    401         return info[0], info[1], z, L_result/self.Mv, sum(acceptRates)/self.Mv 
    402      
     299        # one along with the state of the IID variate vector at that point. 
     300        Lx = 0 
     301        Lp = 0 
     302        for k in xrange(self.Mv): 
     303            result = self.inline( 
     304                'z', 'h', 'x', 'w', 'n', 'd', 'e', 'g', 'rz', 'ri', 'sc', 
     305                w=self.wiggle, n=self.N1) 
     306            Lx += result[0] 
     307            Lp += result[1] 
     308        return Lx, Lp, z, h 
  • projects/AsynCluster/trunk/svpmc/test/test_model.py

    r183 r184  
    461461    struct sample S; 
    462462    struct parameters P; 
     463    double xk[Nx[0]]; 
    463464 
    464465    P.g = g_array; P.ri = ri_array; P.sc = sc_array; 
    465     S.z = (double *) malloc(sizeof(double) * Nz[0]); 
    466     S.h = (double *) calloc(sizeof(double), Nz[0]); 
    467     S.x0 = (double *) malloc(sizeof(double) * Nz[0]); 
    468     S.x1 = (double *) malloc(sizeof(double) * Nz[0]); 
    469      
    470     for(k0=0; k0<Nz[1]; k0++) { 
    471       for(k1=0; k1<Nz[0]; k1++) 
     466 
     467    S.Lx = 0; 
     468    S.z = (double *) malloc(sizeof(double) * Nx[0]); 
     469    S.h = (double *) calloc(sizeof(double), Nx[0]); 
     470    S.x0 = (double *) malloc(sizeof(double) * Nx[0]); 
     471    S.x1 = (double *) malloc(sizeof(double) * Nx[0]); 
     472     
     473    for(k0=0; k0<Nx[1]; k0++) { 
     474      for(k1=0; k1<Nx[0]; k1++) { 
     475        xk[k1] = X2(k1, k0); 
    472476        PA1(S.z, k1) = Z2(k1, k0); 
     477        PA1(S.h, k1) = H2(k1, k0); 
     478      } 
    473479      // This is what's being tested 
    474       logLikelihood(&S, &P, x); 
    475       for(k1=0; k1<Nz[0]; k1++) 
    476         H2(k1, k0) = PA1(S.h, k1); 
     480      logLikelihood(&S, &P, xk); 
    477481    } 
     482    return_val = S.Lx; 
    478483    """ 
    479484     
    480     def _draw_h(self, d, e, fs, fr, z=None): 
    481         if z is None: 
    482             z = s.randn(len(d), self.N) 
    483         h = s.zeros_like(z) 
    484         rz = s.array(self._rv(fs, fr)) 
    485         self.inline( 
    486             self.code, z=z, d=s.array(d), e=s.array(e), h=h, rz=rz) 
    487         return h 
    488  
    489     def _check_L(self, distObj, hValue): 
    490         args = [hValue * s.ones((1, self.N)) for k in (0, 1)] + \ 
    491                [None] + \ 
    492                [s.zeros(1)] 
    493         # Test 100 points from -8 to +8, most of them close to zero 
    494         for x0 in s.linspace(-2, +2, 100): 
    495             x0 = x0**3 
    496             args[2] = x0 * s.ones((1, self.N)) 
    497             L0, L, h = self.model.logVolatilities(*args) 
    498             self.failUnlessAlmostEqual(s.exp(L0), distObj.pdf(x0), 6) 
    499             self.failUnlessAlmostEqual(self.N*L0, L) 
    500             self.failUnless(s.equal(h, hValue).all()) 
    501      
    502     def test_hZeros(self): 
    503         self._check_L(stats.distributions.norm(0, 1), 0) 
    504  
    505     def test_hOnes(self): 
    506         self._check_L(stats.distributions.norm(0, s.sqrt(s.exp(1))), 1) 
    507  
    508     def test_hMinusTwos(self): 
    509         self._check_L(stats.distributions.norm(0, s.sqrt(s.exp(-2))), -2) 
    510  
     485    def _logLikelihood(self, x, logVol): 
     486        Nt = x.shape[0] 
     487        sc = s.column_stack([ 
     488            s.ones(Nt), 
     489            s.log(s.sqrt(2*s.pi)) * s.ones(Nt)]) 
     490        return self.inline( 
     491            self.code, 
     492            x=x, z=s.zeros(Nt), h=logVol*s.ones(Nt), 
     493            g=s.zeros(Nt), ri=s.identity(Nt), sc=sc) 
     494     
     495    def test_basic(self): 
     496        for logVol in (-2, -1, 0, +1): 
     497            distObj = stats.norm(0, s.exp(0.5*logVol)) 
     498            for x in s.linspace(-2, +2, 10): 
     499                Lx = self._logLikelihood(s.array([[x]]), logVol) 
     500                self.failUnlessAlmostEqual(Lx, s.log(distObj.pdf(x))) 
     501 
     502    def test_Lx_accumulates(self): 
     503        # TODO 
     504        Ns = 2 
     505        Lx = self._logLikelihood(s.ones((1,Ns)), 0) 
     506        distObj = stats.norm(0, 1) 
     507        self.failUnlessAlmostEqual(Lx, s.log(Ns*distObj.pdf(1))) 
     508             
    511509 
    512510class Test_Model_likelihood(BaseCase):