Changeset 133
- Timestamp: 04/01/08 01:11:40 (9 months ago)
- Files:
- projects/AsynCluster/trunk/pypmc/model.py (modified) (1 diff)
- projects/AsynCluster/trunk/pypmc/test/dm-us.dat (added)
- projects/AsynCluster/trunk/pypmc/test/jy-us.dat (added)
- projects/AsynCluster/trunk/pypmc/test/project.py (modified) (1 diff)
- projects/AsynCluster/trunk/pypmc/test/test_tseries.py (modified) (5 diffs)
- projects/AsynCluster/trunk/pypmc/test/us-bp.dat (added)
- projects/AsynCluster/trunk/pypmc/test/util.py (modified) (2 diffs)
- projects/AsynCluster/trunk/pypmc/tseries.py (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
projects/AsynCluster/trunk/pypmc/model.py
r132 r133 249 249 Stochastic Model for fitting a C{Kx1} vector of random variables to a 250 250 C{KxN} matrix of observations. 251 252 The probability distributions of the random variables is embodied in the my253 I{distribution} attribute, an instance of L{dist.Distribution} or, for a254 combination of random variables, an instance of L{dist.Combo_Distribution}.255 256 The observations are embodied in my I{data} attribute, an instance of257 L{data.Data}.258 259 @ivar N_normParams: The number of normalization parameters, if any,260 that I will be expecting at the beginning of parameter vectors.261 262 @ivar data: A callable object that returns a 1-D array of data values.263 264 @ivar distribution: An instance of L{dist.Distribution}.265 266 251 """ 267 252 keyAttrs = {'N_normParams':None, 'data':None, 'distribution':None} projects/AsynCluster/trunk/pypmc/test/project.py
r132 r133 23 23 from pypmc.tseries import TimeSeries 24 24 25 26 def dataPath(fileName): 27 return os.path.join(projectDir, 'data', fileName)25 def ts(fileName): 26 filePath = "%s.dat" % os.path.join(os.path.dirname(__file__), fileName) 27 return TimeSeries(filePath) 28 28 29 29 30 30 # Data 31 stocks = TimeSeries(dataPath("vtsmx.dat")).v2r()32 bonds = TimeSeries(dataPath("vbmfx.dat")).v2r()33 34 31 data = [ 35 # Name Object32 # Name Data Source 36 33 #-------------------------------------------------------------------------- 37 [ 'stocks', stocks,],38 [ 'bonds', bonds,],39 [ 'sb_diff', stocks-bonds,],34 ['dm', ts("dm-us").v2r() ], 35 ['bp', ts("us-bp").inv().v2r() ], 36 ['jy', ts("jy-us").v2r() ], 40 37 #-------------------------------------------------------------------------- 41 38 ] 42 39 43 44 # Parameter vector, with prior distribution parameters45 46 parameters = [47 # Name Dist loc scale a b48 #--------------------------------------------------------------------------49 [ 'cs', 'beta', 0.30, 0.30, 2, 2, ],50 51 [ 's_drift', 'beta', 0.006, 0.009, 2, 2, ],52 [ 's_gamma', 'beta', 0.0, 9.0, 2, 2, ],53 [ 's_theta', 'beta', 0.004, 0.010, 2, 2, ],54 [ 's_alpha', 'beta', 0.1, 6.0, 2, 3, ],55 [ 's_rho', 'beta', -0.9, 0.6, 2, 2, ],56 57 [ 'b_drift_os', 'beta', 0.002, 0.004, 2, 2, ],58 [ 'b_gamma', 'beta', 0.0, 0.5, 2, 3, ],59 [ 'b_theta', 'beta', 0.0001, 0.0010, 2, 4, ],60 [ 'b_alpha', 'beta', 0.01, 0.3, 2, 3, ],61 [ 'b_rho', 'beta', -0.9, 0.3, 2, 2, ],62 63 [ 'sb_drift', 'beta', -0.010, 0.010, 2, 2, ],64 [ 'sb_gamma', 'beta', 0.1, 11.0, 2, 2, ],65 [ 'sb_theta', 'beta', 0.00003, 0.00007, 2, 2, ],66 [ 'sb_alpha', 'beta', 0.1, 4.00, 2, 2, ],67 [ 'sb_rho', 'beta', -0.9, 0.7, 2, 2, ],68 #--------------------------------------------------------------------------69 ]70 71 72 # Expressions, computed on the fly from parameters and supplied to models in73 # place thereof74 expressions = [75 # Name Parameter Expression76 
#--------------------------------------------------------------------------77 [ 's_kappa', '(2*s_gamma*s_theta/s_alpha)**0.5' ],78 [ 'b_kappa', '(2*b_gamma*b_theta/b_alpha)**0.5' ],79 [ 'sb_kappa', '(2*sb_gamma*sb_theta/sb_alpha)**0.5' ],80 #--------------------------------------------------------------------------81 ]82 83 84 # Distributions for random variables or linear combinations thereof85 86 sDist = dist.Stochastic_Volatility()87 bDist = dist.Stochastic_Volatility()88 sbDist = dist.Stochastic_Volatility()89 90 distributions = [91 # Name Object92 #--------------------------------------------------------------------------93 ['stocks', dist.Combo_Distribution(dists=[sDist, sbDist]) ],94 ['bonds', dist.Combo_Distribution(dists=[bDist, sbDist]) ],95 ['sb_diff', dist.Combo_Distribution(dists=[sDist, bDist, sbDist]) ],96 #--------------------------------------------------------------------------97 ]98 99 100 # Models101 # Use 'U' as an expression to denote the unit value, i.e., one102 103 models = [104 # [Label,105 # Data, Distribution,106 # [normParams ...],107 # [distParams ...]]108 # -------------------------------------------------------------------------109 ["Vanguard Stock Fund (VTSMX)",110 'stocks', 'stocks', 0.65,111 ['s_drift+cs*sb_drift'],112 ['cs', 's_gamma', 's_theta', 's_kappa', 's_rho',113 'U', 'sb_gamma', 'sb_theta', 'sb_kappa', 'sb_rho']],114 115 ["Vanguard Bond Fund (VBMFX)",116 'bonds', 'bonds', 0.35,117 ['b_drift_os'],118 ['U', 'b_gamma', 'b_theta', 'b_kappa', 'b_rho',119 'U', 'sb_gamma', 'sb_theta', 'sb_kappa', 'sb_rho']],120 121 ["VTSMX vs. VBMFX",122 'sb_diff', 'sb_diff', None,123 ['s_drift+cs*sb_drift-b_drift_os'],124 ['cs', 's_gamma', 's_theta', 's_kappa', 's_rho',125 '-U', 'b_gamma', 'b_theta', 'b_kappa', 'b_rho',126 'cs-1', 'sb_gamma', 'sb_theta', 'sb_kappa', 'sb_rho']],127 #--------------------------------------------------------------------------128 ]projects/AsynCluster/trunk/pypmc/test/test_tseries.py
r132 r133 28 28 from twisted_goodies.pybywire import pack, params 29 29 30 from tseries import TimeSeries 31 30 import tseries 32 31 import util 33 32 … … 64 63 65 64 65 class Test_Loader(util.TestCase): 66 def setUp(self): 67 self.loader = tseries.Loader() 68 69 def check(self, dateField, y, m=1, d=1): 70 for s0 in ("", " ", "\t"): 71 for s1 in (" ", "\t"): 72 for s2 in ("", " ", "\t"): 73 for eValue, vs in ((1.23, "1.23"), (123, "123")): 74 line = "%s%s%s%s%s" % (s0, dateField, s1, vs, s2) 75 year, value = self.loader.parseLine(line) 76 eYear = y + float(m-1)/12 + float(d)/365 77 self.failUnlessAlmostEqual(year, eYear) 78 self.failUnlessEqual(value, eValue) 79 80 def test_ymd(self): 81 self.check("1981-10-01", 1981, 10, 1) 82 self.check("1894-1-1", 1894, 1, 1) 83 self.check("2005-02-28", 2005, 2, 28) 84 85 def test_dMy(self): 86 self.check("01-Oct-81", 1981, 10, 1) 87 self.check("28-feb-92", 1992, 2, 28) 88 self.check("10-Mar-28", 1928, 3, 10) 89 90 def test_ym(self): 91 self.check("1981.10", 1981, 10, 1) 92 self.check("1992-02", 1992, 2, 1) 93 self.check("1928-3", 1928, 3, 1) 94 95 66 96 class Test_TimeSeries(util.TestCase): 67 97 def setUp(self): 68 self.prices = TimeSeries('prices', data=PRICES)98 self.prices = tseries.TimeSeries('prices', data=PRICES) 69 99 70 100 def test_convertInPlace(self): … … 73 103 74 104 def test_v2r_earnings(self): 75 earnings = TimeSeries('earnings', data=EARNINGS).intersect(self.prices)105 earnings = tseries.TimeSeries('earnings', data=EARNINGS).intersect(self.prices) 76 106 self.prices.v2r(earnings=earnings()) 77 107 self.failUnlessAlmostEqual(self.prices()[1], s.log(1.02), places=3) … … 96 126 client to it. 
97 127 """ 98 self.prices = TimeSeries('prices', data=PRICES)128 self.prices = tseries.TimeSeries('prices', data=PRICES) 99 129 return self.getReferenceToRoot(self.CopyableReturner(self.prices)) 100 130 … … 102 132 def got(result): 103 133 self.failIfEqual(id(result), id(self.prices)) 104 self.failUnless(isinstance(result, TimeSeries))134 self.failUnless(isinstance(result, tseries.TimeSeries)) 105 135 self.failUnless(s.equal(result.data, self.prices.data).all()) 106 136 projects/AsynCluster/trunk/pypmc/test/util.py
r132 r133 34 34 from twisted.trial import unittest 35 35 36 for packageName in (' dist', 'model'):36 for packageName in ('model',): 37 37 try: 38 38 exec "import %s" % packageName … … 359 359 return pack.Unpacker(string, decode=True)() 360 360 361 362 def gaussianModelFactory():363 """364 Returns a tuple that can be used as args for a ModelManager constructor,365 for posterior sample analysis from a gaussian distribution.366 """367 priors = [368 model.Prior(name='beta', a=2, b=2, loc=-1, scale=2),369 model.Prior(name='beta', a=2, b=2, loc=0.1, scale=1.9)]370 data = Gaussian_Data()371 distribution = dist.Normal_Distribution()372 modelObj = model.Model(373 name="Univariate Gaussian Model",374 N_normParams=1,375 data=data, distribution=distribution)376 paramExprs = ['drift', 'sigma']377 return ['drift', 'sigma'], priors, [(modelObj, paramExprs)]378 379 361 projects/AsynCluster/trunk/pypmc/tseries.py
r132 r133 60 60 value of one data point loaded from the file. 61 61 """ 62 monthList = [ 63 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 64 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] 65 reList = [ 66 (x, re.compile("^\s*%s\s+([0-9\.]+)\s*$" % y)) 67 for x, y in [ 68 ('ymd', 69 "(\d{4})[\-\./](\d{1,2})[\-\./](\d{1,2})"), 70 ('dMy', 71 "(\d{1,2})[\-\ ]([jfmasondJFMASOND][a-z]{2})[\-\ ](\d{2})"), 72 ('ym', 73 "(\d{4})[\-\./](\d{1,2})")]] 74 62 75 def __call__(self): 63 76 rows = self.load() 64 77 rows.sort(key=lambda x: x[0]) 65 78 return s.array(rows) 79 80 def parseLine(self, line): 81 """ 82 Attempts to parse one line of the data file, returning C{None} if the 83 attempt failed. If it succeeded, returns the year (as a float, with 84 months being fractional values) and the value. 85 """ 86 if not line or line.strip().startswith('#'): 87 return 88 for dateCode, thisRe in self.reList: 89 m = thisRe.match(line) 90 if m is None: 91 continue 92 year = 1.0 / 365 93 for k, char in enumerate(dateCode): 94 if char == 'y': 95 rawYear = int(m.group(k+1)) 96 if rawYear < 100: 97 # WARNING: Not Y2K compliant!!!!! 98 year += 1900+rawYear 99 else: 100 year += rawYear 101 elif char == 'm': 102 year += (float(m.group(k+1)) - 1) / 12 103 elif char == 'M': 104 month = self.monthList.index(m.group(k+1).lower()) 105 year += float(month) / 12 106 elif char == 'd': 107 year += ((float(m.group(k+1)) - 1) / 365) 108 return year, float(m.group(k+2)) 66 109 67 110 … … 70 113 Instantiate me with the path (absolute or relative to current path) of a 71 114 source data file. Then you can call my instance to get a sorted 2-D array 72 in which each row contains the yearand value of one data point loaded from115 in which each row contains the date and value of one data point loaded from 73 116 the file. 
74 117 """ 75 reList = [76 re.compile(x) for x in77 "\s*(\d{4})[\-\./](\d{1,2})[\-\./]\d{1,2}\s+([0-9\.]+)",78 "\s*(\d{4})[\-\./](\d{1,2})\s+([0-9\.]+)",79 "\s*(\d{4})\s+([0-9\.]+)"]80 118 81 119 def __init__(self, filePath): … … 91 129 fh.close() 92 130 return rows 93 94 def parseLine(self, line): 95 """ 96 Attempts to parse one line of the data file, returning C{None} if the 97 attempt failed. If it succeeded, returns the year (as a float, with 98 months being fractional values) and the value. 99 """ 100 if not line or line.strip().startswith('#'): 101 return 102 for thisRe in self.reList: 103 m = thisRe.match(line) 104 if m is None: 105 continue 106 matchList = [float(x) for x in m.groups()] 107 year = matchList[0] 108 if len(matchList) > 2: 109 year += (matchList[1] - 1) / 12 110 value = matchList[-1] 111 return year, value 112 113 114 class TextLoader(FileLoader): 131 132 133 class TextLoader(Loader): 115 134 """ 116 135 Instantiate me with a string of text and I'll load from it as L{FileLoader} … … 130 149 131 150 132 class UndatedLoader(FileLoader):133 """134 Instantiate me with the path of an undated (single column, data values135 only) source data file and a start date.136 """137 def __init__(self, filePath, startYear=1900, startMonth=1):138 self.filePath = os.path.abspath(filePath)139 self.year, self.month = startYear, float(startMonth)140 141 def parseLine(self, line):142 """143 Attempts to parse one line of the data file, returning C{None} if the144 attempt failed. 
If it succeeded, returns the year (as a float, with145 months being fractional values) and the value.146 """147 line = line.strip()148 if not line or line.startswith('#'):149 return150 year = self.year + self.month/12151 self.month += 1152 if self.month == 13:153 self.year += 1154 self.month = 1.0155 return year, float(line)156 157 158 151 class TimeSeries(params.Parameterized): 159 152 """ … … 166 159 167 160 def __init__( 168 self, nameOrFilePath, data=None, j=None, k=None , undated=False):161 self, nameOrFilePath, data=None, j=None, k=None): 169 162 self.name = nameOrFilePath 170 163 if data is None: 171 if undated: 172 data = UndatedLoader(nameOrFilePath)() 173 else: 174 data = FileLoader(nameOrFilePath)() 164 data = FileLoader(nameOrFilePath)() 175 165 elif isinstance(data, str): 176 166 data = TextLoader(data)() … … 277 267 278 268 @transform 269 def inv(self, Y): 270 """ 271 Transforms values into their inverse (reciprocal) values. 272 """ 273 return 1.0 / Y 274 275 @transform 279 276 def v2r(self, Y, earnings=None): 280 277 """
