root/projects/AsynCluster/trunk/svpmc/tseries.py

Revision 164, 9.7 kB (checked in by edsuom, 7 months ago)

Working on ProjectManager?, pulling things together

Line 
1 # svpmc: Stochastic Volatility Inference via Population Monte Carlo
2 #
3 # Copyright (C) 2007-2008 by Edwin A. Suominen, http://www.eepatents.com
4 #
5 # This program is free software; you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
8 # version.
9 #
10 # This program is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE.  See the file COPYING for more details.
13 #
14 # You should have received a copy of the GNU General Public License along with
15 # this program; if not, write to the Free Software Foundation, Inc., 51
16 # Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
17
18 """
19 Time series data sources.
20 """
21
22 import re, os.path
23 import scipy as s
24 from twisted.spread import pb
25
26 from twisted_goodies.pybywire import params
27
28
29 def transform(f):
30     """
31     Decorate a method of L{TimeSeries} with me to allow it optionally accept a vector
32     of values to be transformed. If the vector is supplied, the method will
33     transform that and return the result. Otherwise it will transform the
34     instance's values in-place and, for convenience, return a reference to the
35     instance.
36
37     Keywords can be included and will not affect whether the method is
38     transformative or not.
39     """
40     def wrapper(self, *Y, **kw):
41         if Y:
42             YY = Y[0]
43         else:
44             YY = self.data[:,1]
45         N1 = len(YY)
46         YY = f(self, YY, **kw)
47         if Y:
48             return YY
49         self.Y(YY)
50         return self
51
52     wrapper.__name__ = f.__name__
53     return wrapper
54    
55
56 class Loader(object):
57     """
58     Base class for file and string loaders. You can call an instance of those
59     classes to get a sorted 2-D array in which each row contains the year and
60     value of one data point loaded from the file.
61     """
62     monthList = [
63         'jan', 'feb', 'mar', 'apr', 'may', 'jun',
64         'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
65     reList = [
66         (x, re.compile("^\s*%s\s+([0-9\.]+)\s*$" % y))
67         for x, y in [
68         ('ymd',
69          "(\d{4})[\-\./](\d{1,2})[\-\./](\d{1,2})"),
70         ('dMy',
71          "(\d{1,2})[\-\ ]([jfmasondJFMASOND][a-z]{2})[\-\ ](\d{2})"),
72         ('ym',
73          "(\d{4})[\-\./](\d{1,2})")]]
74
75     def __call__(self):
76         rows = self.load()
77         rows.sort(key=lambda x: x[0])
78         return s.array(rows)
79        
80     def parseLine(self, line):
81         """
82         Attempts to parse one line of the data file, returning C{None} if the
83         attempt failed. If it succeeded, returns the year (as a float, with
84         months being fractional values) and the value.
85         """
86         if not line or line.strip().startswith('#'):
87             return
88         for dateCode, thisRe in self.reList:
89             m = thisRe.match(line)
90             if m is None:
91                 continue
92             year = 1.0 / 365
93             for k, char in enumerate(dateCode):
94                 if char == 'y':
95                     rawYear = int(m.group(k+1))
96                     if rawYear < 100:
97                         # WARNING: Not Y2K compliant!!!!!
98                         year += 1900+rawYear
99                     else:
100                         year += rawYear
101                 elif char == 'm':
102                     year += (float(m.group(k+1)) - 1) / 12
103                 elif char == 'M':
104                     month = self.monthList.index(m.group(k+1).lower())
105                     year += float(month) / 12
106                 elif char == 'd':
107                     year += ((float(m.group(k+1)) - 1) / 365)
108             return year, float(m.group(k+2))
109
110
111 class FileLoader(Loader):
112     """
113     Instantiate me with the path (absolute or relative to current path) of a
114     source data file. Then you can call my instance to get a sorted 2-D array
115     in which each row contains the date and value of one data point loaded from
116     the file.
117     """
118
119     def __init__(self, filePath):
120         self.filePath = os.path.abspath(filePath)
121
122     def load(self):
123         rows = []
124         fh = open(self.filePath)
125         for line in fh:
126             parsed = self.parseLine(line)
127             if parsed:
128                 rows.append(parsed)
129         fh.close()
130         return rows
131
132
133 class TextLoader(Loader):
134     """
135     Instantiate me with a string of text and I'll load from it as L{FileLoader}
136     would from a file containing that text.
137     """
138     def __init__(self, text):
139         self.textLines = text.split("\n")
140        
141     def load(self):
142         rows = []
143         for line in self.textLines:
144             parsed = self.parseLine(line)
145             if parsed:
146                 rows.append(parsed)
147         del self.textLines
148         return rows
149
150
151 class TimeSeries(params.Parameterized):
152     """
153     I am a time series of stochastic source data, with methods especially
154     suited to financial time series.
155     """
156     _X, _Y = None, None
157
158     paramNames = ['data']
159    
160     def __init__(
161         self, name, filePath=None, text=None, data=None, j=None, k=None):
162         self.name = name
163         if filePath:
164             data = FileLoader(filePath)()
165         elif text:
166             data = TextLoader(text)()
167         params.Parameterized.__init__(self, data=data[slice(j, k),:])
168
169     def X(self):
170         return self.data[:,0]
171
172     def Y(self, value=None):
173         if value is None:
174             return self.data[:,1]
175         if len(value) != s.shape(self.data)[0]:
176             raise ValueError("Transforms can't change data length!")
177         data = s.column_stack([self.data[:,0], value])
178         self.data = data
179
180     def __call__(self):
181         """
182         You can simply call me to get my full Y vector.
183         """
184         return self.Y()
185
186     def __getitem__(self, key):
187         """
188         Returns a copy of me with data limited to a year or range of years.
189         """
190         X = self.X()
191         N = len(X)
192         if isinstance(key, slice):
193             start, stop = key.start, key.stop
194         else:
195             start = stop = key
196         if start is None:
197             start = min(X)
198         elif start < 0:
199             start = max(X) + start
200         if stop is None:
201             stop = max(X)
202         elif stop < 0:
203             stop = max(X) + stop
204         I = s.arange(N)[s.greater_equal(X, start) & s.less(X, stop+1)]
205         if len(I):
206             return self.copy(I[0], I[-1]+1)
207
208     def __hash__(self):
209         X = self.X()
210         return hash((self.name, min(X), max(X), len(X)))
211
212     def __len__(self):
213         return len(self.Y())
214
215     def __abs__(self):
216         return s.absolute(self.Y())
217
218     def transmogrifator(self, other):
219         if isinstance(other, TimeSeries):
220             newTimeSeriesObject = self.intersect(other)
221             otherTimeSeries = other.intersect(self).Y()
222         else:
223             newTimeSeriesObject = self.copy()
224             otherTimeSeries = other
225         yield newTimeSeriesObject, otherTimeSeries
226
227     def __mul__(self, other):
228         for a, b in self.transmogrifator(other):
229             a.Y(a.Y()*b)
230         return a
231
232     def __div__(self, other):
233         for a, b in self.transmogrifator(other):
234             a.Y(a.Y()/b)
235         return a
236
237     def __add__(self, other):
238         for a, b in self.transmogrifator(other):
239             a.Y(a.Y()+b)
240         return a
241
242     def __sub__(self, other):
243         for a, b in self.transmogrifator(other):
244             a.Y(a.Y()-b)
245         return a
246
247     def truncate(self, j=None, k=None):
248         """
249         """
250         self.data = self.data[slice(j, k),:]
251
252     def intersect(self, other):
253         """
254         Returns a copy of myself with elements restricted to those having time
255         series X values that are also found in the I{other} data object.
256         """
257         X = s.intersect1d(self.X(), other.X())
258         I = self.data[:,0].searchsorted(X)
259         return self.copy(I[0], I[-1]+1)
260
261     def copy(self, j=None, k=None):
262         """
263         Returns a copy of me with a copied underlying data array, extents of
264         which may be limited to the slice I{j:k}.
265         """
266         return self.__class__(self.name, data=self.data.copy(), j=j, k=k)
267
268     @transform
269     def inv(self, Y):
270         """
271         Transforms values into their inverse (reciprocal) values.
272         """
273         return 1.0 / Y
274
275     @transform
276     def v2r(self, Y, earnings=None):
277         """
278         Transforms asset values into log returns relative to the previous
279         value. The log return of the first value is set to 0.0.
280
281         If I{earnings} is set to a vector, the log returns will be adjusted to
282         account for increases in holdings proportional to each corresponding
283         earnings amount, or decreases in the case of negative earnings. There
284         must be an earnings value for every asset value except the last one.
285         """
286         N = len(Y)
287         if earnings is None:
288             E = s.zeros(N-1)
289         elif len(earnings) < N-1:
290             raise ValueError(
291                 "Must be an earnings value for every asset value except last")
292         else:
293             E = earnings[:N]
294         R = s.ones(N)
295         for k, earningsThisPeriod in enumerate(E):
296             R[k+1] = (Y[k+1] + earningsThisPeriod) / Y[k]
297         return s.log(R)
298
299     @transform
300     def r2p(self, Y):
301         """
302         Transforms asset log returns into percentage changes.
303         """
304         return 100*(s.exp(Y) - 1.0)
305
306     @transform
307     def p2v(self, Y):
308         """
309         Transforms percentage changes into asset values, with an initial value
310         of 1.00 times the first percentage change.
311         """
312         def iterator():
313             value = 1.0
314             for fractionalChange in (0.01*Y + 1.0):
315                 value *= fractionalChange
316                 yield value
317
318         return s.fromiter(iterator())
Note: See TracBrowser for help on using the browser.