1 '''(Mostly time-series-related) functions needed and written by Sven Schreiber.
2
3 This is free but copyrighted software, distributed under the same license terms
4 (as of January 2007) as the 'gretl' program by Allin Cottrell and others, see
5 gretl.sf.net (in short: GPL v2, see www.gnu.org/copyleft/gpl.html).
6
7 (see end of this file for a changelog)
8 '''
9 from numpy import r_, c_, arange, diff, mean, sqrt, log, mat
10 from numpy import asarray, nan
11 from numpy.matlib import ones, zeros, rand, eye, empty
12 from numpy.linalg import eigh, cholesky, solve, lstsq
13
14
15
# Lookup tables for converting between quarter/month numbers, the first
# month of a quarter, and the fractional-year offsets used for date floats.

# Quarter number -> number of the first month in that quarter.
quarter2month = {1: 1, 2: 4, 3: 7, 4: 10}

# Month number -> number of the quarter containing it.
month2quarter = {
    1: 1, 2: 1, 3: 1,
    4: 2, 5: 2, 6: 2,
    7: 3, 8: 3, 9: 3,
    10: 4, 11: 4, 12: 4,
}

# Quarter number -> fraction of the year elapsed at the start of the quarter.
qNumber2qFloat = {1: 0.0, 2: 0.25, 3: 0.5, 4: 0.75}

# Month number -> fractional-year offset (multiples of 0.0833).  The values
# are spelled out as literals because they double as dictionary keys below.
mNumber2mFloat = {
    1: 0.0, 2: 0.0833, 3: 0.1666, 4: 0.2499,
    5: 0.3332, 6: 0.4165, 7: 0.4998, 8: 0.5831,
    9: 0.6664, 10: 0.7497, 11: 0.8330, 12: 0.9163,
}

# Inverse mappings: fractional-year offset -> quarter / month number.
qFracstring2qString = dict((frac, q) for q, frac in qNumber2qFloat.items())
mFloat2mNumber = dict((frac, m) for m, frac in mNumber2mFloat.items())
28
29
30 from numpy.linalg import lstsq, svd
31 from numpy import where
def rank(m, rcond=1e-10):
    '''
    Returns the (algebraic, not numpy-jargon) rank of m.

    The rank is counted as the number of singular values of m that exceed
    rcond times the largest singular value.  An input without any elements
    has rank 0.
    '''
    if asarray(m).size == 0:
        # No singular values exist, so svals[0] below would raise IndexError.
        return 0
    svals = svd(m)[1]
    # svd returns the singular values in descending order, so svals[0] is
    # the largest one and sets the scale of the tolerance.
    return (svals > svals[0] * rcond).sum()
38
40 '''
41 Returns all columns of the input as a stacked (column) vector.
42
43 If m is a numpy-array, a 1d-array is returned. For a numpy-matrix m,
44 the output has shape (n*m, 1).
45 '''
46 return m.T.ravel().T
47
48 from numpy import mat, asarray
50 '''
51 Turns (column) vector into matrix of shape == (rows, cols).
52
53 Also accepts 1d-array input, but always returns numpy matrix.
54 '''
55 if type(m) == type(mat(m)):
56 assert m.shape[1] == 1
57 intype = 'matrix'
58 else:
59 assert len(m.shape) == 1
60 intype = 'array'
61 m = mat(m).T
62 assert cols * rows == m.shape[0]
63 out = m.reshape(cols, rows).T
64 if intype == 'array': return asarray(out)
65 else: return out
66
67 from numpy import mat
69 '''
70 Turns numpy matrix or array (or scalar!) m into gretl string representation.
71 '''
72
73 out = ';'.join( [ ','.join(map(str, row)) for row in mat(m).tolist() ] )
74 return '{' + out + '}'
75
def startobs2obslist(startperiod, numofobs):
    '''
    Constructs list of observation labels following the input pattern.

    Example:
    startperiod = '1999q3', numofobs = 2 -> ['1999q3', '1999q4']
    Currently supports only annual (pure number), monthly, quarterly.
    Years must be in 4-digit format.

    Exactly numofobs labels are returned (an empty list if numofobs <= 0).
    The frequency letter is copied from startperiod as given; the period
    number is written without zero-padding (e.g. '2001m1').
    Raises NotImplementedError for any other frequency marker.
    '''
    if startperiod.isdigit():
        # annual data: the label is just the year
        startnumber = int(startperiod)
        return [str(startnumber + ix) for ix in range(numofobs)]

    marker = startperiod[4]
    if marker in 'qQ':
        wrap = 4
        period = int(startperiod[5])
    elif marker in 'mM':
        wrap = 12
        period = int(startperiod[5:7])
    else:
        raise NotImplementedError('only annual, quarterly or monthly labels')

    year = int(startperiod[:4])
    out = []
    # Emit the current label first and advance afterwards, so that the start
    # period is included and exactly numofobs labels result.
    for ix in range(numofobs):
        out.append(str(year) + marker + str(period))
        if period == wrap:
            period = 1
            year += 1
        else:
            period += 1
    return out
106
107 import csv
108 from numpy import mat
def writecsv(filename, data, orientation='cols', delim=',',
             varnames=(), obslabels=(), comments=(), commentchar='# '):
    '''
    Saves array or matrix <data> in csv format in file <filename> (path string).

    <comments> must be passed as a sequence of strings, one for each line,
     and will be written at the top of the file, each line starting with
     <commentchar>.
    <orientation> can be 'cols' or 'rows', determines whether the
     variable names will be used as column or row headers, and how to treat
     1d-input. (And observation labels will be written accordingly.)
     With 'cols', input consisting of a single row is written as a column.
    <varnames> and <obslabels> must be sequences of strings; they are never
     modified.

    If row headers are given, the header line starts with an extra cell
    ('obs' or 'var').  No header line is written if there are no headers at
    all.  Header lengths are checked (with assert) before the file is opened.
    Returns 0.
    '''
    from numpy import atleast_2d

    data = atleast_2d(data)
    if orientation == 'rows':
        colheaders = list(obslabels)
        rowheaders = list(varnames)
        cell11 = 'var'
    else:
        colheaders = list(varnames)
        rowheaders = list(obslabels)
        cell11 = 'obs'
        if data.shape[0] == 1:
            data = data.T

    if colheaders:
        assert len(colheaders) == data.shape[1], \
            'number of column headers does not match the data'
    if rowheaders:
        assert len(rowheaders) == data.shape[0], \
            'number of row headers does not match the data'
        # Build a new list: list.insert() works in place and returns None,
        # so its result must not be handed to the csv writer.
        header = [cell11] + colheaders
    else:
        header = colheaders

    rows = data.tolist()
    if rowheaders:
        rows = [[label] + row for label, row in zip(rowheaders, rows)]

    # The csv module wants newline='' on Python 3 and binary mode on
    # Python 2 (where open() does not know the newline keyword).
    try:
        outfile = open(filename, 'w', newline='')
    except TypeError:
        outfile = open(filename, 'wb')
    try:
        target = csv.writer(outfile, delimiter=delim)
        target.writerows([[commentchar + comment] for comment in comments])
        if header:
            target.writerow(header)
        target.writerows(rows)
    finally:
        outfile.close()

    return 0
148
149 import csv
150 from numpy import mat
def readcsv(filename, delim=',', commentchar='#', colheader='names',
            rowheader='obs'):
    '''
    Read in a csv file (may contain comments starting with commentchar).

    The contents of the first non-comment row and column must be indicated in
     rowheader and colheader as one of 'names', 'obs' (labels), or None.
    The array (matrix) of the data is returned as is, i.e. w/o transpose, hence
     the caller must know whether variables are in rows or columns.
    If both colheader and rowheader are not None, the upper-left cell (header
     of the first row/col) is ignored (but must be non-empty).
    Blank lines are skipped.

    Returns a five-element tuple:
    0. numpy-matrix of the actual data as floats
    1. orientation of variables: 'cols', 'rows', or 'unknown'
    2. 1d-array of variable names (or None)
    3. 1d-array of observation labels (or None)
    4. the type/frequency of the data
        (currently one of 'a', 'q', 'm', guessed from the first date label)
        (if this deduction failed, or there are no labels, 'unknown' is
        returned here)

    Easiest example with upper-left data cell in second row/second column:
    mydata = readcsv('myfile.csv')[0]
    '''
    from numpy import array, asmatrix

    # The csv module wants newline='' on Python 3 and binary mode on
    # Python 2 (where open() does not know the newline keyword).
    try:
        infile = open(filename, 'r', newline='')
    except TypeError:
        infile = open(filename, 'rb')
    try:
        read_from = csv.reader(infile, delimiter=delim,
                               skipinitialspace=True)
        # Drop blank lines first; line[0] does not exist for them.
        rows = [line for line in read_from
                if any(cell.strip() for cell in line)
                and not line[0].strip().startswith(commentchar)]
    finally:
        infile.close()
    cells = array(rows, dtype=str)

    # Headers that are not present in the file are reported as None.
    varnames = None
    obslabels = None
    if colheader == 'names':
        orientation = 'cols'
        varnames, cells = cells[0, :], cells[1:, :]
        if rowheader == 'obs':
            obslabels, cells = cells[:, 0], cells[:, 1:]
            varnames = varnames[1:]     # drop the upper-left corner cell
    elif rowheader == 'names':
        orientation = 'rows'
        varnames, cells = cells[:, 0], cells[:, 1:]
        if colheader == 'obs':
            obslabels, cells = cells[0, :], cells[1:, :]
            varnames = varnames[1:]     # drop the upper-left corner cell
    elif colheader == 'obs':
        orientation = 'rows'
        obslabels, cells = cells[0, :], cells[1:, :]
    elif rowheader == 'obs':
        orientation = 'cols'
        obslabels, cells = cells[:, 0], cells[:, 1:]
    else:
        assert colheader is None, "colheader must be 'names', 'obs' or None"
        assert rowheader is None, "rowheader must be 'names', 'obs' or None"
        orientation = 'unknown'

    # Guess the frequency from the shape of the first observation label:
    # 'YYYY' -> annual, 'YYYYqN' -> quarterly, 'YYYYmNN' -> monthly.
    freq = 'unknown'
    if obslabels is not None and len(obslabels) > 0:
        first = obslabels[0]
        if len(first) == 4:
            freq = 'a'
        elif len(first) == 6 and first[4] in 'qQ':
            freq = 'q'
        elif len(first) == 7 and first[4] in 'mM':
            freq = 'm'

    return asmatrix(cells.astype(float)), orientation, varnames, \
        obslabels, freq
222
223 from numpy import nan
225 '''
226 Converts nacode to numpy.nan value.
227
228 Also returns other input as float (e.g. for matplotlib's load, asarray).
229 '''
230 if datapoint == nacode: return nan
231 return float(datapoint)
232
234 '''
235 Converts '1999q2' -> 1999.25, '1999m2' -> 1999.0833, etc.
236
237 So far only for quarterly and monthly.
238 '''
239 year, freq = float(datestring[:4]), datestring[4]
240 assert freq in 'qQmM', 'sorry, only quarterly or monthly'
241 if freq in 'qQ':
242 result = year + qNumber2qFloat[int(datestring[5])]
243 elif freq in 'mM':
244 result = year + mNumber2mFloat[int(datestring[5:7])]
245 return result
246
247 from datetime import date, timedelta
def getQuarterlyDates(startyear, startquarter, t):
    '''
    Constructs a list of quarterly date labels for t obs.

    The labels have the form '<year>Q<quarter>', e.g. '2006Q2', starting
    with the given year and quarter and advancing one quarter at a time
    (the quarter after Q4 is Q1 of the following year).

    All three inputs may be anything int() accepts (e.g. digit strings);
    the converted values are used throughout.
    Raises TypeError if an input cannot be converted to an integer, and
    ValueError if startquarter is not in 1..4.
    '''
    try:
        year = int(startyear)
        quarter = int(startquarter)
        nobs = int(t)
    except (TypeError, ValueError):
        raise TypeError('need integers for year, quarter, t')
    if quarter not in range(1, 5):
        raise ValueError('startquarter input out of range')

    datestrings = []
    for _ in range(nobs):
        datestrings.append(str(year) + 'Q' + str(quarter))
        if quarter == 4:
            quarter = 1
            year += 1
        else:
            quarter += 1
    return datestrings
275
276 from numpy.linalg import svd
def null(m, rcond=1e-10):
    '''
    Returns the columns of U (from the svd m = U*S*V') beyond the rank of m.

    The rank is counted as the number of singular values exceeding rcond
    times the largest singular value.  The returned columns are orthogonal
    to the columns of m, i.e. they span the null space of m.T.
    NOTE(review): this is the left null space; confirm that callers do not
    expect the right null space (the trailing rows of V') instead.
    '''
    nrows, ncols = m.shape      # also insists on two-dimensional input
    left_vectors, sing_values = svd(m)[:2]
    cutoff = sing_values[0] * rcond
    n_significant = (sing_values > cutoff).sum()
    return left_vectors[:, n_significant:]
282
283 from numpy.matlib import empty, zeros, eye, mat, asarray
284 from numpy.linalg import lstsq
286 '''
287 Constructs the orthogonally complementing columns of the input.
288
289 Input of the form pxr is assumed to have r<=p,
290 and have either full column rank r or rank 0 (scalar or matrix)
291 Output is of the form px(p-r), except:
292 a) if M square and full rank p, returns scalar 0
293 b) if rank(M)=0 (zero matrix), returns I_p
294 (Note you cannot pass scalar zero, because dimension info would be
295 missing.)
296 Return type is as input type.
297 '''
298 if type(m) == type(asarray(m)):
299 m = mat(m)
300 output = 'array'
301 else: output = 'matrix'
302 p, r = m.shape
303
304 if p < r: raise ValueError, 'need at least as many rows as columns'
305
306 rk = lstsq(m, ones(p).T)[2]
307
308 if rk == p: result = zeros((p,0))
309
310 elif rk == 0: result = eye(p)
311
312 elif rk < r:
313 raise ValueError, 'sorry, matrix does not have full column rank'
314
315 else:
316
317
318
319
320
321
322 idr = eye(r)
323 idpr = eye(p-r)
324 c = empty([0,r])
325 co = empty([0, p-r])
326 idrcount = 0
327 for row in range(p):
328
329 if lstsq( m[row,:], ones(1) )[2] == 0 or idrcount >= r:
330 c = r_[ c, zeros(r) ]
331 co = r_[ co, idpr[row-idrcount, :] ]
332 else:
333 c = r_[ c, idr[idrcount, :] ]
334 co = r_[ co, zeros(p-r) ]
335 idrcount += 1
336
337
338
339
340 result = co - c * solve(m.T * c, m.T * co)
341 if output == 'array': return result.A
342 else: return result
343
344 from numpy import mat, asarray
346 '''
347 Adds (contiguous) lags as additional columns to the TxN input.
348
349 Early periods first. If maxlag is zero, original input is returned.
350 maxlag rows are deleted (the matrix is shortened)
351 '''
352 if type(m) == type(asarray(m)):
353 m = mat(m)
354 output = 'array'
355 else: output = 'matrix'
356 T, N = m.shape
357 if type(maxlag) != type(4):
358 raise TypeError, 'addLags: need integer for lag order'
359 if maxlag > m.shape[0]:
360 raise ValueError, 'addLags: sample too short for this lag'
361 temp = m[ maxlag: ,:]
362 for lag in range(1, maxlag + 1) :
363 temp = c_[ temp, m[(maxlag-lag):(T-lag) ,:] ]
364 if output == 'array': return asarray(temp)
365 else: return temp
366
367 from numpy.matlib import empty, ones, zeros
368 from numpy import mat, c_, r_
370 '''
371 Returns various useful deterministic terms for a given sample length T.
372
373 Return object is a numpy-matrix-type of dimension Tx(len(which));
374 (early periods first, where relevant).
375 In the 'which' argument pass a string composed of the following letters,
376 in arbitrary order:
377 c - constant (=1) term
378 t - trend (starting with 0)
379 q - centered quarterly seasonal dummies (starting with 0.75, -0.25...)
380 m - centered monthly seasonal dummies (starting with 11/12, -1/12, ...)
381 l - level shift (date applies)
382 s - slope shift (date applies)
383 i - impulse dummy (date applies)
384
385 If the date argument is a floating point number (between 0 and 1),
386 it is treated as the fraction of the sample where the break occurs.
387 If instead it is an integer between 0 and T, then that observation is
388 treated as the shift date.
389 '''
390
391 if type(nobs) != type(4):
392 raise TypeError, 'need integer for sample length'
393 if nobs <=0: raise ValueError, 'need positive sample length'
394 if type(date) == type(0.5):
395 if date < 0 or date > 1:
396 raise ValueError, 'need break fraction between 0 and 1'
397 shiftperiod = int(date * nobs)
398 elif type(date) == type(4):
399 if date not in range(1, nobs+1):
400 raise ValueError, 'need period within sample range'
401 shiftperiod = date
402 else: raise TypeError, 'need float or integer input for date'
403 if type(which) != type('a string'):
404 raise TypeError, 'need string for case spec'
405
406
407 out = empty([nobs,0])
408 if 'c' in which: out = c_[ out, ones(nobs).T ]
409 if 't' in which: out = c_[ out, r_['c', :nobs] ]
410 if 'l' in which:
411 shift = r_[ zeros(shiftperiod).T, ones(nobs-shiftperiod).T ]
412 out = c_[ out, shift ]
413 if 's' in which:
414 slopeshift = r_[ zeros(shiftperiod).T, r_['c', 1:(nobs - shiftperiod + 1)] ]
415 out = c_[ out, slopeshift ]
416 if 'i' in which:
417 impulse = r_[ zeros(shiftperiod).T, ones(1), zeros(nobs-shiftperiod-1).T ]
418 out = c_[ out, impulse ]
419 if 'q' in which or 'Q' in which:
420
421 q1 = [0.75, -0.25, -0.25, -0.25] * (1 + nobs/4)
422 q2 = [-0.25, 0.75, -0.25, -0.25] * (1 + nobs/4)
423 q3 = [-0.25, -0.25, 0.75, -0.25] * (1 + nobs/4)
424 out = c_[ out, mat(q1[:nobs]).T, mat(q2[:nobs]).T, mat(q3[:nobs]).T ]
425 if 'm' in which or 'M' in which:
426 temp = [-1./12] * 11
427 for month in range(11):
428 temp.insert(month, 1-temp[0])
429
430 monthly = temp * (1 + nobs/12)
431 out = c_[ out, mat(monthly[:nobs]).T ]
432 return out
433
434 from numpy.matlib import empty
436 '''
437 Returns a (numpy-)matrix of impulse dummies for the specified periods.
438
439 sampledateslist must consist of 1999.25 -style dates (quarterly or monthly).
440 However, because periodslist is probably human-made, it expects strings
441 such as '1999q3' or '1999M12'.
442 Variables in columns.
443 So far only for quarterly and monthly data.
444 '''
445 nobs = len(sampledateslist)
446 result = empty([nobs,0])
447 for periodstring in periodslist:
448 period = dateString2dateFloat(periodstring)
449 result = c_[result, getDeterministics(nobs, 'i', \
450 sampledateslist.index(period))]
451 return result
452
453 from numpy import mat, asarray
454 from numpy.linalg import cholesky, eigh
456 ''' Solves symmetric-positive-def. generalized eigenvalue problem Az=lBz.
457
458 Takes two real-valued symmetric matrices A and B (B must also be
459 positive-definite) and returns the corresponding (also real-valued)
460 eigenvalues and eigenvectors.
461
462 Return format: as in scipy.linalg.eig, tuple (l, Z); l is taken from eigh
463 output (a 1-dim array of length A.shape[0] ?) ordered ascending, and Z is
464 an array or matrix (depending on type of input A) with the corresponding
465 eigenvectors in columns (hopefully).
466
467 Steps:
468 1. get lower triang Choleski factor of B: L*L.T = B
469 <=> A (LL^-1)' z = l LL' z
470 <=> (L^-1 A L^-1') (L'z) = l (L'z)
471 2. standard eig problem, with same eigvals l
472 3. premultiply eigvecs L'z by L^-1' to get z
473 '''
474 output = 'matrix'
475 if type(A) == type(asarray(A)):
476 output = 'array'
477 A, B = mat(A), mat(B)
478
479 LI = cholesky(B).I
480
481 evals, evecs = eigh(LI * A * LI.T)
482
483 evecs = evecs[:, evals.argsort()]
484 evals.sort()
485
486 evecs = LI.T * evecs
487 if output == 'array': return evals, asarray(evecs)
488 else: return evals, evecs
489
490 from numpy.matlib import eye, c_
492 '''
493 Converts Vecm coeffs to levels VAR representation.
494
495 Gammas need to be coeffs in shape #endo x (maxlag-1)*#endo,
496 such that contemp_diff = alpha*ect + Gammas * lagged_diffs
497 is okay when contemp_diff is #endo x 1.
498 We expect matrix input!
499 '''
500 if alpha.shape != beta.shape:
501 raise ValueError, 'alpha and beta must have equal dim'
502 N_y = alpha.shape[0]
503 if beta.shape[0] != N_y:
504 raise ValueError, "alpha or beta dim doesn't match"
505 if gammas.shape[0] != N_y:
506 raise ValueError, "alpha or gammas dim doesn't match"
507 if gammas.shape[1] != (maxlag-1)*N_y:
508 raise ValueError, "maxlag or gammas dim doesn't match"
509
510
511 levelscoeffs = eye(N_y) + alpha * beta.T + gammas[ : , :N_y ]
512
513 for lag in range(1, maxlag-1):
514 levelscoeffs = c_[ levelscoeffs, gammas[:, N_y*lag : N_y*(lag+1)] - \
515 gammas[:, N_y*(lag-1) : N_y*lag ] ]
516
517 return c_[ levelscoeffs, -gammas[:, -N_y: ] ]
518
520 '''
521 Converts Vecm-coeffs for ect at t-1 to the ones for ect at t-maxlag.
522
523 The input gammas (shortrun coeffs) refer to a Vecm where the levels are
524 lagged one period. In the alternative representation with the levels
525 lagged maxlag periods the shortrun coeffs are different; the relation is:
526 alt_gamma_i = alpha * beta' + gamma_i
527
528 Actually with numpy's broadcasting the function is a one-liner so this here
529 is mainly for documentation and reference purposes.
530 In terms of the levels VAR coefficients A_i (i=1..maxlag) the gammas are
531 defined as:
532 gamma_i = - \sum_{j=i+1)^maxlag A_j for i=1..maxlag-1;
533 and the alternative gammas (used py Proietti e.g.) are:
534 alt_gamma_i = -I + \sum_{j=1}^i A_j for i=1..maxlag-1.
535 (And \alpha \beta' = -I + \sum_{j=1}^maxlag A_j.)
536 '''
537
538 return alpha * beta.T + gammas
539
540 import os
541 from numpy.matlib import mat
543 '''
544 Writes a gretl matrix xml file to transfer matrices.
545
546 outfile should be a path string,
547 matrices is a list of numpy matrices,
548 matnames is a string list of wanted matrix names (if empty, matrices
549 are named m1, m2, etc.)
550 '''
551 if matnames == []:
552 matnames = ['m' + str(mindex) for mindex in range(len(matrices))]
553 assert len(matrices) == len(matnames)
554 out = open(outfile, 'w')
555 out.write('<?xml version="1.0" encoding="UTF-8"?>' + os.linesep)
556 out.write('<gretl-matrices count="' + str(len(matrices)) + '">' + os.linesep)
557 for m in matrices:
558 out.write('<gretl-matrix name="' + matnames.pop(0) + '" ')
559 out.write('rows="' + str(m.shape[0]) + '" ')
560 out.write('cols="' + str(m.shape[1]) + '">' + os.linesep)
561 for row in m: out.write(str(row).strip('][') + os.linesep)
562 out.write('</gretl-matrix>' + os.linesep)
563 out.write('</gretl-matrices>')
564 out.close()
565
566
567
568
569
570 from numpy.matlib import zeros, mat, asarray
def autocovar(series, LagInput, Demeaned=False):
    '''
    Computes the autocovariance of a uni- or multivariate time series.

    Usage: autocovar(series, Lag [, Demeaned=False]) returns the NxN
     autocovariance matrix (even for N=1), where series is
     an TxN matrix holding the N-variable T-period data (early periods first),
     and Lag specifies the lag at which to compute the autocovariance.
    Specify Demeaned=True if passing ols-residuals to avoid double demeaning.

    The sum of cross products is divided by T (not T-Lag).  A lag of T or
     more yields a zero matrix.
    Returns a plain array for array input and a numpy-matrix for matrix
     input.
    Raises TypeError if the lag cannot be converted to an integer, and
     ValueError for a negative lag or input with more than two dimensions.
    '''
    from numpy import asarray, asmatrix, atleast_2d, ndarray

    try:
        Lag = int(LagInput)
    except (TypeError, ValueError):
        raise TypeError('autocovar: nonsense lag input type')
    if Lag < 0:
        # Negative lags would silently wrap around via negative indexing.
        raise ValueError('autocovar: need non-negative lag')

    # Plain arrays (and non-numpy sequences) give array output; matrices
    # and other ndarray subclasses give matrix output, as before.
    wants_array = type(series) is ndarray or not isinstance(series, ndarray)

    dev = atleast_2d(asarray(series, dtype=float))
    if dev.ndim != 2:
        raise ValueError('autocovar: need (at most) two-dimensional input')
    t = dev.shape[0]
    if Demeaned == False:   # noqa: E712 -- keep semantics for non-bool flags
        dev = dev - dev.mean(axis=0)

    # sum over s = Lag..t-1 of dev[s]' * dev[s-Lag], as one matrix product
    result = dev[Lag:].T.dot(dev[:max(t - Lag, 0)]) / t

    if wants_array:
        return result
    return asmatrix(result)
601
602 from numpy.matlib import zeros, mat, asarray
def longrunvar(series, Demeaned=False, LagTrunc=4):
    '''
    Estimates the long-run variance (aka spectral density at frequency zero)
    of a uni- or multivariate time series.

    Usage: lrv = longrunvar(series [, Demeaned, LagTrunc]),
     where series is a TxN matrix holding
     the N-variable T-period data (early periods first).
    The Bartlett weighting function is used
     up to the specified lag truncation (default = 4), i.e. the
     autocovariance at lag tau gets the weight 1 - tau/(LagTrunc+1).
    If LagTrunc >= T-1 it is reset to int(sqrt(T)) and a warning is printed.
    Specify Demeaned=True when passing Ols-residuals etc. (default False).
    Returns an NxN matrix (even for N=1), of the type that autocovar()
     returns for this input.
    Raises TypeError if LagTrunc cannot be converted to an integer.
    '''
    shape = series.shape
    # 1d-input counts as one single row (the way numpy's mat() reads it).
    t = shape[0] if len(shape) > 1 else 1

    try:
        Lag = int(LagTrunc)
    except (TypeError, ValueError):
        raise TypeError('longrunvar: nonsense lag input type')
    if Lag >= t - 1:
        Lag = int(sqrt(t))
        print('longrunvar warning: not enough data for chosen lag window')
        print(' '.join(['(was ', str(LagTrunc), ', reset to ', str(Lag), ')']))

    result = autocovar(series, 0, Demeaned)
    for tau in range(1, Lag + 1):
        Gamma = autocovar(series, tau, Demeaned)
        # Float arithmetic on purpose: with integers, tau/(Lag+1) is
        # truncated to 0 under Python 2 and every weight would be 1.
        weight = 1.0 - tau / (Lag + 1.0)
        # positive and negative lags are both accounted for
        result = result + weight * (Gamma + Gamma.T)
    return result
639
640 from numpy.matlib import ones, zeros, mat
641 from numpy.linalg import solve
643 '''
644 The Nyblom&Harvey(2000)-type tests for K_0 against K>K_0
645 common stochastic trends in time series.
646
647 Usage:
648 commontrendstest(series [, LagTrunc, Deterministics, breakpoint])
649 returns a 1d N-array with the test statistics (partial sums of relevant
650 eigenvalues), starting with the null hypothesis K_0=N-1 and ending with
651 K_0=0.
652
653 Input:
654 TxN array of data in series (early periods first).
655
656 LagTrunc:
657 determines the truncation lag of the nonparametric estimate of the
658 longrun variance.
659
660 Deterministics:
661 'c' - constant mean,
662 't' - to automatically de-trend the data (linearly),
663
664 or use one of the following models with (one-time) deterministic shifts
665 (see Busetti 2002):
666 '1' - (a string!) for a level shift w/o trend,
667 '2' - for a model with breaks in the mean and the trend slope,
668 '2a' - for a trend model where only the mean shifts.
669 (Case 2b --broken trends with connected segments-- is not implemented.)
670
671 For these models '1' through '2a' the relative breakpoint in the sample can
672 be chosen (otherwise it is ignored).
673 '''
674 series = mat(series)
675 t, n = series.shape
676 try: Lag = int(LagTrunc)
677 except: raise TypeError, 'commontrendstest: nonsense lag input type'
678 if Lag <= 0:
679 print 'commontrendstest warning: lag trunc too small, set to default!'
680 Lag = 4
681 if type(breakpoint) != type(0.5):
682 raise TypeError, 'commontrendstest: nonsense breakpoint input type'
683 elif (breakpoint <= 0) or (breakpoint >= 1):
684 raise ValueError, 'commontrendstest: breakpoint not in unit interval'
685
686 if determ == 'c': D = ones(t).T
687 elif determ == 't': D = getDeterministics(t, 'ct')
688 elif determ == '1': D = getDeterministics(t, 'cl', breakpoint)
689 elif determ == '2': D = getDeterministics(t, 'ctls', breakpoint)
690 elif determ == '2a': D = getDeterministics(t, 'ctl', breakpoint)
691
692
693
694
695 Resid = series - D * solve(D.T * D, D.T * series)
696 Cmat = zeros([n,n])
697 for i in range(t):
698 temp = zeros((1,n))
699 for tindex in range(i):
700 temp += Resid[tindex,:]
701 Cmat += temp.T * temp
702 Cmat /= t**2
703 Sm = longrunvar(Resid, True, Lag)
704
705
706
707 try: evals = geneigsympos(Cmat, Sm)[0]
708 except:
709
710
711 print Sm
712 try:
713 from scipy import linalg as sl
714 evals = sl.eigvals(Cmat, Sm)
715 evals.sort()
716 except:
717 evals = ones(n).A1 * (-1)
718
719 return evals.cumsum()
720
721
722
723
if __name__ == '__main__':
    # Smoke test: exercises the main functions of this module on random data
    # and prints the results for visual inspection.
    from numpy.matlib import rand
    data = rand((100, 3))
    print(getDeterministics(100, 'ctl', 0.3).shape)
    print(getDeterministics(100, 'c', 5).shape)
    print(getDeterministics(100, 'ctlsi', 80).shape)
    print(getDeterministics(100, 'qmtl', 0.1).shape)
    print(autocovar(data, 10))
    print(autocovar(data, 5, True))
    print(longrunvar(data))

    print(commontrendstest(data))
    print(commontrendstest(data, 2, 't', 0.3))
    print(commontrendstest(data, 4, '2a', 0.3))
    print(commontrendstest(data, 3, '1', 0.3))
    try:
        print(commontrendstest(data, 5, '2', 0.8))
    except Exception:
        print('5 lags failed')

    # Check that geneigsympos returns its eigenvalues in ascending order.
    for run in range(100):
        m1 = rand((10, 5))
        m2 = rand((10, 5))
        m1in = m1.T * m1
        m2in = m2.T * m2
        evals = geneigsympos(m1in, m2in)[0]
        # Sort a copy: sorting an alias would also sort evals itself and
        # make the comparison below pass no matter what.
        temp = evals.copy()
        temp.sort()
        for i in range(evals.shape[0]):
            if temp[i] != evals[i]:
                raise NotImplementedError('nono')

    print(geneigsympos(m1in, m2in))
    print(getQuarterlyDates(1985, 3, 45))

    writecsv('test.csv', data, orientation='rows',
             obslabels=['one', 'two', 'three'], comments=['hello', 'again'])
    print(startobs2obslist('2000m12', 10))
760
761 '''
762 Changelog:
763 15May2007:
764 add write_gretl_matrix_xml(),
765 make commontrendstest() more robust to eigenvalue failures
766 1Feb2007:
767 fix getOrthColumns to return a rx0 matrix if input full rank,
768 and add a null() function for null spaces based on svd,
769 rank() now directly based on svd
770 15Jan2007:
771 new unvec() function
772 11Jan2007:
773 new writecsv() function,
774 deleted writeGpl...,
775 new startobs2obslist(),
776 new vec() function
777 10Jan2007:
778 fixed use of c_ / r_ due to change in numpy API,
779 fix bug in readcsv
780 7Jan2007:
781 rewrote input checks using assert,
782 generalized readcsv (formerly known as readgretlcsv)
783 5Jan2007:
784 explicit sorting of eigenvalues instead of relying on numpy implementation
785 3Jan2007:
786 new and simpler readgretlcsv (after gretl cvs changes),
787 converter for numpy matrix to gretl-type matrix string
788 17Aug2006:
789 fixes for readGplFile,
790 finished writeGplFile
791 16Aug2006:
792 removed obsolete qString2qNumber(),
793 rewrote readGplFile with csv module, scipy or matplotlib not required,
794 started analogous writeGplFile
795 15Aug2006:
796 minor cosmetics
797 12Aug2006:
798 added readGplFile,
799 added getImpulseDummies
800 11Aug2006:
801 added helpers for use with matplotlib and or gpl-formatted csv files,
802 renamed getDetermMatrix to getDeterministics
803 10Aug2006:
804 commented out diagm, can use .diagonal() and diagflat() in numpy
805 21Jul2006:
806 commented out zerosm, onesm, emptym, eyem, randm, which are obsoleted
807 by the new numpy.matlib,
808 what about diag?: still needs to be fixed in numpy,
809 tried to avoid inefficient inverses (and use solve instead),
810 replace asm/asmatrix by mat which now means the same in numpy,
811 try to make makeNumpyMatrix redundant,
812 2Jun2006:
813 added helpers zerosm, onesm, emptym, eyem, diagm to return numpy-matrices,
814 added helper randm including workaround,
815 switched to using ' instead of " where possible,
816 don't add the replaced stuff like zeros etc. to the namespace anymore,
817 15May2006:
818 kron is now in numpy, no need to substitute anymore;
819 evals from geneigsympos are now always returned as array
820 1Mar2006:
821 moved the Vecm class to file Vecmclass.py, and "stole" kron from scipy
822 28Feb2006:
823 add Stock-Watson common-trends calculation
824 20Feb2006:
825 work on deterministic adjustment of GG-decomp
826 14Feb2006:
827 bugfixes and better treatment of S&L deterministics
828 12Feb2006:
829 deterministics estimation a la S&L added to Vecm class,
830 more use of numpy-lstsq-function 31Jan2006: all functions should
831 return arrays or matrix-type according to the input, where that makes
832 sense, i.e. whenever a data matrix is passed to a function(and where
833 the purpose is not explicitly to produce matrices)
834 28Jan2006:
835 bugfixing related to coeffs of restricted variables, wrote function
836 for symmetric-def-gen.eigval problem to remove scipy-dependency
837 19Jan2006:
838 work started on a vecm class 19Jan2006: switched over to
839 raising exceptions instead of home-cooked string generation
840 19Jan2006:
841 functions should all return numpy-matrix-type
842 6Jan2006:
843 switched over to numpy/new-scipy
844 '''
845