Package daredare :: Package extern :: Module helpers
[hide private]
[frames] | no frames]

Source Code for Module daredare.extern.helpers

  1  '''(Mostly time-series-related) functions needed and written by Sven Schreiber.
 
  2  
 
  3  This is free but copyrighted software, distributed under the same license terms
 
  4  (as of January 2007) as the 'gretl' program by Allin Cottrell and others, see
 
  5  gretl.sf.net (in short: GPL v2, see www.gnu.org/copyleft/gpl.html).
 
  6  
 
  7  (see end of this file for a changelog)
 
  8  ''' 
  9  from numpy import r_, c_, arange, diff, mean, sqrt, log, mat 
 10  from numpy import asarray, nan 
 11  from numpy.matlib import ones, zeros, rand, eye, empty 
 12  from numpy.linalg import eigh, cholesky, solve, lstsq 
 13  # (lstsq also as tool to determine rank)
 
 14  
 
# some constants/dictionaries first

# first month of each quarter (quarter number -> month number)
quarter2month = {1: 1, 2: 4, 3: 7, 4: 10}
# in theory we only need the four months 1, 4, 7, 10, but well...
# (month number -> quarter number containing that month)
month2quarter = {1: 1, 2: 1, 3: 1, 4: 2, 5: 2, 6: 2, 7: 3, 8: 3, 9: 3, \
                 10: 4, 11: 4, 12: 4}
# quarter number -> fraction of the year elapsed at the start of the quarter
qNumber2qFloat = {1: 0.0, 2: 0.25, 3: 0.5, 4: 0.75}
# month number -> fraction of the year, in steps of the truncated value
# 0.0833 (not exactly 1/12); these literals double as the keys of
# mFloat2mNumber below, so the two tables must stay in sync
mNumber2mFloat = {1: 0.0, 2: 0.0833, 3: 0.1666, 4: 0.2499, 5: 0.3332, \
                  6: 0.4165, 7: 0.4998, 8: 0.5831, 9: 0.6664, 10: 0.7497, \
                  11: 0.8330, 12: 0.9163}
# inverse of qNumber2qFloat (year fraction -> quarter number)
# NOTE(review): despite the name, keys are floats and values are ints here
qFracstring2qString = {0.0: 1, 0.25: 2, 0.5: 3, 0.75: 4}
# inverse of mNumber2mFloat (year fraction -> month number)
mFloat2mNumber = {0.0: 1, 0.0833: 2, 0.1666: 3, 0.2499: 4, 0.3332: 5, \
                  0.4165: 6, 0.4998: 7, 0.5831: 8, 0.6664: 9, 0.7497: 10, \
                  0.8330: 11, 0.9163: 12}
# with onetwelfth == 0.0833 approx.
 
 29  
 
 30  from numpy.linalg import lstsq, svd 
 31  from numpy import where 
def rank(m, rcond = 1e-10):
    '''
    Returns the (algebraic, not numpy-jargon) rank of m.

    The rank is counted as the number of singular values of m that exceed
    rcond times the first singular value reported by svd().
    '''
    singular_values = svd(m)[1]
    threshold = singular_values[0] * rcond
    is_significant = singular_values > threshold
    return where(is_significant, 1, 0).sum()
38
def vec(m):
    '''
    Stacks the columns of m on top of each other.

    For a numpy-array input the result is a 1d-array; for a numpy-matrix
    input of shape (n, k) the result is a column vector of shape (n*k, 1).
    '''
    transposed = m.T
    # row-wise flattening of the transpose == column-wise stacking of m
    flattened = transposed.ravel()
    return flattened.T
47 48 from numpy import mat, asarray
def unvec(m, rows, cols):
    '''
    Turns a (column) vector into a matrix of shape == (rows, cols).

    Inverse operation of vec(): the input is read as stacked columns.
    A numpy-matrix input must be a column vector and yields a numpy matrix;
    a 1d-array input yields a 2d-array.
    '''
    came_as_matrix = type(m) == type(mat(m))
    if came_as_matrix:
        assert m.shape[1] == 1      # col vector
    else:
        assert len(m.shape) == 1    # 1d array
        m = mat(m).T
    assert cols * rows == m.shape[0]
    # fill row-wise into (cols, rows), then transpose -> column-wise fill
    reshaped = m.reshape(cols, rows).T
    if came_as_matrix:
        return reshaped
    return asarray(reshaped)
66 67 from numpy import mat
def mat2gretlmatstring(m):
    '''
    Builds the gretl string representation of m.

    m may be a numpy matrix, an array, or a scalar. Elements within a row
    are separated by ',', rows by ';', and the whole is wrapped in braces.
    '''
    row_strings = []
    # going through mat() guarantees a 2d (list-of-lists) structure even
    # for 1d-array or scalar input
    for row in mat(m).tolist():
        row_strings.append(','.join(map(str, row)))
    return '{' + ';'.join(row_strings) + '}'
75
def startobs2obslist(startperiod, numofobs):
    '''
    Constructs list of observation labels following the input pattern.

    Example:
    startperiod = '1999q3', numofobs = 2 -> ['1999q3', '1999q4']
    Currently supports only annual (pure number), monthly, quarterly.
    Years must be in 4-digit format.

    Exactly numofobs labels are returned (an empty list if numofobs <= 0).
    Raises NotImplementedError if the character after the year is not one
    of 'q', 'Q', 'm', 'M'.
    '''
    if numofobs <= 0:
        return []
    if startperiod.isdigit():   # pure (integer) number -> annual
        firstyear = int(startperiod)
        return [str(firstyear + offset) for offset in range(numofobs)]

    freqchar = startperiod[4]
    if freqchar in 'qQ':        # quarterly dates
        wrap = 4
        period = int(startperiod[5])
    elif freqchar in 'mM':      # monthly dates
        wrap = 12
        period = int(startperiod[5:7])
    else:
        raise NotImplementedError

    year = int(startperiod[:4])
    out = []
    # emit the current label first, then advance; this yields exactly
    # numofobs labels (the former version returned one label too many)
    for _ in range(numofobs):
        out.append(str(year) + freqchar + str(period))
        if period == wrap:
            period = 1
            year += 1
        else:
            period += 1
    return out
106 107 import csv 108 from numpy import mat
def writecsv(filename, data, orientation = 'cols', delim = ',',
             varnames = None, obslabels = None, comments = None,
             commentchar = '# '):
    '''
    Saves array or matrix <data> in csv format in file <filename> (path string).

    <comments> must be passed as a sequence of strings, one for each line,
    and will be written at the top of the file, each line starting with
    <commentchar>.
    <orientation> can be 'cols' or 'rows', determines whether the
    variable names will be used as column or row headers, and how to treat
    1d-input. (And observation labels will be written accordingly.)
    <varnames> and <obslabels> must be sequences of strings (or omitted).

    A header row is only written if there are column headers; if there are
    row headers as well, the header row starts with 'obs' (or 'var' for the
    'rows' orientation). The passed sequences are never modified.

    Returns 0 on success. Raises ValueError if the number of headers does
    not match the dimensions of the data.
    '''
    import sys  # local import: only needed to choose the file mode

    # work on private copies so caller-owned lists stay untouched
    varnames = [] if varnames is None else list(varnames)
    obslabels = [] if obslabels is None else list(obslabels)
    comments = [] if comments is None else list(comments)

    data = mat(data)
    if orientation == 'rows':
        colheaders, rowheaders, cell11 = obslabels, varnames, 'var'
    else:   # 'cols' orientation as fallback
        colheaders, rowheaders, cell11 = varnames, obslabels, 'obs'
        if data.shape[0] == 1:
            data = data.T   # make 1d-array a column vector
    if colheaders and len(colheaders) != data.shape[1]:
        raise ValueError('writecsv: number of column headers does not '
                         'match number of data columns')
    if rowheaders and len(rowheaders) != data.shape[0]:
        raise ValueError('writecsv: number of row headers does not '
                         'match number of data rows')

    # the csv module wants to control line endings itself
    if sys.version_info[0] >= 3:
        handle = open(filename, 'w', newline='')
    else:
        handle = open(filename, 'wb')
    try:
        target = csv.writer(handle, delimiter = delim)
        # (additional trivial list layer because otherwise the comment
        # string itself would be split up with the delim character)
        target.writerows([[commentchar + comment] for comment in comments])
        if colheaders:
            if rowheaders:
                target.writerow([cell11] + colheaders)
            else:
                target.writerow(colheaders)
        rows = data.tolist()    # list conversion only once
        for label, row in zip(rowheaders, rows):
            row.insert(0, label)
        target.writerows(rows)
    finally:
        handle.close()

    return 0    # success
148 149 import csv 150 from numpy import mat
def readcsv(filename, delim = ',', commentchar = '#', colheader = 'names',
            rowheader = 'obs'):
    '''
    Read in a csv file (may contain comments starting with commentchar).

    The contents of the first non-comment row and column must be indicated in
    colheader (first row) and rowheader (first column) as one of 'names',
    'obs' (labels), or None.
    The array (matrix) of the data is returned as is, i.e. w/o transpose, hence
    the caller must know whether variables are in rows or columns.
    If both colheader and rowheader are not None, the upper-left cell (header
    of the first row/col) is ignored (but must be non-empty).
    Blank lines and comment lines are skipped.

    Returns a five-element tuple:
    0. numpy-matrix of the actual data as floats
    1. orientation of variables: 'cols', 'rows', or 'unknown'
    2. 1d-array of variable names (or None)
    3. 1d-array of observation labels (or None)
    4. the type/frequency of the data
       (currently one of 'a', 'q', 'm', guessed from the first date label)
       (if this deduction failed or there are no labels, 'unknown')

    Raises ValueError for header specs other than 'names', 'obs', None
    (e.g. typos like 'Names'), or if both specs are the same non-None value.

    Easiest example with upper-left data cell in second row/second column:
    mydata = readcsv('myfile.csv')[0]
    '''
    import sys  # local import: only needed to choose the file mode

    for spec in (colheader, rowheader):
        if spec not in ('names', 'obs', None):
            raise ValueError("readcsv: header spec must be 'names', 'obs' "
                             "or None, got %r" % (spec,))
    if colheader is not None and colheader == rowheader:
        raise ValueError('readcsv: colheader and rowheader cannot both '
                         'be %r' % (colheader,))

    # the csv module wants to control line endings itself
    if sys.version_info[0] >= 3:
        handle = open(filename, 'r', newline='')
    else:
        handle = open(filename, 'rb')
    try:
        read_from = csv.reader(handle, delimiter = delim,
                               skipinitialspace = True)
        # 'line and ...' drops blank lines, which the reader yields as []
        tempnestedlist = [line for line in read_from
                          if line
                          and not line[0].strip().startswith(commentchar)]
    finally:
        handle.close()
    data = mat(tempnestedlist, dtype = str)

    # peel off the header row first, then the header column
    firstrow = firstcol = None
    if colheader is not None:
        firstrow, data = data[0, :].A1, data[1:, :]
    if rowheader is not None:
        firstcol, data = data[:, 0].A1, data[:, 1:]
        if firstrow is not None:
            firstrow = firstrow[1:]     # ignore the upper-left cell
    headers = {colheader: firstrow, rowheader: firstcol}
    varnames = headers.get('names')
    obslabels = headers.get('obs')

    if colheader == 'names' or rowheader == 'obs':
        orientation = 'cols'
    elif rowheader == 'names' or colheader == 'obs':
        orientation = 'rows'
    else:
        orientation = 'unknown'

    # detect the dataset type from the first observation label (if any)
    freq = 'unknown'
    if obslabels is not None and len(obslabels) > 0:
        firstlabel = obslabels[0]
        if len(firstlabel) == 4:                                # annual
            freq = 'a'
        elif len(firstlabel) == 6 and firstlabel[4] in 'qQ':    # quarterly
            freq = 'q'
        elif len(firstlabel) == 7 and firstlabel[4] in 'mM':    # monthly
            freq = 'm'

    return data.astype(float), orientation, varnames, obslabels, freq
222 223 from numpy import nan
def floatAndNanConverter(datapoint, nacode = 'na'):
    '''
    Maps the missing-value code nacode to numpy.nan.

    Any other input is passed through float() (useful e.g. as a converter
    for matplotlib's load, asarray).
    '''
    return nan if datapoint == nacode else float(datapoint)
232
def dateString2dateFloat(datestring):
    '''
    Converts '1999q2' -> 1999.25, '1999m2' -> 1999.0833, etc.

    The year is taken from the first four characters, the frequency letter
    from the fifth; the within-year fraction is looked up in the module
    tables qNumber2qFloat / mNumber2mFloat.
    So far only for quarterly and monthly.
    '''
    year = float(datestring[:4])
    freq = datestring[4]
    assert freq in 'qQmM', 'sorry, only quarterly or monthly'
    if freq in 'qQ':        # quarterly: single-digit quarter number
        fraction = qNumber2qFloat[int(datestring[5])]
    elif freq in 'mM':      # monthly: one- or two-digit month number
        fraction = mNumber2mFloat[int(datestring[5:7])]
    return year + fraction
246 247 from datetime import date, timedelta
def getQuarterlyDates(startyear, startquarter, t):
    '''
    Constructs a list of quarterly date labels (e.g. '1985Q3') for t obs.

    startyear, startquarter and t may be anything int() accepts (so numeric
    strings work as well); startquarter must be 1, 2, 3 or 4.
    Raises TypeError if an input cannot be converted to an integer and
    ValueError if the quarter is out of range.

    Algorithm to get a sequence of strings relating to quarterly dates:
    1. start with first day in the startquarter, e.g. 2006-04-01
    2. map the month to quarter and make string year + 'Q' + quarter
    3. the longest quarters are 3rd and 4th (2*31 days + 30 days = 92 days),
       1st the shortest (90 or 91), so add a timedelta (in days,
       apparently default) of 100 days (anything between 92+1 and
       sum of shortest quarter plus one month = approx. 118)
    4. reset the day of that intermediate date to 1
    5. return to step 2
    '''
    try:
        y = int(startyear)
        q = int(startquarter)
        nobs = int(t)
    except (TypeError, ValueError):
        raise TypeError('need integers for year, quarter, t')
    if q not in range(1, 5):
        raise ValueError('startquarter input out of range')
    # create list for date strings:
    datestrings = []
    # step 1. (use the converted q, not the raw input; the first month of
    # quarter q is 3*q - 2, i.e. 1, 4, 7, 10):
    d = date(y, 3 * q - 2, 1)
    for _ in range(nobs):
        quarter = (d.month - 1) // 3 + 1
        datestrings.append(str(d.year) + 'Q' + str(quarter))
        d += timedelta(100)
        d = d.replace(day = 1)
    return datestrings
275 276 from numpy.linalg import svd
def null(m, rcond = 1e-10):
    '''
    Returns the columns of U (from the svd m = U S V') beyond the rank of m.

    The rank is counted as in rank(): the number of singular values
    exceeding rcond times the first singular value.
    NOTE(review): the returned columns of U are orthogonal to the columns
    of m (i.e. they satisfy m' x = 0); confirm that this, rather than the
    solutions of m x = 0, is what callers expect from 'null'.
    '''
    rows, cols = m.shape    # values unused; unpacking requires 2d input
    left_vectors, singular_values, right_vectors_h = svd(m)
    cutoff = singular_values[0] * rcond
    numerical_rank = where(singular_values > cutoff, 1, 0).sum()
    return left_vectors[:, numerical_rank:]
282 283 from numpy.matlib import empty, zeros, eye, mat, asarray 284 from numpy.linalg import lstsq
def getOrthColumns(m):
    '''
    Constructs the orthogonally complementing columns of the input.

    Input of the form pxr is assumed to have r<=p,
    and have either full column rank r or rank 0 (scalar or matrix)
    Output is of the form px(p-r), except:
    a) if M square and full rank p, returns a matrix of shape (p, 0)
    b) if rank(M)=0 (zero matrix), returns I_p
    (Note you cannot pass scalar zero, because dimension info would be
    missing.)
    Return type is as input type (array in -> array out, else matrix).

    Raises ValueError if there are fewer rows than columns, or if the
    input has neither full column rank nor rank zero.
    '''
    # remember the input type so that the same type can be returned
    if type(m) == type(asarray(m)):
        m = mat(m)
        output = 'array'
    else: output = 'matrix'
    p, r = m.shape
    # first catch the stupid input case
    if p < r: raise ValueError, 'need at least as many rows as columns'
    # we use lstsq(M, ones) just to exploit its rank-finding algorithm,
    # (element [2] of the lstsq return tuple is the rank)
    rk = lstsq(m, ones(p).T)[2]
    # first the square and full rank case:
    if rk == p: result = zeros((p,0)) # note the shape! hopefully octave-like
    # then the zero-matrix case (within machine precision):
    elif rk == 0: result = eye(p)
    # now the rank-deficient case:
    elif rk < r:
        raise ValueError, 'sorry, matrix does not have full column rank'
    # (what's left should be ok)
    else:
        # we have to watch out for zero rows in M,
        # if they are in the first p-r positions!
        # so the (probably inefficient) algorithm:
        # 1. check the rank of each row
        # 2. if zero, then also put a zero row in c
        # 3. if not, put the next unit vector in c-row
        # NOTE(review): the rows selected for c are simply the first r
        # non-zero rows of m; if those rows happen to be linearly dependent,
        # m.T * c is singular and solve() below will presumably fail --
        # TODO confirm whether callers can pass such input
        idr = eye(r)
        idpr = eye(p-r)
        c = empty([0,r]) # starting point
        co = empty([0, p-r]) # will hold orth-compl.
        idrcount = 0    # number of unit vectors of idr used so far
        for row in range(p):
            # (must be ones() instead of 1 because of 2d-requirement
            if lstsq( m[row,:], ones(1) )[2] == 0 or idrcount >= r:
                c = r_[ c, zeros(r) ]
                co = r_[ co, idpr[row-idrcount, :] ]
            else: # row is non-zero, and we haven't used all unit vecs
                c = r_[ c, idr[idrcount, :] ]
                co = r_[ co, zeros(p-r) ]
                idrcount += 1
        # earlier non-general (=bug) line: c = mat(r_[eye(r), zeros((p-r, r))])
        # and: co = mat( r_[zeros((r, p-r)), eye(p-r)] )
        # old:
        # result = ( eye(p) - c * (M.T * c).I * M.T ) * co
        # same expression as the old line, but via solve() instead of an
        # explicit inverse
        result = co - c * solve(m.T * c, m.T * co)
    if output == 'array': return result.A
    else: return result
343 344 from numpy import mat, asarray
def addLags(m, maxlag):
    '''
    Appends the lags 1..maxlag of the TxN input as additional columns.

    Early periods first. The first maxlag rows are dropped, so the result
    has T-maxlag rows and N*(maxlag+1) columns; with maxlag zero the
    original data is returned.
    Array input gives array output, otherwise a numpy matrix is returned.
    Raises TypeError for a non-integer maxlag and ValueError if maxlag
    exceeds the number of rows.
    '''
    was_array = type(m) == type(asarray(m))
    if was_array:
        m = mat(m)
    T, N = m.shape
    if type(maxlag) != type(4):
        raise TypeError('addLags: need integer for lag order')
    if maxlag > m.shape[0]:
        raise ValueError('addLags: sample too short for this lag')
    # contemporaneous block: first maxlag periods must go because of lags
    stacked = m[maxlag:, :]
    lag = 1
    while lag <= maxlag:
        lagged_block = m[(maxlag - lag):(T - lag), :]
        stacked = c_[stacked, lagged_block]
        lag += 1
    if was_array:
        return asarray(stacked)
    return stacked
366 367 from numpy.matlib import empty, ones, zeros 368 from numpy import mat, c_, r_
def getDeterministics(nobs, which = 'c', date = 0.5):
    '''
    Returns various useful deterministic terms for a given sample length T.

    Return object is a numpy-matrix-type with T rows and one column per
    requested term (3 columns for 'q', 11 columns for 'm');
    (early periods first, where relevant).
    In the 'which' argument pass a string composed of the following letters,
    in arbitrary order (the column order is always c, t, l, s, i, q, m):
    c - constant (=1) term
    t - trend (starting with 0)
    q - centered quarterly seasonal dummies (starting with 0.75, -0.25...)
    m - centered monthly seasonal dummies (starting with 11/12, -1/12, ...)
    l - level shift (date applies)
    s - slope shift (date applies)
    i - impulse dummy (date applies)

    If the date argument is a floating point number (between 0 and 1),
    it is treated as the fraction of the sample where the break occurs.
    If instead it is an integer between 0 and T, it is the number of
    observations before the shift, i.e. the zero-based index of the first
    shifted observation.

    Raises TypeError for wrongly typed input, and ValueError for a
    non-positive sample length, a date out of range, or an impulse dummy
    requested at a shift date beyond the last observation.
    '''
    # some input checks (as well as assignment of shiftperiod):
    if type(nobs) != type(4):   # is not an integer
        raise TypeError('need integer for sample length')
    if nobs <= 0:
        raise ValueError('need positive sample length')
    if type(date) == type(0.5): # is a float, treat as break fraction
        if date < 0 or date > 1:
            raise ValueError('need break fraction between 0 and 1')
        shiftperiod = int(date * nobs)
    elif type(date) == type(4): # is integer, treat as period number
        # 0 is allowed, consistent with the float case and the docstring
        if date < 0 or date > nobs:
            raise ValueError('need period within sample range')
        shiftperiod = date
    else:
        raise TypeError('need float or integer input for date')
    if type(which) != type('a string'):
        raise TypeError('need string for case spec')
    # end input checks

    def column(values):
        # turns a plain list of numbers into a (nobs x 1) float matrix
        return mat(list(values), dtype = float).T

    def seasonals(periods):
        # centered dummies for the first periods-1 seasons, one list each,
        # cut to the sample length
        repeats = 1 + nobs // periods   # reaches end of next full year
        cols = []
        for season in range(periods - 1):
            pattern = [-1. / periods] * periods
            pattern[season] = 1. - 1. / periods
            cols.append((pattern * repeats)[:nobs])
        return cols

    after = nobs - shiftperiod      # number of obs from the shift onwards
    out = empty([nobs, 0])          # create starting point
    if 'c' in which:
        out = c_[out, column([1.] * nobs)]
    if 't' in which:
        out = c_[out, column(range(nobs))]
    if 'l' in which:
        out = c_[out, column([0.] * shiftperiod + [1.] * after)]
    if 's' in which:
        out = c_[out, column([0.] * shiftperiod + list(range(1, after + 1)))]
    if 'i' in which:
        if shiftperiod >= nobs:
            raise ValueError('need impulse date within sample range')
        out = c_[out, column([0.] * shiftperiod + [1.] + [0.] * (after - 1))]
    if 'q' in which or 'Q' in which:
        for values in seasonals(4):
            out = c_[out, column(values)]
    if 'm' in which or 'M' in which:
        for values in seasonals(12):
            out = c_[out, column(values)]
    return out
433 434 from numpy.matlib import empty
def getImpulseDummies(sampledateslist, periodslist):
    '''
    Returns a (numpy-)matrix of impulse dummies for the specified periods.

    sampledateslist must consist of 1999.25 -style dates (quarterly or monthly),
    i.e. exactly the floats that dateString2dateFloat() produces.
    However, because periodslist is probably human-made, it expects strings
    such as '1999q3' or '1999M12'.
    Variables in columns: one column per entry of periodslist, holding a
    single 1 in the row of that period (the first sample period included).
    So far only for quarterly and monthly data.

    Raises ValueError if a requested period is not in sampledateslist.
    '''
    sampledates = list(sampledateslist)     # any sequence is fine
    nobs = len(sampledates)
    result = empty([nobs, 0])
    for periodstring in periodslist:
        period = dateString2dateFloat(periodstring)
        position = sampledates.index(period)    # ValueError if absent
        # build the column directly so that position 0 works as well
        impulse = zeros((nobs, 1))
        impulse[position, 0] = 1.0
        result = c_[result, impulse]
    return result
452 453 from numpy import mat, asarray 454 from numpy.linalg import cholesky, eigh
def geneigsympos(A, B):
    ''' Solves symmetric-positive-def. generalized eigenvalue problem Az=lBz.

    Takes two real-valued symmetric matrices A and B (B must also be
    positive-definite) and returns the corresponding (also real-valued)
    eigenvalues and eigenvectors.

    Return format: tuple (l, Z); l is the 1-dim array of eigenvalues from
    eigh, ordered ascending, and Z is an array or matrix (depending on type
    of input A) with the corresponding eigenvectors in columns.

    Steps:
        1. get lower triang Choleski factor of B: L*L' = B
         <=> A z = l L L' z
         <=> (L^-1 A L^-1') (L'z) = l (L'z)
        2. standard eig problem, with same eigvals l
        3. premultiply eigvecs L'z by L^-1' to get z
    '''
    array_input = type(A) == type(asarray(A))
    if array_input:
        A, B = mat(A), mat(B)
    # step 1: inverse of the Choleski factor
    chol_inv = cholesky(B).I
    # step 2: ordinary symmetric problem
    evals, evecs = eigh(chol_inv * A * chol_inv.T)
    # explicit ascending sort of the eigenpairs
    ascending = evals.argsort()
    evecs = evecs[:, ascending]
    evals.sort()    # in-place!
    # step 3: transform back
    evecs = chol_inv.T * evecs
    if array_input:
        return evals, asarray(evecs)
    return evals, evecs
489 490 from numpy.matlib import eye, c_
def vecm2varcoeffs(gammas, maxlag, alpha, beta):
    '''
    Converts Vecm coeffs to levels VAR representation.

    Gammas need to be coeffs in shape #endo x (maxlag-1)*#endo,
    such that contemp_diff = alpha*ect + Gammas * lagged_diffs
    is okay when contemp_diff is #endo x 1.
    We expect matrix input!

    Returns the #endo x maxlag*#endo matrix [A_1, ..., A_maxlag] with
        A_1 = I + alpha*beta' + Gamma_1,
        A_i = Gamma_i - Gamma_(i-1) for 1 < i < maxlag,
        A_maxlag = -Gamma_(maxlag-1);
    for maxlag == 1 (gammas with zero columns) this is just I + alpha*beta'.
    Raises ValueError if the dimensions of the inputs do not fit together.
    '''
    if alpha.shape != beta.shape:
        raise ValueError('alpha and beta must have equal dim')
    N_y = alpha.shape[0]
    if gammas.shape[0] != N_y:
        raise ValueError("alpha or gammas dim doesn't match")
    if gammas.shape[1] != (maxlag-1)*N_y:
        raise ValueError("maxlag or gammas dim doesn't match")

    levelscoeffs = eye(N_y) + alpha * beta.T
    if maxlag == 1:
        # no short-run dynamics at all, nothing to add or append
        return levelscoeffs
    # starting point first lag:
    levelscoeffs = levelscoeffs + gammas[:, :N_y]
    # intermediate lags:
    for lag in range(1, maxlag-1):
        current = gammas[:, N_y*lag : N_y*(lag+1)]
        previous = gammas[:, N_y*(lag-1) : N_y*lag]
        levelscoeffs = c_[levelscoeffs, current - previous]
    # last diff-lag, now this should be N_y x maxlags*N_y:
    return c_[levelscoeffs, -gammas[:, -N_y:]]
518
def gammas2alternativegammas(gammas, alpha, beta):
    '''
    Converts Vecm-coeffs for ect at t-1 to the ones for ect at t-maxlag.

    The input gammas (shortrun coeffs, shape #endo x (maxlag-1)*#endo,
    matrix-type expected like alpha and beta) refer to a Vecm where the
    levels are lagged one period. In the alternative representation with
    the levels lagged maxlag periods the shortrun coeffs are different;
    the relation is:
        alt_gamma_i = alpha * beta' + gamma_i

    In terms of the levels VAR coefficients A_i (i=1..maxlag) the gammas are
    defined as:
        gamma_i = - \\sum_{j=i+1}^maxlag A_j for i=1..maxlag-1;
    and the alternative gammas (used by Proietti e.g.) are:
        alt_gamma_i = -I + \\sum_{j=1}^i A_j for i=1..maxlag-1.
    (And \\alpha \\beta' = -I + \\sum_{j=1}^maxlag A_j.)

    Returns a matrix of the same shape as gammas.
    Raises ValueError if gammas does not consist of square #endo x #endo
    blocks.
    '''
    from numpy import tile  # local import, not among the file-level names

    longrun = alpha * beta.T
    N_y = longrun.shape[0]
    nblocks, leftover = divmod(gammas.shape[1], N_y)
    if gammas.shape[0] != N_y or leftover != 0:
        raise ValueError("alpha/beta or gammas dim doesn't match")
    # plain broadcasting cannot add an N x N matrix to an N x k*N one, so
    # repeat alpha*beta' side by side, once for every gamma_i block
    return gammas + tile(longrun, (1, nblocks))
539 540 import os 541 from numpy.matlib import mat
def write_gretl_mat_xml(outfile, matrices, matnames = None):
    '''
    Writes a gretl matrix xml file to transfer matrices.

    outfile should be a path string,
    matrices is a list of numpy matrices (or 2d-arrays),
    matnames is a string list of wanted matrix names (if empty or omitted,
    matrices are named m1, m2, etc.); the passed list is not modified.

    Every matrix row becomes one line of space-separated numbers, written
    with repr() so that long rows are neither abbreviated nor rounded.
    Raises ValueError if the number of names and matrices differ.
    '''
    if not matnames:
        names = ['m' + str(mindex + 1) for mindex in range(len(matrices))]
    else:
        names = list(matnames)  # private copy, caller's list stays intact
    if len(names) != len(matrices):
        raise ValueError('write_gretl_mat_xml: need exactly one name '
                         'per matrix')

    lines = ['<?xml version="1.0" encoding="UTF-8"?>',
             '<gretl-matrices count="' + str(len(matrices)) + '">']
    for name, m in zip(names, matrices):
        m = mat(m)
        lines.append('<gretl-matrix name="' + name + '" '
                     + 'rows="' + str(m.shape[0]) + '" '
                     + 'cols="' + str(m.shape[1]) + '">')
        for row in m.tolist():
            lines.append(' '.join([repr(float(value)) for value in row]))
        lines.append('</gretl-matrix>')
    lines.append('</gretl-matrices>')

    out = open(outfile, 'w')
    try:
        # '\n' because text mode already translates to the platform's
        # line separator
        out.write('\n'.join(lines))
    finally:
        out.close()
565 566 ################################ 567 ## now some more econometrically oriented helper functions 568 ################################ 569 570 from numpy.matlib import zeros, mat, asarray
571 -def autocovar(series, LagInput, Demeaned=False):
572 ''' 573 Computes the autocovariance of a uni- or multivariate time series. 574 575 Usage: autocovar(series, Lag [, Demeaned=False]) returns the NxN 576 autocovariance matrix (even for N=1), where series is 577 an TxN matrix holding the N-variable T-period data (early periods first), 578 and Lag specifies the lag at which to compute the autocovariance. 579 Specify Demeaned=True if passing ols-residuals to avoid double demeaning. 580 Returns a numpy-matrix-type. 581 ''' 582 if type(series) == type(asarray(series)): 583 output = 'array' 584 series = mat(series) 585 else: output = 'matrix' 586 t, n = series.shape 587 try: Lag = int(LagInput) 588 except: raise TypeError, 'autocovar: nonsense lag input type' 589 if Demeaned == False: 590 # axis=0 for columns (otherwise does overall-average): 591 xbar = series.mean(axis=0) 592 else: xbar = 0 # seems to broadcast to vector-0 ok (below) 593 result = zeros([n,n]) 594 for tindex in range(Lag, t): 595 xdev1 = series[tindex,:] - xbar 596 xdev2 = series[tindex-Lag, :] - xbar 597 result += xdev1.T * xdev2 598 result /= t 599 if output == 'array': return asarray(result) 600 else: return result
601 602 from numpy.matlib import zeros, mat, asarray
def longrunvar(series, Demeaned = False, LagTrunc = 4):
    '''
    Estimates the long-run variance (aka spectral density at frequency zero)
    of a uni- or multivariate time series.

    Usage: lrv = longrunvar(series [, Demeaned, LagTrunc]),
    where series is a TxN matrix holding
    the N-variable T-period data (early periods first).
    The Bartlett weighting function is used
    up to the specified lag truncation (default = 4), i.e. the
    autocovariance at lag tau gets the weight 1 - tau/(LagTrunc+1).
    If LagTrunc >= T-1 it is reset to int(sqrt(T)) and a warning is printed.
    Specify Demeaned=True when passing Ols-residuals etc. (default False).
    Returns an NxN matrix (even for N=1); array for array input.

    Raises TypeError if LagTrunc cannot be converted to an integer.
    '''
    if type(series) == type(asarray(series)):
        output = 'array'
        series = mat(series)
    else:
        output = 'matrix'
    t, n = series.shape

    # set the lag window constant:
    try:
        Lag = int(LagTrunc)
    except (TypeError, ValueError):
        raise TypeError('longrunvar: nonsense lag input type')
    if Lag >= t-1:
        Lag = int(sqrt(t))
        print('longrunvar warning: not enough data for chosen lag window')
        print('(was  %s , reset to  %s )' % (LagTrunc, Lag))

    result = zeros([n,n])
    for tau in range(1, Lag+1):
        Gamma = autocovar(series, tau, Demeaned)    # numpy-matrix here
        # float() is essential: with two integers Python 2 would truncate
        # tau/(Lag+1) to 0 and thereby switch off the Bartlett weighting
        weight = 1.0 - tau / float(Lag + 1)
        # the positive and negative range together:
        result += weight * (Gamma + Gamma.T)
    # add the tau=0 part:
    result += autocovar(series, 0, Demeaned)
    if output == 'array':
        return asarray(result)
    return result
639 640 from numpy.matlib import ones, zeros, mat 641 from numpy.linalg import solve
def commontrendstest(series, LagTrunc=4, determ = 'c', breakpoint=0.5):
    '''
    The Nyblom&Harvey(2000)-type tests for K_0 against K>K_0
    common stochastic trends in time series.

    Usage:
    commontrendstest(series [, LagTrunc, Deterministics, breakpoint])
    returns a 1d N-array with the test statistics (partial sums of relevant
    eigenvalues), starting with the null hypothesis K_0=N-1 and ending with
    K_0=0. If the eigenvalues cannot be computed at all, the underlying
    eigenvalues are set to -1 (so the statistics are -1, -2, ...).

    Input:
    TxN array of data in series (early periods first).

    LagTrunc:
    determines the truncation lag of the nonparametric estimate of the
    longrun variance (non-positive values are reset to 4 with a warning).

    Deterministics:
    'c' - constant mean,
    't' - to automatically de-trend the data (linearly),

    or use one of the following models with (one-time) deterministic shifts
    (see Busetti 2002):
    '1' - (a string!) for a level shift w/o trend,
    '2' - for a model with breaks in the mean and the trend slope,
    '2a' - for a trend model where only the mean shifts.
    (Case 2b --broken trends with connected segments-- is not implemented.)

    For these models '1' through '2a' the relative breakpoint in the sample can
    be chosen (otherwise it is ignored). It must be a float strictly between
    0 and 1 in any case.

    Raises TypeError for a nonsense lag or breakpoint type, and ValueError
    for a breakpoint outside the unit interval or an unknown determ code.
    '''
    series = mat(series)
    t, n = series.shape
    try:
        Lag = int(LagTrunc)
    except (TypeError, ValueError):
        raise TypeError('commontrendstest: nonsense lag input type')
    if Lag <= 0:
        print('commontrendstest warning: lag trunc too small, set to default!')
        Lag = 4
    if type(breakpoint) != type(0.5):   # check for floating point input
        raise TypeError('commontrendstest: nonsense breakpoint input type')
    elif (breakpoint <= 0) or (breakpoint >= 1):
        raise ValueError('commontrendstest: breakpoint not in unit interval')

    if determ == 'c':
        D = ones(t).T
    elif determ == 't':
        D = getDeterministics(t, 'ct')
    elif determ == '1':
        D = getDeterministics(t, 'cl', breakpoint)
    elif determ == '2':
        D = getDeterministics(t, 'ctls', breakpoint)
    elif determ == '2a':
        D = getDeterministics(t, 'ctl', breakpoint)
    else:
        # formerly an unknown code only surfaced as a NameError further down
        raise ValueError('commontrendstest: unknown deterministics code %r'
                         % (determ,))

    # okay, now remove the deterministics:
    # (by now, D should be Tx(1,2,3, or 4) )
    # this should do the projection:
    Resid = series - D * solve(D.T * D, D.T * series)
    # sum of outer products of the partial sums of the first 1, ..., T-1
    # residual rows, using a running sum instead of re-adding all earlier
    # rows for every period
    Cmat = zeros([n,n])
    partialsum = zeros((1,n))
    for tindex in range(t - 1):
        partialsum = partialsum + Resid[tindex,:]
        Cmat += partialsum.T * partialsum
    Cmat /= t**2
    Sm = longrunvar(Resid, True, Lag)
    # (True for data w/o deterministics, because everything removed)

    # generalized eigenvalues, corresponding to det(Cmat- lambda_j Sm)=0
    try:
        evals = geneigsympos(Cmat, Sm)[0]
    except Exception:
        # most probably Sm wasn't pos-def, which can happen depending on lags,
        # then we try to switch over to scipy's more general eigenvalues
        print(Sm)   # to get some insight before everything dies
        try:
            from scipy import linalg as sl
            evals = sl.eigvals(Cmat, Sm)
            evals.sort()    # in-place!
        except Exception:
            # deliberate best effort: we give up, and place -1's in the
            # return 1d-array
            evals = ones(n).A1 * (-1)
    # default axis in cumsum works here:
    return evals.cumsum()
#############################################################
# test cases:
if __name__ == '__main__':
    # ad-hoc smoke tests: exercise the helpers on random data and print the
    # results; note that this writes a file 'test.csv' to the current dir
    from numpy.matlib import rand
    data = rand((100, 3))
    print(getDeterministics(100, 'ctl', 0.3).shape)
    print(getDeterministics(100, 'c', 5).shape)
    print(getDeterministics(100, 'ctlsi', 80).shape)
    print(getDeterministics(100, 'qmtl', 0.1).shape)
    print(autocovar(data, 10))
    print(autocovar(data, 5, True))
    print(longrunvar(data))
    # the following could raise exceptions due to non-pos-def matrices
    print(commontrendstest(data))
    print(commontrendstest(data,2,'t',0.3))
    print(commontrendstest(data,4,'2a',0.3))
    print(commontrendstest(data,3,'1',0.3))
    try:
        print(commontrendstest(data,5,'2',0.8))
    except Exception:
        print('5 lags failed')
    # check loop for sorting evals:
    for run in range(100):
        m1 = rand((10,5))
        m2 = rand((10,5))
        m1in = m1.T * m1
        m2in = m2.T * m2
        evals = geneigsympos(m1in, m2in)[0]
        # sort a copy; sorting an alias of evals itself would make the
        # comparison below trivially true
        temp = evals.copy()
        temp.sort()
        for i in range(evals.shape[0]):
            if temp[i] != evals[i]:
                raise NotImplementedError('nono')
    #print run
    print(geneigsympos(m1in, m2in))
    print(getQuarterlyDates(1985, 3, 45))
    #print readGplFile('de-joint-wpu-6dim-1977q1.gpl')
    writecsv('test.csv', data, orientation = 'rows',
        obslabels = ['one', 'two', 'three'], comments = ['hello', 'again'])
    print(startobs2obslist('2000m12', 10))

'''
Changelog:
15May2007:
    add write_gretl_matrix_xml(),
    make commontrendstest() more robust to eigenvalue failures
1Feb2007:
    fix getOrthColumns to return a rx0 matrix if input full rank,
    and add a null() function for null spaces based on svd,
    rank() now directly based on svd
15Jan2007:
    new unvec() function
11Jan2007:
    new writecsv() function,
    deleted writeGpl...,
    new startobs2obslist(),
    new vec() function
10Jan2007:
    fixed use of c_ / r_ due to change in numpy API,
    fix bug in readcsv
7Jan2007:
    rewrote input checks using assert,
    generalized readcsv (formerly known as readgretlcsv)
5Jan2007:
    explicit sorting of eigenvalues instead of relying on numpy implementation
3Jan2007:
    new and simpler readgretlcsv (after gretl cvs changes),
    converter for numpy matrix to gretl-type matrix string
17Aug2006:
    fixes for readGplFile,
    finished writeGplFile
16Aug2006:
    removed obsolete qString2qNumber(),
    rewrote readGplFile with csv module, scipy or matplotlib not required,
    started analogous writeGplFile
15Aug2006:
    minor cosmetics
12Aug2006:
    added readGplFile,
    added getImpulseDummies
11Aug2006:
    added helpers for use with matplotlib and or gpl-formatted csv files,
    renamed getDetermMatrix to getDeterministics
10Aug2006:
    commented out diagm, can use .diagonal() and diagflat() in numpy
21Jul2006:
    commented out zerosm, onesm, emptym, eyem, randm, which are obsoleted
    by the new numpy.matlib,
    what about diag?: still needs to be fixed in numpy,
    tried to avoid inefficient inverses (and use solve instead),
    replace asm/asmatrix by mat which now means the same in numpy,
    try to make makeNumpyMatrix redundant,
2Jun2006:
    added helpers zerosm, onesm, emptym, eyem, diagm to return numpy-matrices,
    added helper randm including workaround,
    switched to using ' instead of " where possible,
    don't add the replaced stuff like zeros etc. to the namespace anymore,
15May2006:
    kron is now in numpy, no need to substitute anymore;
    evals from geneigsympos are now always returned as array
1Mar2006:
    moved the Vecm class to file Vecmclass.py, and "stole" kron from scipy
28Feb2006:
    add Stock-Watson common-trends calculation
20Feb2006:
    work on deterministic adjustment of GG-decomp
14Feb2006:
    bugfixes and better treatment of S&L deterministics
12Feb2006:
    deterministics estimation a la S&L added to Vecm class,
    more use of numpy-lstsq-function
31Jan2006:
    all functions should return arrays or matrix-type according to the
    input, where that makes sense, i.e. whenever a data matrix is passed
    to a function(and where the purpose is not explicitly to produce
    matrices)
28Jan2006:
    bugfixing related to coeffs of restricted variables, wrote function
    for symmetric-def-gen.eigval problem to remove scipy-dependency
19Jan2006:
    work started on a vecm class
19Jan2006:
    switched over to raising exceptions instead of home-cooked string
    generation
19Jan2006:
    functions should all return numpy-matrix-type
6Jan2006:
    switched over to numpy/new-scipy
'''