Notebook
In [845]:
import numpy as np
import scipy as sp
In [846]:
# Tunable parameters used by the quarter-detection cells further down.
# num_of_quarters : number of change positions kept for every column of x.
# days_in_quarter : gap threshold (in rows) used when filling in missing
#                   change positions; also the offset of the filled-in position.
num_of_quarters = 4
days_in_quarter = 3
In [847]:
# Toy input: 12 rows (one per day) by 3 columns (one per security).
# Rows are written as runs of identical days; NaN marks a missing value.
x = np.array(
    3 * [[1, np.nan, 3]]    # days 0-2
    + 3 * [[1, 1, 4]]       # days 3-5
    + 1 * [[1, 7, 5]]       # day 6
    + 2 * [[2, 7, 5]]       # days 7-8
    + 2 * [[2, 7, 6]]       # days 9-10
    + 1 * [[4, 7, 6]]       # day 11
)
In [848]:
# Four constant-valued 1-D segments of unequal length (68, 59, 67 and 63
# samples). The first segment is entirely NaN.
q1 = np.full(68, np.nan)
q2 = np.full(59, 0.40)
q3 = np.full(67, 0.40)
q4 = np.full(63, 0.30)

# Disabled alternative input: the four segments joined into one series, with a
# matching larger days_in_quarter. Left commented out, as in the original.
#x=np.concatenate([q1, q2, q3, q4])
#days_in_quarter = 70
#x
In [849]:
# Dimensions of the input: number of rows and number of columns of x.
window_length, num_of_securities = x.shape[0], x.shape[1]

# np.nan_to_num replaces NaN with 0. A copy of the first row, shifted by +1, is
# then placed in front so that differencing always reports a change at
# position 0 for every column.
y = np.nan_to_num(x)
y = np.concatenate((y[:1] + 1, y), axis=0)
In [850]:
# Put one security per row, then take the element-to-element difference along
# each row. A non-zero entry in d marks a position where the value changed.
# NOTE: this cell rebinds `y` to its own transpose, so re-running it without
# re-running the previous cell flips the axes back.
y = y.T
d = y[:, 1:] - y[:, :-1]
d
Out[850]:
array([[-1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  2.],
       [-1.,  0.,  0.,  1.,  0.,  0.,  6.,  0.,  0.,  0.,  0.,  0.],
       [-1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.]])
In [851]:
# Coordinates of every non-zero difference, as a (row_indices, column_indices)
# pair: i[0] is the row of d (security), i[1] the column (position of the change).
i = np.nonzero(d != 0)
i
Out[851]:
(array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2]),
 array([ 0,  7, 11,  0,  3,  6,  0,  3,  6,  9]))
In [852]:
# Count how often each row index occurs among the change coordinates, i.e. the
# number of detected changes per row of d.
changed_rows = i[0]
f = np.bincount(changed_rows)
f
Out[852]:
array([3, 3, 4])
In [853]:
# Running totals of the per-row counts, without the grand total: these are the
# offsets at which the flat list of change positions switches to the next row.
sidxs = f[:-1].cumsum()
sidxs
Out[853]:
array([3, 6])
In [854]:
# Cut the flat vector of change positions at the row boundaries, giving one
# array of change positions per row of d.
change_positions = i[1]
sa = np.split(change_positions, indices_or_sections=sidxs)
sa
Out[854]:
[array([ 0,  7, 11]), array([0, 3, 6]), array([0, 3, 6, 9])]
In [855]:
# Build one row of exactly `num_of_quarters` change positions per entry of sa.
quarter_rows = []
for change_idx in sa:
    found = len(change_idx)
    if found < num_of_quarters:
        missing = num_of_quarters - found
        # Pad with the largest position on both sides and keep the tail, which
        # leaves the original positions followed by copies of the maximum.
        change_idx = np.pad(change_idx, missing, 'maximum')[-num_of_quarters:]
        # Positions whose distance to the next one exceeds days_in_quarter;
        # at most `missing` of them are used.
        wide_gaps = np.where(np.diff(change_idx) > days_in_quarter)[0][:missing]
        # After each such position insert a new one days_in_quarter later,
        # then trim back to num_of_quarters entries.
        change_idx = np.insert(change_idx, wide_gaps + 1,
                               change_idx[wide_gaps] + days_in_quarter)[:num_of_quarters]
    else:
        # Enough changes were found: keep only the most recent ones.
        change_idx = change_idx[-num_of_quarters:]
    quarter_rows.append(change_idx)

# The empty integer seed keeps the (0, num_of_quarters) shape when sa is empty.
sa2 = np.vstack([np.empty([0, num_of_quarters], dtype=int)] + quarter_rows)

sa2
Out[855]:
array([[ 0,  3,  7, 11],
       [ 0,  3,  6,  6],
       [ 0,  3,  6,  9]])
In [856]:
# Scratch check: asking for the last four entries of a shorter array simply
# returns the whole array.
a = sa[1]
a[-4:]
Out[856]:
array([0, 3, 6])
In [857]:
# Flatten the change positions: one entry per (row of sa2, quarter) pair.
sa2r = np.ravel(sa2)

# Matching row numbers: i0[k, q] == k for every quarter q, so that i0r and sa2r
# can be used together as a pair of fancy-index vectors.
i0 = np.repeat(np.arange(len(sa2))[:, np.newaxis], num_of_quarters, axis=1)
i0r = np.ravel(i0)

# print(...) with a single argument behaves the same on Python 2 and Python 3;
# the bare `print x` statement is a SyntaxError on Python 3.
print(i0r)
print(sa2r)
[0 0 0 0 1 1 1 1 2 2 2 2]
[ 0  3  7 11  0  3  6  6  0  3  6  9]
In [858]:
# Look up the original value of x at every selected (position, column) pair.
# Indexing x[row, col] directly is the same as indexing its transpose with the
# two index vectors swapped.
xa = x[sa2r, i0r]
xa
Out[858]:
array([  1.,   1.,   2.,   4.,  nan,   1.,   7.,   7.,   3.,   4.,   5.,
         6.])
In [859]:
# Fold the flat lookup back into a table with one row per quarter and one
# column per security. The quarter count comes from num_of_quarters rather than
# a literal 4, so the reshape stays valid if that setting is changed.
# (The variable name keeps its original spelling because a later cell reads it.)
quartely_data = np.transpose(np.reshape(xa, (num_of_securities, num_of_quarters)))
quartely_data
Out[859]:
array([[  1.,  nan,   3.],
       [  1.,   1.,   4.],
       [  2.,   7.,   5.],
       [  4.,   7.,   6.]])
In [860]:
# Column totals over the quarters; a NaN in any quarter makes that column's
# total NaN as well.
quartely_data.sum(axis=0)
Out[860]:
array([  8.,  nan,  18.])
In [861]:
# Gaps between consecutive entries of two hand-written offset sequences.
# print(...) with a single argument works on both Python 2 and Python 3.
print(np.diff((0,  29,  92, 155, 220)))
print(np.diff((0,  42, 112, 169, 232)))
[29 63 63 65]
[42 70 57 63]