import numpy as np
import scipy as sp
# Exploratory script: for every security (a column of `x`) find the rows at
# which its value changes, choose `num_of_quarters` row indices per security
# from those change points, and gather the values found at those rows into
# `quartely_data` (one row per quarter, one column per security).

# Largest gap (in rows) allowed between two consecutive change points before
# a synthetic change point is inserted into the gap.
days_in_quarter = 3
# Number of row indices selected per security.
num_of_quarters = 4

# Toy input: one column per security; the trailing comments number the rows.
x = np.array([
    [1, np.nan, 3],  # 0
    [1, np.nan, 3],  # 1
    [1, np.nan, 3],  # 2
    [1, 1, 4],       # 3
    [1, 1, 4],       # 4
    [1, 1, 4],       # 5
    [1, 7, 5],       # 6
    [2, 7, 5],       # 7
    [2, 7, 5],       # 8
    [2, 7, 6],       # 9
    [2, 7, 6],       # 10
    [4, 7, 6],       # 11
])

# Alternative 1-D test series: a leading NaN stretch followed by three
# constant stretches.  Unused unless the two commented lines are enabled.
q1 = np.full(68, np.nan)
q2 = np.full(59, 0.40)
q3 = np.full(67, 0.40)
q4 = np.full(63, 0.30)
# x = np.concatenate([q1, q2, q3, q4])
# days_in_quarter = 70

window_length = x.shape[0]
num_of_securities = x.shape[1]

# NaN never compares equal to itself, so map it to 0 before differencing.
# Then prepend a copy of the first row shifted by 1: this guarantees that
# every security has a change at position 0.
y = np.nan_to_num(x)
y = np.insert(y, 0, y[0] + 1, axis=0)
y = np.transpose(y)  # one row per security
d = np.diff(y, axis=1)

# i[0] holds the security index and i[1] the row of `x` at which that
# security takes a new value.
i = np.where(d != 0)

# Number of change points per security, turned into split offsets so that
# `sa` is a list with one array of change-point rows per security.
f = np.bincount(i[0])
sidxs = np.cumsum(f)[:-1]
sa = np.split(i[1], sidxs)

# Reduce every security to exactly `num_of_quarters` row indices.
rows = []
for a in sa:
    n = len(a)
    if n >= num_of_quarters:
        # Enough change points: keep the most recent ones.
        a = a[-num_of_quarters:]
    else:
        # Too few change points: extend on the right with the last (largest)
        # change point so the array has `num_of_quarters` entries ...
        a = np.pad(a, (0, num_of_quarters - n), 'maximum')
        # ... then, for at most as many gaps as there are missing entries,
        # insert a synthetic point `days_in_quarter` rows after the start of
        # every gap that is wider than `days_in_quarter`.
        d_sa2 = np.diff(a)
        i_sa2 = np.where(d_sa2 > days_in_quarter)[0]
        i_sa2 = i_sa2[0:num_of_quarters - n]
        a = np.insert(a, i_sa2 + 1, a[i_sa2] + days_in_quarter)
        # Truncating drops the padded duplicates displaced by the inserts;
        # when there were not enough wide gaps some duplicates remain.
        a = a[:num_of_quarters]
    rows.append(a)
# Stack once instead of growing the array inside the loop; the reshape keeps
# the (0, num_of_quarters) shape when there are no securities at all.
sa2 = np.array(rows, dtype=int).reshape(-1, num_of_quarters)

# Flat (security, row) index pairs for fancy indexing into `x`.
sa2r = np.ravel(sa2)
i0 = np.repeat(np.arange(len(sa2))[:, np.newaxis], num_of_quarters, axis=1)
i0r = np.ravel(i0)
# Single-argument print(...) produces the same output on Python 2 and 3.
print(i0r)
print(sa2r)

# Values of each security at its selected rows, arranged as
# (num_of_quarters, num_of_securities).
xa = np.transpose(x)[i0r, sa2r]
quartely_data = np.transpose(
    np.reshape(xa, (num_of_securities, num_of_quarters)))
# Per-security total over the selected rows (NaN if any selected value is NaN).
quarterly_sum = np.sum(quartely_data, axis=0)

# Gap lengths between two hand-written sequences of change points.
print(np.diff((0, 29, 92, 155, 220)))
print(np.diff((0, 42, 112, 169, 232)))