deepYeast/temp.py
2017-09-18 16:31:54 -05:00

33 lines
1.0 KiB
Python

from util import *
"""将positive data中各种size的cell均衡化,避免某一类cell过多"""
# Maximum number of examples kept from any single area bin.
NB_EACH_BIN = 1000
# Number of equal-width area bins used to group the examples.
NB_BINS = 20
# Input CSV; the first 4 columns of every row are the rectangle target.
filename = './data/rect_data.csv'

# NOTE(review): `np` is assumed to be provided by `from util import *` at the
# top of this file -- confirm that util re-exports numpy under that name.


def rect_areas(targets):
    """Return the area of every rectangle in *targets*.

    Args:
        targets: 2-D array whose first four columns are ``c0, c1, c2, c3``;
            the area of a row is ``(c1 - c0) * (c3 - c2)``.

    Returns:
        1-D int64 array with one area per row.
    """
    # Widen to int64 so the product cannot overflow the int32 input dtype.
    targets = np.asarray(targets, dtype=np.int64)
    return (targets[:, 1] - targets[:, 0]) * (targets[:, 3] - targets[:, 2])


def balance_by_area(data, nb_each_bin=NB_EACH_BIN, nb_bins=NB_BINS):
    """Cap the number of examples per rectangle-area bin.

    The rows are grouped into ``nb_bins`` equal-width bins of rectangle area
    (the same edges ``np.histogram`` produces).  Any bin holding more than
    ``nb_each_bin`` rows is randomly down-sampled to exactly that many rows;
    smaller bins are kept whole and empty bins are skipped.

    Args:
        data: 2-D array; the first 4 columns of each row are the rectangle
            target used to compute the area.
        nb_each_bin: maximum number of rows kept per bin.
        nb_bins: number of equal-width area bins.

    Returns:
        2-D array with the retained rows, ordered by increasing area bin.
        An input without rows is returned as an (empty) copy.
    """
    data = np.asarray(data)
    if data.shape[0] == 0:
        # Nothing to balance; np.vstack would raise on an empty list.
        return data.copy()

    area = rect_areas(data[:, 0:4])
    edges = np.histogram(area, bins=nb_bins)[1]

    # Map each area to the bin k with edges[k] <= area < edges[k + 1].
    # np.histogram treats the LAST bin as closed on the right, so the rows
    # with the maximum area (== edges[-1]) are clipped into the last bin
    # instead of being dropped.
    bin_idx = np.searchsorted(edges, area, side='right') - 1
    bin_idx = np.minimum(bin_idx, nb_bins - 1)

    kept = []
    for idx_bin in range(nb_bins):
        rows = data[bin_idx == idx_bin]  # boolean indexing yields a copy
        if rows.shape[0] == 0:
            continue
        if rows.shape[0] > nb_each_bin:
            # Random subset: shuffle the copy in place, keep the first rows.
            np.random.shuffle(rows)
            rows = rows[0:nb_each_bin, :]
        kept.append(rows)
    return np.vstack(kept)


def main():
    """Load the rectangle examples, balance them by area and save them."""
    # ndmin=2 keeps a single-row file two-dimensional.
    data = np.loadtxt(filename, dtype=np.int32, comments='#', delimiter=' ',
                      ndmin=2)
    # Randomise the row order so the rows inside each bin are not written in
    # input-file order.
    np.random.shuffle(data)
    balanced = balance_by_area(data, NB_EACH_BIN, NB_BINS)
    np.savetxt('./data/rect_data2.csv', balanced, fmt='%d',
               header='rect_examples, first 4 cols are target')


if __name__ == '__main__':
    main()