Commit 5a2ccebd by Genevieve Flaspohler

Changed file structure

parent 490456e3
......@@ -154,11 +154,11 @@ if __name__ == '__main__':
parser.add_argument("-i", "--image_size", type = int, default = 400, help = "Size of the downsampled images, in pixels by pixels.")
parser.add_argument("-c", "--image_channels", type = int, default = 3, help = "Number of color channels in image.")
parser.add_argument("-p", "--pickle_prefix", default = 'd20150419_19', help = "Prefix of the pickle files containing the data.")
parser.add_argument("-m", "--intermed_path", default = os.path.join(root_dir, 'data/panama/d20150419_19/intermed'), help = "Path to store intermediate data representations.")
parser.add_argument("-n", "--recon_path", default = os.path.join(root_dir, 'data/panama/d20150419_19/recon'), help = "Path to store reconstruction images.")
parser.add_argument("--num_pickles", type = int, default = 22, help = "The number of pickle files your data are stored in.")
parser.add_argument("--num_images", type = int, default = 2296, help = "The total number of image files within the pickles.")
parser.add_argument("-p", "--pickle_prefix", default = 'missionI', help = "Prefix of the pickle files containing the data.")
parser.add_argument("-m", "--intermed_path", default = os.path.join(root_dir, 'intermed/'), help = "Path to store intermediate data representations.")
parser.add_argument("-n", "--recon_path", default = os.path.join(root_dir, 'recon/'), help = "Path to store reconstruction images.")
parser.add_argument("--num_pickles", type = int, default = 11, help = "The number of pickle files your data are stored in.")
parser.add_argument("--num_images", type = int, default = 1116, help = "The total number of image files within the pickles.")
parser.add_argument("-s", "--stride", type = int, default = 2, help = "Convolutional stride.")
parser.add_argument("-w", "--window", type = int, default = 10, help = "Size of the convolutional filter.")
parser.add_argument("-o", "--output", type = int, default = 5, help = "Number of output channels in the bottleneck layer")
......@@ -191,35 +191,29 @@ if __name__ == '__main__':
n_epochs = args.epochs
batch_size = args.batch_size
datasets = os.listdir(os.path.join(root_dir, 'data/panama/'))
datasets = [x for x in datasets if x[0] == 'd']
#for i, PICKLE_RAW in enumerate(datasets):
# Added to automate finding the number of pickles files and the number of images files
IMAGE_PATH = os.path.join(root_dir, 'data/panama/' + PICKLE_RAW + '/intermed')
RECON_PATH = os.path.join(root_dir, 'data/panama/' + PICKLE_RAW + '/recon')
IMAGE_PATH = args.intermed_path
RECON_PATH = args.recon_path
if not os.path.exists(IMAGE_PATH):
os.makedirs(IMAGE_PATH)
if not os.path.exists(RECON_PATH):
os.makedirs(RECON_PATH)
image_files = glob.iglob(os.path.join(root_dir, 'data/panama/' + PICKLE_RAW + '/images/*.jpg'))
pickle_files = glob.iglob(os.path.join(root_dir, 'data/full_pickles/*' + PICKLE_RAW + '*.pickle'))
image_files = glob.iglob(os.path.join(root_dir, 'images/*.jpg'))
pickle_files = glob.iglob(os.path.join(root_dir, 'pickles/*' + PICKLE_RAW + '*.pickle'))
NUM_IMAGES = len(list(image_files))
NUM_FILES= len(list(pickle_files))
NUM_FILES = len(list(pickle_files))
PICKLE_FILE = 'panama_' + args.pickle_prefix + '.400.all.'
# PICKLE_FILE = args.pickle_prefix + '.400.all.'
# End addtion
# Initialize data array and read data from pickle files
X = np.zeros((NUM_IMAGES, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
mean_img = 0;
for i in xrange(NUM_FILES):
pickle_file = os.path.join(root_dir, 'data/full_pickles/' + PICKLE_FILE + str(i) + '.pickle')
for pickle_file in pickle_files:
#pickle_file = os.path.join(root_dir, 'data/full_pickles/' + PICKLE_FILE + str(i) + '.pickle')
print "Loading file", pickle_file
temp = Utils.load_pickled_dataset(pickle_file)
X[i*100:i*100+temp['x'].shape[0]] = temp['x']
......@@ -322,12 +316,17 @@ if __name__ == '__main__':
channels = np.zeros((intermed.shape[3]+1,)).astype(int)
'''
for x in xrange(intermed.shape[1]):
for y in xrange(intermed.shape[2]):
max_channel = np.argmax(intermed[0, x, y, :])
# Index plus one to account for adding the timestep as the first entry
channels[max_channel+1] += 1;
#channels[max_channel+1] += 1;
'''
for x in xrange(intermed.shape[3]):
sum = np.sum(intermed[0, :, :, x])
channels[x+1] = sum;
channels[0] = example_i;
activation[example_i, :] = channels;
......@@ -338,10 +337,9 @@ if __name__ == '__main__':
norm_activation /= np.max(norm_activation, axis = 0) + 1e-7 # this is to prevent from divide by zero error
norm_activation[0, :] = activation[0, :]
txtpath = os.path.join(root_dir, 'data/panama/' + PICKLE_RAW + '/')
np.savetxt(os.path.join(txtpath, 'activations.csv'), activation, delimiter=",")
np.savetxt(os.path.join(txtpath, 'norm_activations.csv'), norm_activation, delimiter=",")
np.savetxt(os.path.join(txtpath, 'perplexity.csv'), perplexity, delimiter=",", fmt='%d, %.2f')
np.savetxt(os.path.join(root_dir, 'activations.csv'), activation, delimiter=",")
np.savetxt(os.path.join(root_dir, 'norm_activations.csv'), norm_activation, delimiter=",")
np.savetxt(os.path.join(root_dir, 'perplexity.csv'), perplexity, delimiter=",", fmt='%d, %.2f')
# Writes the intermediate representation to image files, stored by channel name, at intermed_path
def write_intermediate_all():
......@@ -438,7 +436,6 @@ if __name__ == '__main__':
im.save(all_path)
# Run the two previously defined functions
#write_intermediate_all()
write_intermediate_long()
generate_activations()
......
......@@ -24,7 +24,9 @@ def get_key_virgin_islands(filename):
return int(filename[3:-4])
def get_key_panama(filename):
    """Return the numeric sort key embedded in a Panama image filename.

    Assumes the digit run starts at character index 21 and that the name
    ends with a 4-character extension (e.g. ".jpg") — TODO confirm this
    layout against the actual filenames produced upstream.
    """
    key_digits = filename[21:-4]
    return int(key_digits)
root_dir = os.environ['DATA_HOME'];
# ---------------------- User configurables -------------------------------#
USER_KEY = get_key_panama # Define function for sorting data chronologically
# ---------------------- User input -------------------------------#
......@@ -97,6 +99,8 @@ def make_basic_datasets(dp):
def save_pickle_file(pickle_file, save_dict, dp):
try:
if not os.path.exists(dp['PICKLE_PATH']):
os.makedirs(dp['PICKLE_PATH'])
f = open(dp['PICKLE_PATH'] + pickle_file, 'wb')
pickle.dump(save_dict, f, pickle.HIGHEST_PROTOCOL)
f.close()
......@@ -116,11 +120,11 @@ if __name__ == '__main__':
parser.add_argument("-c", "--image_channels", type = int, default = 3, help = "Number of color channels in image.")
parser.add_argument("-t", "--image_depth", type = int, default = 255, help = "Value range of a pixel (0 - image_depth). Std = 255.")
parser.add_argument("-f", "--file_chunk", type = int, default = 100, help = "Number of images saved in each pickle file.")
parser.add_argument("-p", "--pickle_path", default = os.path.join(root_dir, 'data/pickles/'), help = "Path to store the pickle files.")
parser.add_argument("-x", "--pickle_prefix", default = 'panama_d20150419_19.400.all.', help = "Prefix of the pickle files containing the data.")
parser.add_argument("-d", "--data_path", default = os.path.join(root_dir, 'data/panama/d20150403_2/processed/wX/images'), help = "Directory where the image files can be found.")
parser.add_argument("-p", "--pickle_path", default = os.path.join(root_dir, 'pickle/'), help = "Path to store the pickle files.")
parser.add_argument("-x", "--pickle_prefix", default = 'missionI.400.all.', help = "Prefix of the pickle files containing the data.")
parser.add_argument("-d", "--data_path", default = os.path.join(root_dir,'images'), help = "Directory where the image files can be found.")
args = parser.parse_args()
print "Making dataset and saving it to:", args.pickle_path
print "To change this and other settings, edit the flags."
......@@ -133,5 +137,6 @@ if __name__ == '__main__':
data_params['PREFIX_STR'] = args.pickle_prefix
data_params['DATA_PATH'] = args.data_path
make_basic_datasets(data_params)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment