Commit 3751efb0 by Hyungjin Ko

fix README.md and change index_1000 name

parent 0fc24769
......@@ -23,13 +23,6 @@ If you use this code or dataset as part of any published research, please refer
## Setup
### Prerequisites
Make sure you have the following software installed on your system:
- Python 2.7
- Tensorflow 1.0+
### Get our code
```
......@@ -62,15 +55,43 @@ python setup.py install
### Prepare Data
- Video Feature
    - Divide each video file into frames using **ffmpeg**, extracted at 24 frames per second.
    - Use one frame out of every five frames.
- Extract Resnet-152 res5c feature and make it hdf file.
    - Make a soft link in the dataset folder (create the dataset folder in root).
1. Download [LSMDC data](https://sites.google.com/site/describingmovies/lsmdc-2016/download).
2. Extract all frames in videos into a separate folder. Here is one example script that extracts avi files into frames. You can save following script and run it with "./SCRIPT_NAME.sh INPUT_FOLDER avi OUTPUT_FOLDER"
```
#!/bin/bash
# Extract every video "$1"/*.$2 into per-video frame folders under "$3"
# using ffmpeg (one numbered .jpg per frame).
#   $1 = input folder, $2 = file extension (e.g. avi), $3 = output folder
if [ "$1" == '' ] || [ "$2" == '' ] || [ "$3" == '' ]; then
    echo "Usage: $0 <input folder> <file extension> <output folder>";
    exit;
fi
# NOTE: the original loop swapped $2/$3 (globbed on "$3", wrote to "$2"),
# contradicting the usage message above; fixed to match the documented order.
for file in "$1"/*."$2"; do
    # Strip the input-folder prefix and the ".<ext>" suffix to get the
    # per-video subfolder name (the extra -1 accounts for the dot).
    destination="$3${file:${#1}:${#file}-${#1}-${#2}-1}";
    mkdir -p "$destination";
    ffmpeg -i "$file" "$destination/%d.jpg";
done
```
3. Extract ResNet-152 features by using each pretrained models
- Extract 'res5c' for ResNet-152.
- Only use one frame every five frames.
4. Wrap each extracted features into hdf5 file, name as 'RESNET.hdf5' and save it in 'root/dataset/LSMDC/LSMDC16_features'.
- Data frames
- We processed the raw data frame files in LSMDC16.
- [[Download dataframes]](https://drive.google.com/open?id=0B1VtBNgsMJBgLXRseVhxVDhfSEE)
- Save these files in "root/dataset/LSMDC/DataFrame"
- Vocabulary
- Embed words by GloVe word embedding
    - We build the word embedding matrix using GloVe vectors.
- [Download vocabulary files](https://drive.google.com/open?id=0B1VtBNgsMJBga09ubXE4ajhGNjg)
- These files include word embedding matrix file, word-index mapping file, and concept-index mapping file.
- Save these files in "root/dataset/LSMDC/Vocabulary"
......
......@@ -176,18 +176,18 @@ class DatasetLSMDC():
assert_exists(word2idx_path)
idx2word_path = os.path.join(VOCABULARY_DIR, 'index_to_word.hkl')
assert_exists(idx2word_path)
index_1000_path = os.path.join(VOCABULARY_DIR, 'index_1000.hkl')
assert_exists(index_1000_path)
word2idx_1000_path = os.path.join(VOCABULARY_DIR, 'word_to_index_1000.hkl')
assert_exists(word2idx_1000_path)
index_2000_path = os.path.join(VOCABULARY_DIR, 'index_2000.hkl')
assert_exists(index_2000_path)
word2idx_2000_path = os.path.join(VOCABULARY_DIR, 'word_to_index_2000.hkl')
assert_exists(word2idx_2000_path)
# Load the BoW vocabulary mappings (1000- and 2000-word variants) from
# their hickle dumps; paths were validated by assert_exists above.
with open(word2idx_1000_path, 'r') as f:
    self.word2idx_1000 = hkl.load(f)
log.info("Load word2idx_1000 from hkl file : %s", word2idx_1000_path)
with open(word2idx_2000_path, 'r') as f:
    self.word2idx_2000 = hkl.load(f)
# BUG FIX: this log line previously reported word2idx_1000_path.
log.info("Load word2idx_2000 from hkl file : %s", word2idx_2000_path)
with open(index_1000_path, 'r') as f:
    self.index_1000 = hkl.load(f)
log.info("Load index_1000 from hkl file : %s", index_1000_path)
with open(index_2000_path, 'r') as f:
    self.index_2000 = hkl.load(f)
log.info("Load index_2000 from hkl file : %s", index_2000_path)
with open(word_matrix_path, 'r') as f:
self.word_matrix = hkl.load(f)
......@@ -214,8 +214,8 @@ class DatasetLSMDC():
self.idx2word = dataset.idx2word
self.word2idx = dataset.word2idx
self.index_1000 = dataset.index_1000
self.word2idx_1000 = dataset.word2idx_1000
self.index_2000 = dataset.index_2000
self.word2idx_2000 = dataset.word2idx_2000
if hasattr(dataset, 'word_matrix'):
self.word_matrix = dataset.word_matrix
......@@ -358,8 +358,8 @@ class DatasetLSMDC():
vid_key = self.data_df.loc[key, 'vid_key']
bow_words = self.cap_df.loc[vid_key, 'bow']
bow_words = literal_eval(bow_words)
bow_indices = [self.word2idx_1000[word] for word in bow_words]
bow_onehots = np.zeros(len(self.index_1000))
bow_indices = [self.word2idx_2000[word] for word in bow_words]
bow_onehots = np.zeros(len(self.index_2000))
bow_onehots[bow_indices] = 1.0
return bow_onehots
......@@ -425,7 +425,7 @@ class DatasetLSMDC():
+ list(self.get_video_feature_dimension()),
dtype=np.float32)
batch_caption = np.zeros([batch_size, self.max_length], dtype=np.uint32)
batch_bow = np.zeros([batch_size, len(self.index_1000)], dtype=np.uint32)
batch_bow = np.zeros([batch_size, len(self.index_2000)], dtype=np.uint32)
batch_video_mask = np.zeros([batch_size, self.max_length], dtype=np.uint32)
batch_caption_mask = np.zeros([batch_size, self.max_length], dtype=np.uint32)
......@@ -472,7 +472,7 @@ class DatasetLSMDC():
dtype=np.float32)
batch_blank_sent = np.zeros([batch_size, self.max_length], dtype=np.uint32)
batch_answer = np.zeros([batch_size, self.word_matrix.shape[0]], dtype=np.uint32)
batch_bow = np.zeros([batch_size, len(self.index_1000)], dtype=np.uint32)
batch_bow = np.zeros([batch_size, len(self.index_2000)], dtype=np.uint32)
batch_video_mask = np.zeros([batch_size, self.max_length], dtype=np.uint32)
batch_blank_sent_mask = np.zeros([batch_size, self.max_length], dtype=np.uint32)
......@@ -520,7 +520,7 @@ class DatasetLSMDC():
dtype=np.float32)
batch_candidates = np.zeros([batch_size, 5, self.max_length], dtype=np.uint32)
batch_answer = np.zeros([batch_size], dtype=np.uint32)
batch_bow = np.zeros([batch_size, len(self.index_1000)], dtype=np.uint32)
batch_bow = np.zeros([batch_size, len(self.index_2000)], dtype=np.uint32)
batch_video_mask = np.zeros([batch_size, self.max_length], dtype=np.uint32)
batch_candidates_mask = np.zeros([batch_size, 5, self.max_length], dtype=np.uint32)
......
......@@ -63,7 +63,7 @@ def main(argv):
session = tf.Session(graph=g, config=tf_config)
model = MODELS[train_config.train_tag](model_config, train_dataset.word_matrix,
train_dataset.index_1000)
train_dataset.index_2000)
log.info("Build the model...")
model.build_model(**model.get_placeholder())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment