Commit 72c17abd authored by Rémi Bèges

Fix file-listing error

parent 247d4e06
......@@ -14,7 +14,26 @@ from sklearn.utils import shuffle
import tempfile
import librosa
def fetch_dataset_slices(dir, test_percent=10, block_ms=32, framerate=16000, store_wav_file=True):
def list_files(dirlist, globbing=True):
    '''Collect the candidate files designated by dirlist.

    Attributes:
        dirlist     A single path/pattern, or a list or tuple of them.
        globbing    When True, every entry is expanded as a glob pattern, so
                    characters such as '*', '?', '[' and ']' act as wildcards
                    and a literal name containing them may not match itself.
                    When False, every entry is taken as a literal filename and
                    is kept only if it is an existing file.

    Returns: list of paths (empty when nothing matches)
    '''
    # Accept a bare entry as well as a list or tuple of entries. Tuples used
    # to be wrapped whole into a one-element list and then made glob() /
    # os.path.isfile() raise a TypeError.
    entries = dirlist if isinstance(dirlist, (list, tuple)) else [dirlist]

    files = []
    for entry in entries:
        if globbing:
            files.extend(glob(entry))
        elif os.path.isfile(entry):
            # Literal mode: no pattern expansion, so names containing glob
            # metacharacters (e.g. 'bar[qux].mkv') are found as-is.
            files.append(entry)
    return files
def fetch_dataset_slices(dir, test_percent=10, block_ms=32, framerate=16000,
store_wav_file=True, globbing=True):
'''
Attributes:
dir Directory where the movies + subtitles are located
......@@ -25,14 +44,10 @@ def fetch_dataset_slices(dir, test_percent=10, block_ms=32, framerate=16000, sto
# TODO: Extract framerate instead of hardcoding fps
assert test_percent < 100
# List all images
if not isinstance(dir, list):
dir = [dir]
files = []
for d in dir:
files += glob(d)
files = list_files(dir)
#files = files[:3]
if len(files) == 0:
raise IndexError('No files found under glob {}'.format(dir))
# Ensure array have an integer amount of blocks
# Make blocks of 20 ms
......
......@@ -21,7 +21,7 @@ def sync(movie, sub, save_to=None):
save_to Export filename for the synced subtitle.
'''
yield 'Extracting audio...'
test_audio = fetch_dataset_slices(movie, test_percent=0, store_wav_file=False)
test_audio = fetch_dataset_slices(movie, test_percent=0, store_wav_file=False, globbing=False)
yield 'Analyzing audio...'
folder = os.path.dirname(os.path.dirname(__file__))
......
Do not add or remove files from this folder!
Used by test_dataset/test_detect_files.
......@@ -29,3 +29,13 @@ def test_sub_to_sequence(sub_filename):
seq = sub_to_sequence(sub_filename, total_length=1000000)
assert np.isclose(np.amin(seq), 0.0)
assert np.isclose(np.amax(seq), 1.0)
@parameterized.expand([
    # (path or pattern, globbing flag, number of files expected)
    # Plain filename: as a glob it matches itself.
    ('test/resource/filelist/foo.mkv', True, 1),
    # As a glob, '[qux]' is a character class, so the literal name is missed.
    ('test/resource/filelist/bar[qux].mkv', True, 0),
    # With globbing disabled the same name is looked up literally.
    ('test/resource/filelist/bar[qux].mkv', False, 1),
    # Wildcard over the fixture folder (presumably holds exactly two .mkv
    # files -- verify against test/resource/filelist).
    ('test/resource/filelist/*.mkv', True, 2),
])
def test_detect_files(dirlist, globbing, expected_count):
    # Only the number of detected paths is checked, not their order or names.
    n_found = len(list_files(dirlist, globbing=globbing))
    assert n_found == expected_count
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment