Commit 47941f1a authored by Rogier van der Geer

Merge branch 'dev' into 'master'

Dev

See merge request !4
parents 4db0d852 765e0f27
@@ -10,16 +10,6 @@ before_script:
 stages:
   - build
 
-build:
-  stage: build
-  script:
-    - docker build .
-  except:
-    - master
-    - dev
-  tags:
-    - docker
-
 build_and_push:
   stage: build
   script:
# skylines
This project contains a fully functioning image classifier. It was mainly
intended as an example, but can also be used as-is.
### Usage
Install `skylines` in your virtualenv using something like `pip install .` in the directory where the `setup.py` lives.
Then all you need is:
```python
from skylines.model import Model
from skylines.loader import Loader
loader = Loader('path/to/my/data', batch_size=8)
model = Model.create_model(loader, checkpoint_location='./checkpoint', model='squeezenet')
model.train(n_epochs=10)
```
Then you can do:
```python
model.display_results()
model.confusion_matrix()
model.plot_performance()
```
to get a nice overview of the results.
The data is expected to be organised in three subfolders, each of which contains a subfolder per class:
```
data/
    train/
        class_1/
        class_2/
        ...
    test/
        class_1/
        class_2/
        ...
    val/
        class_1/
        class_2/
```
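The class names are taken from these subfolder names. If you want to sanity-check what the loader will see, you can point torchvision's `ImageFolder` (which `skylines` builds on) at one of the subsets; a minimal sketch, assuming your data lives at `path/to/my/data`:
```python
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Each subfolder of train/ becomes one class; the same holds for test/ and val/.
train_set = ImageFolder('path/to/my/data/train', transform=transforms.ToTensor())
print(train_set.classes)       # e.g. ['class_1', 'class_2']
print(train_set.class_to_idx)  # e.g. {'class_1': 0, 'class_2': 1}
```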
### AWS SageMaker
I've included a Dockerfile which you can use to run the model as an AWS SageMaker job. Here is an example configuration:
```yaml
TrainingJobName: <your-job-name>
RoleArn: <your-sagemaker-role>
ResourceConfig:
  InstanceType: ml.p2.xlarge
  InstanceCount: 1
  VolumeSizeInGB: 64
AlgorithmSpecification:
  TrainingImage: <the-skylines-image-on-your-ECR>
  TrainingInputMode: File
InputDataConfig:
  - ChannelName: images
    DataSource:
      S3DataSource:
        S3DataType: S3Prefix
        S3Uri: <s3://path/to/your/data>
  # If you point this channel at the output of a previous job, training will continue where it left off
  - ChannelName: model
    DataSource:
      S3DataSource:
        S3DataType: S3Prefix
        S3Uri: <s3://output/path/name-of-job/output>
HyperParameters:
  model: squeezenet
  num_epochs: 10
OutputDataConfig:
  S3OutputPath: <s3://output/path>
StoppingCondition:
  MaxRuntimeInSeconds: <some-number-of-seconds>
```
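To launch the job you can hand this configuration to SageMaker's `create_training_job` API, for example through boto3. A minimal sketch, assuming the YAML above is saved as `job.yaml` (a hypothetical file name) and all `<...>` placeholders have been filled in; using boto3 for submission is my own suggestion, not something this repo provides:
```python
import boto3
import yaml

# Load the training-job configuration shown above (hypothetical file name).
with open('job.yaml') as f:
    config = yaml.safe_load(f)

# SageMaker expects hyperparameter values as strings, so cast e.g. num_epochs: 10.
config['HyperParameters'] = {k: str(v) for k, v in config.get('HyperParameters', {}).items()}

# The top-level keys of the YAML map directly onto the create_training_job parameters.
boto3.client('sagemaker').create_training_job(**config)
```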
 from os import path
 from pathlib import Path
+from typing import Dict
 from torch import Tensor
-from torch.utils.data import DataLoader
+from torch.utils.data import Dataset, DataLoader, ConcatDataset
 from torchvision import transforms
 from torchvision.datasets import ImageFolder
 
+class ImagePathFolder(ImageFolder):
+    def __getitem__(self, index):
+        sample, target = super(ImagePathFolder, self).__getitem__(index)
+        path, _ = self.imgs[index]  # return image path
+        return sample, target, path
+
 class Loader:
     def __init__(self, image_path: str, batch_size: int = 32, num_workers: int = 1):
@@ -56,11 +64,16 @@ class Loader:
             result += [1/n_items] * n_items
         return result
 
-    def create_datasets(self):
-        return {
-            subset: ImageFolder(path.join(self.path, subset), transform=transform)
+    def create_datasets(self) -> Dict[str, Dataset]:
+        result = {
+            subset: ImagePathFolder(path.join(self.path, subset), transform=transform)
             for subset, transform in self.transforms.items()
         }
+        result['all'] = ConcatDataset([
+            ImagePathFolder(path.join(self.path, subset), transform=self.transforms['test'])
+            for subset in self.transforms.keys()
+        ])
+        return result
 
     def create_loaders(self):
         return {
@@ -42,17 +42,17 @@ class Model:
         else:
             raise ValueError(f'Unknown model: "{model}".')
         optimizer = SGD(model.parameters(), lr=1E-3, momentum=0.9)
-        scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=10)
+        scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=25)
         result = cls(model=model, loader=loader, criterion=CrossEntropyLoss(weight=loader.class_weights.to(device)),
                      optimizer=optimizer, scheduler=scheduler, device=device, checkpoint_location=checkpoint_location)
         if auto_load:
             if not checkpoint_location:
                 raise ValueError('Cannot automatically load a model without checkpoint location!')
-            ckpt_dir = path.join(checkpoint_location, 'ckpt')
-            if path.isfile(ckpt_dir):
+            model_path = path.join(checkpoint_location, 'model')
+            if path.isfile(model_path):
                 result.load()
             else:
-                print(f'Unable to find a model checkpoint in {ckpt_dir}.')
+                print(f'Unable to find a model checkpoint in {model_path}.')
         return result
 
     @staticmethod
@@ -85,31 +85,35 @@ class Model:
             raise ValueError('Cannot checkpoint model: no location specified and no default location set.')
         if not path.isdir(location):
             mkdir(location)
-        save(self.model.state_dict(), path.join(location, 'ckpt'))
+        save(self.model.state_dict(), path.join(location, 'model'))
+        save(self.optimizer.state_dict(), path.join(location, 'optimizer'))
+        save(self.scheduler.state_dict(), path.join(location, 'scheduler'))
         with open(path.join(location, 'loss'), 'w') as f:
             dump_json(self.loss, f)
         print(f'Checkpointed model state to {location}.')
 
+    def load(self, location: Optional[str] = None):
+        location = location or self.checkpoint_location
+        if not location:
+            raise ValueError('Cannot load model: no location specified and no default location set.')
+        self.model.load_state_dict(load(path.join(location, 'model'), map_location=self.device))
+        self.optimizer.load_state_dict(load(path.join(location, 'optimizer'), map_location=self.device))
+        self.scheduler.load_state_dict(load(path.join(location, 'scheduler'), map_location=self.device))
+        with open(path.join(location, 'loss')) as f:
+            self.loss = load_json(f)
+        print(f'Loaded model state from {location}.')
+
     def evaluate(self,
-                 validation: bool = False,
+                 data_set: str = 'test',
                  n_batches: Optional[int] = None) -> Dict[str, float]:
         sum_accuracy, sum_loss, n_images = 0, 0, 0
-        for result in self._evaluations(validation=validation, n_batches=n_batches):
+        for result in self._evaluations(data_set=data_set, n_batches=n_batches):
             sum_accuracy += result['prediction'] == result['label']
             sum_loss += result['loss']
             n_images += 1
         # TODO: evaluation-time modifications
         return dict(loss=sum_loss / n_images, accuracy=sum_accuracy / n_images)
 
-    def load(self, location: Optional[str] = None):
-        location = location or self.checkpoint_location
-        if not location:
-            raise ValueError('Cannot load model: no location specified and no default location set.')
-        self.model.load_state_dict(load(path.join(location, 'ckpt'), map_location=self.device))
-        with open(path.join(location, 'loss')) as f:
-            self.loss = load_json(f)
-        print(f'Loaded model state from {location}.')
-
     def train(self, n_epochs: int) -> DataFrame:
         for _ in trange(n_epochs):
             tr = self._train_epoch()
@@ -121,6 +125,10 @@ class Model:
             self.loss.append(result)
         return self.progress_dataframe
 
+    def predict(self, data_set: str = 'all', n_batches: Optional[int] = None) -> DataFrame:
+        results = list(self._predictions(data_set=data_set, n_batches=n_batches))
+        return DataFrame(results)
+
     @property
     def progress_dataframe(self) -> DataFrame:
         return DataFrame(self.loss).set_index('epoch')
@@ -188,8 +196,8 @@ class Model:
         ax.set_ylabel('accuracy')
         ax.set_xlim([results.index.min(), results.index.max()])
 
-    def display_results(self, n_rows: int = 2, n_cols: int = 2, validation: bool = False, how: str = 'sample'):
-        results = self.get_results(validation=validation, n=n_cols * n_rows, how=how)
+    def display_results(self, n_rows: int = 2, n_cols: int = 2, data_set = 'test', how: str = 'sample'):
+        results = self.get_results(data_set=data_set, n=n_cols * n_rows, how=how)
         fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(n_cols*4, n_rows*4))
         for result, ax in zip(results, (ax for row in axes for ax in row)):
             ax.imshow(transpose(result['image'], (1, 2, 0)))
@@ -201,9 +209,9 @@ class Model:
             else:
                 ax.set_title(f'{label} (p={prediction})', color='r')
 
-    def get_results(self, validation: bool = False, n: int = 16,
+    def get_results(self, data_set: str = 'test', n: int = 16,
                     how: str = 'sample', select_class: Optional[str] = None) -> List[Dict[str, Any]]:
-        results = self._evaluations(validation=validation)
+        results = self._evaluations(data_set=data_set)
         if select_class:
             results = filter(lambda result: result['label'] == self.loader.lookup_class(select_class), results)
         if how == 'sample':
@@ -229,7 +237,7 @@ class Model:
         self.model.train()
         batch_loss, batch_accuracy = [], []
-        for inputs, labels in self._loop(loader=self.loader.loaders['train'], n_batches=n_batches):
+        for inputs, labels, paths in self._loop(loader=self.loader.loaders['train'], n_batches=n_batches):
             inputs, labels = inputs.to(self.device), labels.to(self.device)
             self.optimizer.zero_grad()
@@ -247,20 +255,25 @@ class Model:
         return dict(loss=mean(batch_loss), accuracy=mean(batch_accuracy))
 
     def _evaluations(self,
-                     validation: bool = False,
+                     data_set: str = 'test',
                      n_batches: Optional[int] = None) -> Generator[Dict[str, Any], None, None]:
         self.model.eval()
-        loader = self.loader.loaders['val' if validation else 'test']
+        loader = self.loader.loaders[data_set]
         with no_grad():
-            for inputs, labels in self._loop(loader=loader, n_batches=n_batches):
+            for inputs, labels, paths in self._loop(loader=loader, n_batches=n_batches):
                 inputs, labels = inputs.to(self.device), labels.to(self.device)
                 outputs = self.model(inputs)
-                for input_tensor, output_tensor, label in zip(inputs, outputs, labels):
+                for input_tensor, output_tensor, label, path in zip(inputs, outputs, labels, paths):
                     yield dict(image=input_tensor, output=output_tensor,
+                               image_path=path,
                                label=label.cpu().item(),
                                prediction=argmax(output_tensor.cpu()).item(),
                                loss=self.criterion(output_tensor.unsqueeze(0), label.unsqueeze(0)).cpu().item())
 
+    def _predictions(self, data_set: str = 'all', n_batches: Optional[int] = None) -> Generator[Dict[str, Any], None, None]:
+        for result in self._evaluations(data_set=data_set, n_batches=n_batches):
+            yield dict(image_path=result['image_path'], prediction=self.loader.class_names[result['prediction']])
+
     @staticmethod
     def _loop(loader: DataLoader, n_batches: Optional[int] = None) -> Generator[List, None, None]:
         n_batches = min(len(loader), n_batches) if n_batches else len(loader)
 from argparse import ArgumentParser, Namespace
 from typing import Any, Dict
+from os import path
 from yaml import load, SafeLoader
@@ -13,6 +14,8 @@ def train(config: Namespace):
     progress = model.train(n_epochs=int(config.num_epochs))
     print(progress)
     model.checkpoint()
+    results = model.predict()
+    results.to_csv(path.join(config.checkpoint, 'results.csv'))
 
 def parse_train_arguments() -> Namespace: