Commit 36cf1a03 authored by Paul Shved's avatar Paul Shved

Implement concurrent box matching on GPU

parent c27e66ed
This diff is collapsed.
This diff is collapsed.
......@@ -88,7 +88,8 @@ class Box(object):
def _dbg_xform(self):
print("New xform: {} {} {}".format(self.xform_affine, self.xform_corner_A, self.xform_corner_b))
#print("New xform: {} {} {}".format(self.xform_affine, self.xform_corner_A, self.xform_corner_b))
pass
def center_x(self):
......@@ -173,6 +174,9 @@ class Box(object):
return intersection / union
def as_tensor_row(self):
return np.array([self.y, self.x, self.y + self.h, self.x + self.w])
def greedy_nms(boxes: List[Box], iou_threshold=0.5):
"""Perform NMS in a greedy way
......
......@@ -11,6 +11,8 @@ from smilebot.box import Box
from learning import tensors
from csvdb import truths
from tensormix import boxes
class Truths(truths.Truths):
def __init__(self, csv_filename):
self.reraise = False
......@@ -59,9 +61,7 @@ class Detector(object):
(12, 12)] # AlexNet conv5
self.input_image_shape = (224, 224, 3)
def get_anchors(self, scale_idx):
# The anchors are determined by the k-means analysis of the widths and sizes.
ANCHORS= [
self.ANCHORS= [
[[ 10, 10],
[ 25, 25],
[ 37, 40],
......@@ -71,7 +71,12 @@ class Detector(object):
[ 49.77373727, 65.84141215],
[104.05757209, 136.93817919]],
]
return ANCHORS[scale_idx]
self.ANCHORS_TENSOR_FMT = [ [[h,w] for w, h in scale ] for scale in self.ANCHORS ]
def get_anchors(self, scale_idx):
    """Returns the anchor box sizes for the given detector scale.

    Presumably each entry is a [w, h] pair (ANCHORS_TENSOR_FMT swaps
    them to [h, w]) -- confirm against the anchor table definition.
    """
    # The anchors are determined by the k-means analysis of the widths and sizes.
    return self.ANCHORS[scale_idx]
def num_anchors_for_scale(self, scale_idx):
    """Returns the number of anchor boxes defined at the given scale."""
    return len(self.get_anchors(scale_idx))
......@@ -124,14 +129,133 @@ class Detector(object):
def make_feature_vector_np_dict(self, gt_boxes: List[List[Box]], cheat=None, iou_threshold=0.3):
assert len(gt_boxes) == 1
fvs, masks, nums = self.make_feature_vector_np(gt_boxes, cheat=cheat, iou_threshold=iou_threshold)
#fvs, masks, nums = self.make_feature_vector_np(gt_boxes, cheat=cheat, iou_threshold=iou_threshold)
#return {
#'y_gt': fvs[0],
#'y_mask': masks[0],
#'num_matched_boxes': nums[0],
#}
fvs, masks, nums = self.make_feature_vector_np_mass(gt_boxes, cheat=cheat, iou_threshold=iou_threshold)
return {
'y_gt': fvs[0],
'y_mask': masks[0],
'num_matched_boxes': nums[0],
'y_gt': fvs[0].numpy(),
'y_mask': masks[0].numpy(),
'num_matched_boxes': nums[0].numpy(),
}
def encode_boxes(self, gt_boxes: List[List[Box]], dtype=tf.float16):
    """Encodes per-image ground-truth boxes and classes as dense tensors.

    A degenerate all-zero box with the UNLABELED one-hot class is prepended
    to every image, so index 0 can act as the "no match" sentinel.

    NOTE(review): tf.convert_to_tensor needs every image to contribute the
    same number of boxes; presumably the caller pads -- confirm.

    Args:
      gt_boxes: per-image lists of ground-truth Box objects.
      dtype: dtype of both returned tensors.

    Returns:
      A (box_coords, box_classes) pair shaped N x B x 4 and N x B x C,
      where B is 1 + the number of GT boxes per image and C is the number
      of classes.
    """
    def one_hot(cls):
        return np.array([ float( i == cls ) for i in range(self.num_classes)])

    unlabeled_row = one_hot(self.UNLABELED)
    coords_per_image = []
    labels_per_image = []
    for image_boxes in gt_boxes:
        # Index 0 holds the special "empty" box.
        coords = [ [0, 0, 0, 0] ]
        labels = [ unlabeled_row ]
        for box in image_boxes:
            coords.append( [box.y, box.x, box.y + box.h, box.x + box.w] )
            labels.append( one_hot(box.object_class) )
        coords_per_image.append(coords)
        labels_per_image.append(labels)
    return (tf.convert_to_tensor(coords_per_image, dtype=dtype),
            tf.convert_to_tensor(labels_per_image, dtype=dtype))
def encode_offsets(self, gt_boxes, anchor_boxes, fmt='ij'):
    """Computes box regression targets from anchor boxes to GT boxes.

    Each target is the relative center displacement (in units of the anchor
    size) concatenated with the log of the size ratio.

    Args:
      gt_boxes: tensor of GT boxes, last axis holds 2*D corner coordinates.
      anchor_boxes: tensor of anchor boxes with the same layout.
      fmt: 'ij' or 'xy'; with 'xy' the per-part coordinate order is rolled
        by 1 for compatibility with the previous feature generator.

    Returns:
      Tensor whose last axis is [center offsets..., log scales...].
    """
    D = gt_boxes.shape[-1] // 2
    # Split every box into its two corners and derive per-axis extents.
    gt_lo = gt_boxes[:, :, :D]
    gt_hi = gt_boxes[:, :, D:]
    anchor_lo = anchor_boxes[:, :, :D]
    anchor_hi = anchor_boxes[:, :, D:]
    gt_extent = gt_hi - gt_lo
    anchor_extent = anchor_hi - anchor_lo
    # Center displacement, normalized by the anchor size.
    delta_center = ( (gt_hi + gt_lo) / 2.0 - (anchor_hi + anchor_lo) / 2.0) / anchor_extent
    # Log-ratio of box sizes.
    log_scale = tf.math.log(gt_extent / anchor_extent)
    if fmt == 'xy':
        delta_center = tf.roll(delta_center, axis=-1, shift=1)
        log_scale = tf.roll(log_scale, axis=-1, shift=1)
    return tf.concat( [delta_center, log_scale], axis=-1)
def make_feature_vector_np_mass(self, gt_boxes: List[List[Box]], cheat=None, iou_threshold=0.3, scale_shapes=None):
    """Builds feature vectors, loss masks, and matched-box counts for a batch.

    Batched-tensor counterpart of make_feature_vector_np: every anchor is
    matched against every GT box with one batched IoU call instead of
    nested Python loops.

    Args:
      gt_boxes: per-image lists of ground-truth Box objects; the batch
        size N is len(gt_boxes).
      cheat: unused in this implementation; presumably kept for signature
        parity with make_feature_vector_np -- confirm.
      iou_threshold: minimum IoU for an anchor to count as matched.
      scale_shapes: unused here; self.scale_shapes is always used.
        NOTE(review): unlike make_feature_vector_np, this parameter is
        ignored -- confirm that is intended.

    Returns:
      A tuple (fvs, masks, nums):
        fvs:   N x (A * (C + num_box_params)) float16 feature tensor.
        masks: same shape as fvs; 1.0 where the loss should apply.
        nums:  N int32 tensor of matched-anchor counts, clamped to >= 1.
    """
    final_dtype = tf.float16
    # Match with full float32 precision; only the outputs are downcast.
    intermediate_dtype = tf.float32
    # Batch size
    N = len(gt_boxes)
    # A x 4 where A is number of anchors per image
    anchors = boxes.evenly_spaced_boxes(
        self.scale_shapes, self.ANCHORS_TENSOR_FMT, (self.input_image_shape[0], self.input_image_shape[1]),
        dtype=intermediate_dtype)
    # N x A x 4
    anchors = tf.broadcast_to(anchors, (N, anchors.shape[0], anchors.shape[1]))
    # Encode the GT boxes in the tensor-aligned format. A special
    # degenerate empty box is placed at index 0.
    #
    # encoded_gt_boxes: N x B x 4, where B is 1 + the max number of GT boxes in 1 image in the batch.
    # box_classes: N x B x C, where C is the number of classes + 1 for unlabeled
    encoded_gt_boxes, box_classes = self.encode_boxes(gt_boxes, dtype=intermediate_dtype)
    # TODO: limit the number of boxes to prevent OOMs
    # Find matching boxes
    # N x A x B
    ious = boxes.iou(anchors, encoded_gt_boxes)
    # N x A
    anchor_matches = tf.math.reduce_max(ious, axis=2) >= tf.convert_to_tensor(iou_threshold)
    # N x A
    # Unmatched anchors are pointed at index 0 (the degenerate empty box).
    best_box_index = tf.where(
        anchor_matches,
        tf.math.argmax(ious, axis=2),
        tf.zeros_like(anchor_matches, dtype=tf.int64))
    # N x A x 4: for each anchor, gather the coordinates of its best GT box
    # from this image's row of encoded_gt_boxes (batch_dims=1 pairs the
    # batch axes of both tensors).
    best_box = tf.gather(encoded_gt_boxes, best_box_index, axis=1, batch_dims=1)
    # Mix in the anchors into the box array so that the offsets for
    # non-matching boxes are 0. This doesn't need to be done, but we do it
    # for backwards compatibility with the previous feature generator.
    # N x A x 4
    best_box_for_offset = tf.where(tf.expand_dims(anchor_matches, -1), best_box, anchors)
    # N x A x num_box_params
    offsets = self.encode_offsets(best_box_for_offset, anchors, fmt='xy')
    # N x A x C
    classes = tf.gather(box_classes, best_box_index, axis=1, batch_dims=1)
    # N x A x (C + num_box_params)
    fvs = tf.concat( [classes, offsets], axis=2 )
    fvs = tf.reshape(fvs, (N, -1))
    masks = tf.concat( [
        # Classes always match
        tf.ones_like(classes),
        # ... but offsets match only when there's a match with an anchor.
        tf.where(tf.expand_dims(anchor_matches, -1), tf.ones_like(offsets), tf.zeros_like(offsets))], axis=2)
    masks = tf.reshape(masks, (N, -1))
    nums = tf.reduce_sum(tf.cast(anchor_matches, dtype=tf.int32), axis=-1)
    # Avoid inf loss by adding at least 1 box to the non-modelable images.
    nums = tf.maximum(nums, tf.ones_like(nums))
    return tf.cast(fvs, dtype=final_dtype), tf.cast(masks, dtype=final_dtype), nums
def make_feature_vector_np(self, gt_boxes: List[List[Box]], cheat=None, iou_threshold=0.3, scale_shapes=None):
if scale_shapes is None:
scale_shapes = self.scale_shapes
......@@ -188,7 +312,13 @@ class Detector(object):
# Pick the best box for this anchor, and calculate the offsets.
best_box_tuple = (-1.0, None)
for gt_box in gt_boxes_for_image:
best_box_tuple = max(best_box_tuple, (anchor_box.iou(gt_box), gt_box))
iou = anchor_box.iou(gt_box)
#print("Computing iou...")
#iou = boxes.iou(
#anchor_box.as_tensor_row()[np.newaxis, np.newaxis, :],
#gt_box.as_tensor_row()[np.newaxis, np.newaxis, :])[0][0][0]
#print("IOU done: {}".format(iou))
best_box_tuple = max(best_box_tuple, (iou, gt_box))
best_iou, best_box = best_box_tuple
if best_iou < iou_threshold:
......@@ -213,7 +343,7 @@ class Detector(object):
mask_for_box[self.num_classes : self.num_classes + self.num_box_params] = 1.0
if num_matched_boxes == 0 and len(gt_boxes_for_image) > 0:
#print("OOPS! Have {} gt boxes, found 0 matching. Gt boxes are: {}".format(len(gt_boxes_for_image), gt_boxes_for_image))
print("OOPS! Have {} gt boxes, found 0 matching. Gt boxes are: {}".format(len(gt_boxes_for_image), gt_boxes_for_image))
pass
else:
#print("YAY! Matched {} out of {} boxes!".format(num_matched_boxes, len(gt_boxes_for_image)))
......
......@@ -208,6 +208,7 @@ class Provider(object):
label = 0
if detector is not None:
gtf = self.get_gt_features_from_detector(detector, entry)
#print(tf.reduce_sum(tf.cast(tf.where(gtf['y_gt'] != 0.0), dtype=tf.int32)))
entry['y_gt'] = gtf['y_gt']
entry['y_mask'] = gtf['y_mask']
entry['num_matched_boxes'] = gtf['num_matched_boxes']
......@@ -233,7 +234,9 @@ class Provider(object):
def encode_entry(*args):
return self.decode_entry(*args)
dataset = dataset.map(process_one_entry_tf_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
#dataset = dataset.map(process_one_entry_tf_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.map(process_one_entry_tf_function, num_parallel_calls=5)
#dataset = dataset.map(process_one_entry_tf_function, num_parallel_calls=1)
dataset = dataset.map(encode_entry)
# This doesn't work because "entry" is a hash input and the graph gets recompiled every time.
......
"""Axis-aligned bounding box operations.
All boxes in this module are represented in the tensor shape format. A
D-dimensional axis-aligned box in R^D is represented as a 2*D array of the
coordinates of the bottom-left point followed by the coordinates of the
top-right point. The coordinates are ij-indexed.
In a correct box, each component of the first point will be smaller or equal
than each component of the second point. The boxes where this is not the case
are degenerate. It is undefined what values are returned for the degenerate
boxes.
"""
import tensorflow as tf
def intersection(a, b):
    """Pairwise intersection volumes for two batches of boxes.

    Intended for batched anchor matching. If batches carry different
    numbers of boxes, pad with 0.0 boxes and ignore the extra rows.

    Args:
      a: Tensor [N x K x 2*D]; each box is the bottom-left corner's D
        coordinates followed by the top-right corner's D coordinates
        (e.g. x1, y1, x2, y2 in 2D). N is the batch size.
      b: Tensor [N x M x 2*D], same layout.

    Returns:
      Tensor [N x K x M] of pairwise intersection volumes under the
      standard volume metric in R^D.
    """
    # Validate the expected [batch, boxes, 2*D] layout.
    assert len(b.shape) == 3, "Wrong shape of b: {}".format(b.shape)
    assert a.shape[0] == b.shape[0]
    assert a.shape[2] == b.shape[2]
    N, K, Dx2 = a.shape
    N, M, _ = b.shape
    D = Dx2 // 2
    assert 2*D == Dx2
    # Insert singleton axes so the K boxes of `a` pair with the M boxes of
    # `b` through implicit broadcasting: N x K x 1 x D vs N x 1 x M x D.
    a_lo = tf.expand_dims(a[:, :, :D], 2)
    a_hi = tf.expand_dims(a[:, :, D:], 2)
    b_lo = tf.expand_dims(b[:, :, :D], 1)
    b_hi = tf.expand_dims(b[:, :, D:], 1)
    # Per-dimension overlap extents, clamped to zero for disjoint pairs.
    overlap_lo = tf.maximum(a_lo, b_lo)                          # N x K x M x D
    overlap_hi = tf.minimum(a_hi, b_hi)                          # N x K x M x D
    lengths = tf.maximum(overlap_hi - overlap_lo, tf.zeros_like(overlap_lo))
    # Multiply the side lengths into the intersection volume.
    return tf.math.reduce_prod(lengths, axis=3)                  # N x K x M
def area(a):
    """Computes the volume ("area") of each box in a.

    This function is primarily intended to use with batched box matching.

    Args:
      a: Tensor [...dims... x 2*D] of box coordinates where D is the dimension of
        the euclidian space. Each box is represented as the coordinates of the
        bottom left corner followed by the coordinates of the top right corner.
        E.g. a two-dimensional box would have the coordinates laid out like so:
        x1, y1, x2, y2

    Returns:
      Tensor [...dims...] of box volumes using the standard volume metric in R^D.
    """
    # Shape check
    D = a.shape[-1] // 2
    assert a.shape[-1] == 2 * D
    # Slice the bottom and top corners on the last axis. The previous
    # tf.slice-based version aliased one `shapes` list as both size
    # arguments and mutated it in place (correct only by coincidence), and
    # required fully static shapes; ellipsis indexing is equivalent,
    # alias-free, and works with dynamic leading dimensions.
    bottom = a[..., :D]
    top = a[..., D:]
    return tf.reduce_prod(top - bottom, axis=-1)
def iou(a, b):
    """Pairwise intersection-over-union for two batches of boxes.

    Intended for batched anchor matching. If batches carry different
    numbers of boxes, pad with 0.0 boxes and ignore the extra rows.
    Non-intersecting pairs get IoU 0.0; a pair whose union volume is 0.0
    (e.g. two empty boxes) yields NaN.

    Args:
      a: Tensor [N x K x 2*D]; each box is the bottom-left corner's D
        coordinates followed by the top-right corner's D coordinates
        (e.g. x1, y1, x2, y2 in 2D). N is the batch size.
      b: Tensor [N x M x 2*D], same layout.

    Returns:
      Tensor [N x K x M] of pairwise IoU values under the standard volume
      metric in R^D.
    """
    # Validate the expected [batch, boxes, 2*D] layout.
    assert len(b.shape) == 3, "Wrong shape of b: {}".format(b.shape)
    assert a.shape[0] == b.shape[0]
    assert a.shape[2] == b.shape[2]
    N, K, Dx2 = a.shape
    N, M, _ = b.shape
    D = Dx2 // 2
    assert 2*D == Dx2
    overlap = intersection(a, b)                                  # N x K x M
    # Pairwise union: replicate each volume tensor along the other's axis.
    vol_a = tf.broadcast_to(tf.expand_dims(area(a), 2), (N, K, M))
    vol_b = tf.broadcast_to(tf.expand_dims(area(b), 1), (N, K, M))
    union = vol_a + vol_b - overlap
    return overlap / union
def evenly_spaced_boxes(box_counts, box_sizes, image_shape, offset=None, dtype=tf.float32):
    """Returns evenly spaced boxes in the image frame.

    We assume that the image is D-dimensional, and give examples for 2
    dimensions. The length of all lists is equivalent to the number of scales S
    in the detector. Boxes for each scale are appended after the previous scale.

    Please note that the box_sizes are defined in the tensor order. It is
    common to define bounding box sizes in the W,H order for 2D boxes.

    Args:
      box_counts: List (length S) of D-tuples that define number of elements
        in a grid along each axis for the box centers. The first image is
        at the offset defined by offset. Can be a tf.Tensor.
      box_sizes: List (length S) of lists (length B_i) of lists (length D) of box
        sizes for each scale. Can be a tf.Tensor, but please be careful
        when converting!
      image_shape: D-tuple that defines the overall image shape. Can be a
        tf.Tensor.
      offset: List (length S) of D-tuples that define the offset of the first
        image from 0^D. Can be a tf.Tensor. Only None is supported for now.
      dtype: dtype of the returned tensor.

    Returns:
      Tensor [number_of_boxes x 2*D], where boxes are represented in the
      x1,y1,x2,y2 format. Note that this is the dimension-first format as
      opposed to the WH format.
    """
    final_dtype = dtype
    # Compute all boxes with large precision regardless of the final representation.
    intermediate_dtype = tf.float32
    # Shape check
    # S x 2D
    S, D = (len(box_counts), len(box_counts[0]) if len(box_counts) else 0)
    assert len(box_sizes) == S, \
        "len(box_sizes) is {}, but needs to be equal to box_counts.shape[0] which is {}".format(
            len(box_sizes), S)
    assert len(image_shape) == D, \
        "len(image_shape) is {} (image_shape is {}), but needs to be equal to box_counts.shape[1] which is {}".format(
            len(image_shape), image_shape, D)
    boxes_for_all_scales = []
    for s, box_sizes_for_shape in enumerate(box_sizes):
        # Prepare range for all dimensions
        grid_elements = []
        for d, size_along_d in enumerate(box_counts[s]):
            delta = image_shape[d] / size_along_d
            # Note: the first argument is the max value rather than the count.
            grid_elements.append(tf.range(tf.cast(image_shape[d], dtype=intermediate_dtype), delta=delta))
        # This one little trick prepares a d-dimensional grid with evenly spaced things.
        # We use indexing = 'ij' (the matrix convention), so that it will first
        # iterate over the 1st row, then the 2nd, etc. The reason for this
        # indexing convention is that tf.keras.Flatten after tf.keras.Conv2D
        # iterates in the same manner.
        # Shape: box_counts[s]... x D
        ixs = tf.cast(tf.stack(tf.meshgrid(*grid_elements, indexing='ij'), axis=-1),
                      dtype=intermediate_dtype)
        # TODO: add offset!
        assert offset is None
        # Shape: B_s x D
        ixs = tf.reshape(ixs, (-1, D))
        all_ixs = []
        for box_size in box_sizes_for_shape:
            # Shape: D
            bs = tf.convert_to_tensor(box_size, dtype=intermediate_dtype)
            # Add bottom point and top point assuming ixs is the center.
            all_ixs += [ixs - bs / 2, ixs + bs / 2]
        # Now all_ixs are the indices of all boxes for the grid. Stack them along the last axis.
        # Shape of the new element: B_s x (box_sizes * 2D)
        all_boxes = tf.concat(all_ixs, axis=-1)
        # But! Now we reshape to : (B_s * box_sizes) x 2D
        # This is not the same as tf.concat(all_ixs, axis=0)!
        boxes_for_all_scales.append(tf.reshape(all_boxes, (-1, 2*D)))
    # Prepare coordinates of all shapes
    return tf.cast(tf.concat(boxes_for_all_scales, axis=0), dtype=final_dtype)
This diff is collapsed.
......@@ -327,7 +327,7 @@ class SmileBotTrainer(object):
# Shuffle again. This time we only need enough images to load up the GPU.
SHUFFLE_BUFFER_SIZE = 1000
#self.train_dataset = self.train_dataset.shuffle(SHUFFLE_BUFFER_SIZE)
self.train_dataset = self.train_dataset.prefetch(SHUFFLE_BUFFER_SIZE)
#self.train_dataset = self.train_dataset.prefetch(SHUFFLE_BUFFER_SIZE)
# Use validation dataset for testing.
#self.test_dataset = tf.data.Dataset.from_tensor_slices((self.X_test, self.y_test))
......@@ -362,7 +362,7 @@ class SmileBotTrainer(object):
self.y_val = y_val
self.provenance = provenance
self.train_dataset = tf.data.Dataset.from_tensor_slices((self.X_train, self.y_train))
# self.train_dataset = tf.data.Dataset.from_tensor_slices((self.X_train, self.y_train))
self.test_dataset = tf.data.Dataset.from_tensor_slices((self.X_test, self.y_test))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment