This a computer vision object detection and segmentation problem on kaggle (https://www.kaggle.com/c/airbus-ship-detection#description). In this problem, I build a model that detects all ships in satellite images and generate a mask for each ship. There several deep learning models that works with image detection such as YOLO, R-CNN, Fast R-CNN, Faster R-CNN. For objection segmentation, Unet is a great tools. Recently there is a nice paper on object instance segmentation (https://arxiv.org/abs/1703.06870) called Mask R-CNN.
In this problem, most image (~80%) contains no ships. So my strategy is the following:
from fastai.conv_learner import *
from fastai.dataset import *
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
PATH = './'
TRAIN = '/scratch/zhang.chi9/train/'
TRAIN2 = '/scratch/zhang.chi9/test/'
TEST = '/scratch/zhang.chi9/test_v2/'
SEGMENTATION = '../input/train_ship_segmentations.csv'
exclude_list = ['6384c3e78.jpg','13703f040.jpg', '14715c06d.jpg', '33e0ff2d5.jpg',
'4d4e09f2a.jpg', '877691df8.jpg', '8b909bb20.jpg', 'a8d99130e.jpg',
'ad55c3143.jpg', 'c8260c541.jpg', 'd6c7f17c7.jpg', 'dc3e7c901.jpg',
'e44dffe88.jpg', 'ef87bad36.jpg', 'f083256d8.jpg'] #corrupted image
nw = 4 #number of workers for data loader
arch = resnet34 #specify target architecture
segmentation_df = pd.read_csv(SEGMENTATION).set_index('ImageId')
train_names = segmentation_df.index.tolist()
test_names = [f for f in os.listdir(TEST)]
for el in exclude_list:
if(el in train_names): train_names.remove(el)
if(el in test_names): test_names.remove(el)
tr_n, val_n = train_test_split(train_names, test_size=0.2, random_state=42)
class pdFilesDataset(FilesDataset):
def __init__(self, fnames, path, transform):
self.segmentation_df = pd.read_csv(SEGMENTATION).set_index('ImageId')
super().__init__(fnames, transform, path)
def get_x(self, i):
img = open_image(os.path.join(self.path, self.fnames[i]))
if self.sz == 768: return img
else: return cv2.resize(img, (self.sz, self.sz))
def get_y(self, i):
if(self.path == TEST): return 0
masks = self.segmentation_df.loc[self.fnames[i]]['EncodedPixels']
if(type(masks) == float): return 0 #NAN - no ship
else: return 1
def get_c(self): return 2 #number of classes
def get_data(sz,bs):
#data augmentation
aug_tfms = [RandomRotate(20, tfm_y=TfmType.NO),
RandomDihedral(tfm_y=TfmType.NO),
RandomLighting(0.05, 0.05, tfm_y=TfmType.NO)]
tfms = tfms_from_model(arch, sz, crop_type=CropType.NO, tfm_y=TfmType.NO,
aug_tfms=aug_tfms)
ds = ImageData.get_ds(pdFilesDataset, (tr_n[:-(len(tr_n)%bs)],TRAIN),
(val_n,TRAIN), tfms, test=(test_names,TEST))
md = ImageData(PATH, ds, bs, num_workers=nw, classes=None)
return md
sz = 256 #image size
bs = 200 #batch size
md = get_data(sz,bs)
learn = ConvLearner.pretrained(arch, md, ps=0.5) #dropout 50%
learn.opt_fn = optim.Adam
learn.fit(2e-3, 1)
learn.unfreeze()
lr=np.array([1e-4,5e-4,2e-3])
learn.fit(lr, 1, cycle_len=2, use_clr=(20,8))
sz = 384 #image size
bs = 100 #batch size
md = get_data(sz,bs)
learn = ConvLearner.pretrained(arch, md, ps=0.5) #dropout 50%
learn.opt_fn = optim.Adam
learn.unfreeze()
lr=np.array([1e-4,5e-4,2e-3])
learn.fit(lr/2, 1, cycle_len=2, use_clr=(20,8)) #lr is smaller since bs is only 32
learn.save('Resnet34_lable_384_1')
learn.load('Resnet34_lable_384_1')
log_preds,y = learn.predict_with_targs()
probs = np.exp(log_preds)
probs = probs[:,1]
preds =1*(probs>0.5)
from sklearn.metrics import accuracy_score
print(f'The accuracy score is {accuracy_score(y, preds)}')
from sklearn.metrics import accuracy_score
print(f'The accuracy score is {accuracy_score(y, preds)}')
class_names = ['no_ship','has_ship']
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.Blues):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Compute confusion matrix
cnf_matrix = confusion_matrix(y, preds)
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
title='Normalized confusion matrix')
plt.show()
log_preds,y = learn.TTA(is_test=True)
probs = np.mean(np.exp(log_preds),0)[:,1]
df['p_ship'] = pd.Series(probs, index=test_names)
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from imgaug import augmenters as iaa
from scipy.ndimage import binary_erosion
from scipy.ndimage.morphology import binary_dilation
import pdb
from scipy import ndimage as ndi
%matplotlib inline
import mpld3
# Root directory of the project
ROOT_DIR = os.path.abspath("./Mask_RCNN/")
# Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
def multi_rle_encode(img, **kwargs):
'''
Encode connected regions as separated masks
'''
labels = label(img)
if img.ndim > 2:
return [rle_encode(np.sum(labels==k, axis=2), **kwargs) for k in np.unique(labels[labels>0])]
else:
return [rle_encode(labels==k, **kwargs) for k in np.unique(labels[labels>0])]
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
def rle_encode(img, min_max_threshold=1e-3, max_mean_threshold=None):
'''
img: numpy array, 1 - mask, 0 - background
Returns run length as string formated
'''
if np.max(img) < min_max_threshold:
return '' ## no need to encode if it's all zeros
if max_mean_threshold and np.mean(img) > max_mean_threshold:
return '' ## ignore overfilled mask
pixels = img.T.flatten()
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
return ' '.join(str(x) for x in runs)
def rle_decode(mask_rle, shape=(768, 768)):
'''
mask_rle: run-length as string formated (start length)
shape: (height,width) of array to return
Returns numpy array, 1 - mask, 0 - background
'''
s = mask_rle.split()
starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
starts -= 1
ends = starts + lengths
img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
for lo, hi in zip(starts, ends):
img[lo:hi] = 1
return img.reshape(shape).T # Needed to align to RLE direction
def masks_as_image(in_mask_list):
# Take the individual ship masks and create a single mask array for all ships
all_masks = np.zeros((768, 768), dtype = np.uint8)
for mask in in_mask_list:
if isinstance(mask, str):
all_masks |= rle_decode(mask)
return all_masks
def stack_masks(masks):
# Take the individual ship masks and create a single mask array for all ships
all_masks = np.zeros((768, 768))
for i in range(masks.shape[2]):
mask = masks[:,:,i]
all_masks += mask
return np.array(all_masks, dtype = np.uint8)
def masks_as_color(in_mask_list):
# Take the individual ship masks and create a color mask array for each ships
all_masks = np.zeros((768, 768), dtype = np.float)
scale = lambda x: (len(in_mask_list)+x+1) / (len(in_mask_list)*2) ## scale the heatmap image to shift
for i,mask in enumerate(in_mask_list):
if isinstance(mask, str):
all_masks[:,:] += scale(i) * rle_decode(mask)
return all_masks
def get_ax(rows=1, cols=1, size=8):
"""Return a Matplotlib Axes array to be used in
all visualizations in the notebook. Provide a
central point to control graph sizes.
Change the default size attribute to control the size
of rendered images
"""
_, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
return ax
PATH = '../Airbus_Ship_Detection'
TRAIN = '/scratch/zhang.chi9/train/'
TEST = '/scratch/zhang.chi9/test/'
SEGMENTATION = '../Airbus_Ship_Detection/input/train_ship_segmentations.csv'
PRETRAINED = '../input/fine-tuning-resnet34-on-ship-detection/models/Resnet34_lable_256_1.h5'
exclude_list = ['6384c3e78.jpg','13703f040.jpg', '14715c06d.jpg', '33e0ff2d5.jpg',
'4d4e09f2a.jpg', '877691df8.jpg', '8b909bb20.jpg', 'a8d99130e.jpg',
'ad55c3143.jpg', 'c8260c541.jpg', 'd6c7f17c7.jpg', 'dc3e7c901.jpg',
'e44dffe88.jpg', 'ef87bad36.jpg', 'f083256d8.jpg'] #corrupted images
train_names = [f for f in os.listdir(TRAIN)]
test_names = [f for f in os.listdir(TEST)]
for el in exclude_list:
if(el in train_names): train_names.remove(el)
if(el in test_names): test_names.remove(el)
print(f'{len(train_names)} images founded for training')
print(f'{len(test_names)} images founded for testing')
img_df = pd.read_csv(SEGMENTATION)
img_df_test = pd.read_csv(test_SEGMENTATION)
df = img_df
df.drop(df[df['ImageId'].isin(exclude_list)].index,inplace = True)
df['ships'] = df['EncodedPixels'].map(lambda c_row: 1 if isinstance(c_row, str) else 0)
img_ship1 = df.groupby('ImageId', group_keys=False).agg({'ships': 'sum'})
ships_only1 = img_ship1.drop(img_ship1.ships[img_ship1.ships==0].index)
print(f'Out of {len(train_names)} training images, only {len(ships_only1)} images that has ships')
df = img_df_test
df.drop(df[df['ImageId'].isin(exclude_list)].index,inplace = True)
df['ships'] = df['EncodedPixels'].map(lambda c_row: 1 if isinstance(c_row, str) else 0)
img_ship2 = df.groupby('ImageId', group_keys=False).agg({'ships': 'sum'})
ships_only2 = img_ship2.drop(img_ship2.ships[img_ship2.ships==0].index)
print(f'Out of {len(test_names)} training images, only {len(ships_only2)} images that has ships')
class AirbusDataset(utils.Dataset):
def load_train(self, idx, sz):
"""Generate the requested number of synthetic images.
count: number of images to generate.
sz: the size of the generated images.
"""
# Add classes
# source = Airbus
# class = ship
self.add_class("Airbus", 1, "ship")
# Add images
for i in idx:
if i > 0:
imagename = ships_only1.index[i]
rle = img_df.loc[img_df['ImageId']==imagename,'EncodedPixels'].values
self.add_image("Airbus", image_id=i, fname=imagename, path=TRAIN, sz=sz, rle = rle)
else:
imagename = ships_only2.index[-i]
rle = img_df_test.loc[img_df_test['ImageId']==imagename,'EncodedPixels'].values
self.add_image("Airbus", image_id=i, fname=imagename, path=TEST, sz=sz, rle = rle)
def load_image(self, image_id):
"""Generate an image from the specs of the given image ID.
"""
info = self.image_info[image_id]
img = plt.imread(info['path']+info['fname'])
if info['sz'] == 768: return img
else: return cv2.resize(img, (info['sz'], info['sz']))
def load_mask(self, image_id):
"""Load instance masks for the given image.
Returns:
masks: A bool array of shape [height, width, instance count] with
a binary mask per instance.
class_ids: a 1D array of class IDs of the instance masks.
"""
info = self.image_info[image_id]
rle = info['rle']
mask = np.empty([info['sz'], info['sz'], len(rle)])
class_ids = np.ones(len(rle))
for i,rle_ in enumerate(rle):
mask_ = rle_decode(rle_)
if info['sz'] != 768: mask_ = cv2.resize(mask_, (info['sz'], info['sz']))
mask[:,:,i] = mask_
return mask.astype(np.bool), class_ids.astype(np.int32)
import pickle
with open('idx.pickle', 'rb') as f:
train_idx, val_idx = pickle.load(f)
train_idx = train_idx + list(-np.arange(len(ships_only2)))
sz = 768
# Training dataset
dataset_train = AirbusDataset()
dataset_train.load_train(train_idx, sz)
dataset_train.prepare()
# Validation dataset
dataset_val = AirbusDataset()
dataset_val.load_train(val_idx, sz)
dataset_val.prepare()
class ShipsConfig(Config):
"""Configuration for training on the nucleus segmentation dataset."""
# Give the configuration a recognizable name
NAME = "test_multigpu_ships"
# Adjust depending on your GPU memory
GPU_COUNT = 1
IMAGES_PER_GPU = 1
# Number of classes (including background)
NUM_CLASSES = 1 + 1 # Background + nucleus
# Number of training and validation steps per epoch
STEPS_PER_EPOCH = 1000
VALIDATION_STEPS = 1
# Don't exclude based on confidence. Since we have two classes
# then 0.5 is the minimum anyway as it picks between nucleus and BG
DETECTION_MIN_CONFIDENCE = 0
# Input image resizing
# Random crops of size 512x512
IMAGE_RESIZE_MODE = "square"
IMAGE_MIN_DIM = sz
IMAGE_MAX_DIM = sz
# Length of square anchor side in pixels
RPN_ANCHOR_SCALES = (6, 12, 36, 108, 324)
RPN_ANCHOR_RATIOS = [0.5, 1, 3]
# ROIs kept after non-maximum supression (training and inference)
POST_NMS_ROIS_TRAINING = 1000
POST_NMS_ROIS_INFERENCE = 2000
# Non-max suppression threshold to filter RPN proposals.
# You can increase this during training to generate more propsals.
RPN_NMS_THRESHOLD = 0.9
# How many anchors per image to use for RPN training
RPN_TRAIN_ANCHORS_PER_IMAGE = 128
# Image mean (RGB)
MEAN_PIXEL = np.array([56.37015, 80.04626, 85.10948])
# If enabled, resizes instance masks to a smaller size to reduce
# memory load. Recommended when using high-resolution images.
USE_MINI_MASK = True
MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask
TRAIN_ROIS_PER_IMAGE = 128
# Maximum number of ground truth instances to use in one image
MAX_GT_INSTANCES = 200
# Max number of final detections per image
DETECTION_MAX_INSTANCES = 400
config = ShipsConfig()
from imgaug import augmenters as iaa
augmentation = iaa.Sometimes(0.8, [
iaa.Fliplr(0.5),
iaa.Flipud(0.5),
iaa.Affine(
scale={"x": (0.9, 1), "y": (0.9, 1)}, # scale images to 80-120% of their size, individually per axis
rotate=(-5, 5), # rotate by -45 to +45 degrees
shear=(-1, 1), # shear by -16 to +16 degrees
)])
from keras.callbacks import *
class find_lr(Callback):
def __init__(self):
super(find_lr, self).__init__()
self.trn_iterations = 0.
self.history = {}
def clr(self):
y = 10**(5e-3*self.trn_iterations - 6)
return y
def plot_lr(self):
plt.xlabel('Training Iterations')
plt.ylabel('Learning Rate')
plt.title('lr vs. training iterations')
plt.plot(self.history['iterations'], self.history['lr'])
def plot_loss(self,cut=6):
plt.plot(self.history['loss'])
plt.title('loss vs. learning rate')
plt.ylabel('loss')
plt.xlabel('learning rate')
def on_train_begin(self, logs={}):
logs = logs or {}
K.set_value(self.model.optimizer.lr, self.clr())
def on_batch_end(self, epoch, logs=None):
logs = logs or {}
self.trn_iterations += 1
self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
self.history.setdefault('iterations', []).append(self.trn_iterations)
for k, v in logs.items():
self.history.setdefault(k, []).append(v)
K.set_value(self.model.optimizer.lr, self.clr())
from keras.callbacks import *
class cyclic_lr(Callback):
def __init__(self, base_lr=1e-6, max_lr=3e-4, T0 = 1000, cycle_mult = 2):
super(cyclic_lr, self).__init__()
self.base_lr = base_lr
self.max_lr = max_lr
self.T0 = T0
self.cycle_mult = 2
self.trn_iterations = 0.
self.history = {}
def get_period(self):
marker = self.T0*(self.cycle_mult**np.arange(10) -1 )/(self.cycle_mult -1 )
if (self.trn_iterations > marker).any():
T_ps = np.where(self.trn_iterations > marker)[0].max()
else:
T_ps = 0
T_new = self.T0*self.cycle_mult**T_ps
iter_new = self.trn_iterations - marker[T_ps]
return T_new, iter_new
def clr(self):
T_new, iter_new = self.get_period()
y = (self.max_lr - self.base_lr)/2*np.cos(np.pi*iter_new/T_new) + (self.max_lr + self.base_lr)/2
return y
def plot_lr(self):
plt.xlabel('Training Iterations')
plt.ylabel('Learning Rate')
plt.title('lr vs. training iterations')
plt.plot(self.history['iterations'], self.history['lr'])
def plot_loss(self,cut=6):
plt.plot(self.history['loss'])
plt.title('loss vs. learning rate')
plt.ylabel('loss')
plt.xlabel('learning rate')
def on_train_begin(self, logs={}):
logs = logs or {}
K.set_value(self.model.optimizer.lr, self.clr())
def on_batch_end(self, epoch, logs=None):
logs = logs or {}
self.trn_iterations += 1
self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
self.history.setdefault('iterations', []).append(self.trn_iterations)
for k, v in logs.items():
self.history.setdefault(k, []).append(v)
K.set_value(self.model.optimizer.lr, self.clr())
find_lr_object = find_lr()
def init_model(init_mode,*args):
# imagenet, coco, saved, or last
init_with = init_mode
name = args[0]
if init_with == "imagenet":
model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
model.load_weights(COCO_MODEL_PATH, by_name=True,exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])
elif init_with == 'saved':
model_path = os.path.join('/home/zhang.chi9/Airbus_segmentaiton/weights', name +'.h5')
model.load_weights(model_path, by_name=True)
elif init_with == "last":
# Load the last model you trained and continue training
model.load_weights(model.find_last(), by_name=True)
def train_model(stage = 1, init = 'last'):
# Training - Stage 1
if stage == 1:
print("Training network heads")
init_model(init, '128_stage_3')
model.train(dataset_train, dataset_val,
learning_rate=config.LEARNING_RATE,
epochs=1,
layers='heads',
augmentation=augmentation,
custom_callbacks=[find_lr_object])
# Training - Stage 2: Finetune layers from ResNet stage 4 and up
elif stage == 2:
print("Fine tune Resnet stage 4 and up")
init_model(init, str(sz)+'_stage_1')
model.train(dataset_train, dataset_val,
learning_rate=config.LEARNING_RATE,
epochs=1,
layers='4+',
augmentation=augmentation,
custom_callbacks=[find_lr_object])
# Training - Stage 3,Fine tune all layers
elif stage == 3:
print("Fine tune all layers")
init_model(init, str(sz)+'_stage_2')
model.train(dataset_train, dataset_val,
learning_rate=config.LEARNING_RATE / 10,
epochs=1,
layers='all',
augmentation=augmentation,
custom_callbacks=[find_lr_object])
save_model(str(sz)+'_stage_'+str(stage))
def save_model(name):
model_path = os.path.join('/home/zhang.chi9/Airbus_segmentaiton/weights', name +'.h5')
model.keras_model.save_weights(model_path)
# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
model_dir=MODEL_DIR)
#train_model(stage = 1, init = 'coco')
class InferenceConfig(ShipsConfig):
GPU_COUNT = 1
IMAGES_PER_GPU = 1
inference_config = InferenceConfig()
# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference",
config=inference_config,
model_dir=MODEL_DIR)
model_path = '/home/zhang.chi9/Airbus_segmentaiton/models/mask_rcnn_ships_0259.h5'
init_with = "saved" # imagenet, coco, or last
print("Loading weights from ", init_with)
if init_with == "saved":
model.load_weights(model_path, by_name=True)
elif init_with == "last":
# Load the last model you trained and continue training
model.load_weights(model.find_last(), by_name=True)
class valid_img:
def __init__(self, image_id = random.choice(dataset_val.image_ids)):
self.image_id = image_id
self.original_image, self.image_meta, self.gt_class_id, self.gt_bbox, self.gt_masks =\
modellib.load_image_gt(dataset_val, inference_config, image_id, use_mini_mask=False)
results = model.detect([self.original_image], verbose=0)
r = results[0]
self.rois, self.masks, self.class_ids, self.scores = r['rois'], r['masks'], r['class_ids'], r['scores']
if len(self.scores) == 0: self.masks = np.zeros((768, 768 ,1), dtype = np.float)
self.masks = np.float32(self.masks)
self.gt_masks = np.float32(self.gt_masks)
self.gt_num_ships = len(self.gt_class_id)
self.num_ships = len(self.class_ids)
self.num_tta = 6
self.tta()
def visual(self):
visualize.display_instances(self.original_image, self.gt_bbox, self.gt_masks, self.gt_class_id,
dataset_train.class_names, figsize=(8, 8))
visualize.display_instances(self.original_image, self.rois, self.masks, self.class_ids,
dataset_val.class_names, self.scores, ax=get_ax())
def visual_gt(self):
visualize.display_instances(self.original_image, self.gt_bbox, self.gt_masks, self.gt_class_id,
dataset_train.class_names, figsize=(8, 8))
def visual_masks(self,anymask):
plt.figure(figsize=(8,8))
plt.imshow(anymask)
plt.imshow(self.rescaled_gt_masks,alpha = 0.6)
@property
def rescaled_gt_masks(self):
# Take the individual ship masks and create a color mask array for each ships
rescaled_masks = np.zeros((768, 768), dtype = np.float)
scale = lambda x: (self.gt_masks.shape[2]+x+1) / (self.gt_masks.shape[2]*2) ## scale the heatmap image to shift
for i in range(self.gt_masks.shape[2]):
mask = self.gt_masks[:,:,i]
rescaled_masks[:,:] += scale(i) * mask
return rescaled_masks
@property
def rescaled_masks(self):
# Take the individual ship masks and create a color mask array for each ships
rescaled_masks = np.zeros((768, 768), dtype = np.float)
scale = lambda x: self.scores[x]
for i in range(self.masks.shape[2]):
mask = self.masks[:,:,i]
rescaled_masks[:,:] += scale(i) * mask
return rescaled_masks
@property
def rescaled_tta_masks(self):
rescaled_tta_masks_list = []
for ind in range(self.num_tta):
rescaled_tta_masks = np.zeros((768, 768), dtype = np.float)
if self.tta_scores[ind].size != 0:
#scale = lambda x: self.tta_scores[ind][x]
img = self.tta_erode_masks[ind]
for i in range(img.shape[2]):
mask = img[:,:,i]
#rescaled_tta_masks[:,:] += scale(i) * mask
rescaled_tta_masks[:,:] += mask
rescaled_tta_masks_list.append(rescaled_tta_masks)
return rescaled_tta_masks_list
@property
def detection_matrix(self):
n_masks = self.gt_masks.shape[2]
n_pred = self.masks.shape[2]
matri = np.zeros((n_pred,n_masks))
for indi in range(n_pred):
mask = self.masks[:,:,indi]
for indj in range(n_masks):
p = self.gt_masks[:,:,indj]
matri[indi,indj] = IoU(p, mask)
return matri
def show_matrix(self):
mpld3.disable_notebook()
ax = sns.heatmap(t.detection_matrix, linewidth=0.5)
plt.show()
mpld3.enable_notebook()
@staticmethod
def IoU(pred, targs):
intersection = (pred*targs).sum()
return intersection / ((pred+targs).sum() - intersection + 1.0)
def f2_score(self,pred,gt):
n_th = 10
b = 4
thresholds = [0.5 + 0.05*i for i in range(n_th)]
n_masks = gt.shape[2]
n_pred = pred.shape[2]
ious = []
score = 0
for indi in range(n_masks):
mask = gt[:,:,indi]
buf = []
for indj in range(n_pred):
p = pred[:,:,indj]
buf.append(self.IoU(p,mask))
ious.append(buf)
for t in thresholds:
tp, fp, fn = 0, 0, 0
for i in range(n_masks):
match = False
for j in range(n_pred):
if ious[i][j] > t: match = True
if not match: fn += 1
for j in range(n_pred):
match = False
for i in range(n_masks):
if ious[i][j] > t: match = True
if match: tp += 1
else: fp += 1
score += ((b+1)*tp)/((b+1)*tp + b*fn + fp)
return score/n_th
def tta(self):
image_augs = [iaa.Fliplr(1),iaa.Flipud(1),iaa.Affine(rotate=(90,90)),
iaa.Affine(rotate=(180,180)),iaa.Affine(rotate=(270,270))]
inverse_image_augs = [iaa.Fliplr(1),iaa.Flipud(1),iaa.Affine(rotate=(-90,-90)),
iaa.Affine(rotate=(-180,-180)),iaa.Affine(rotate=(-270,-270))]
auged_img = [image_aug.augment_image(self.original_image) for image_aug in image_augs]
self.tta_masks = [self.masks]
self.tta_erode_masks = [self.remove_overlap_erode(self.masks,self.scores)] #
self.tta_scores = [self.scores]
for ind in range(len(auged_img)):
img = auged_img[ind]
results = model.detect([img], verbose=0)
r = results[0]
if len(r['scores']) == 0: r['masks'] = np.zeros((768, 768, 1), dtype = np.float)
self.tta_masks.append(inverse_image_augs[ind].augment_image(r['masks']))
self.tta_erode_masks.append(self.remove_overlap_erode(inverse_image_augs[ind].augment_image(r['masks']),r['scores'])) #
self.tta_scores.append(r['scores'])
@staticmethod
def get_erode_iteration(image):
percent = []
for i in range(10):
image_eroded = binary_erosion(image,iterations=(i+1)).astype(image.dtype)
percent.append(np.sum(image_eroded)/np.sum(image))
percent = np.array(percent)
it = 0
if len(percent[percent >0.5]) != 0: it= np.argmin(percent[percent >0.5])+1
return it
def erode_image(self,image):
if np.sum(image) != 0:
image = np.float32(image)
eroded_image = np.zeros(image.shape)
for i in range(image.shape[2]):
a = image[:,:,i]
it = self.get_erode_iteration(a)
if it == 0:
eroded_image[:,:,i] = a
else:
eroded_image[:,:,i] = binary_erosion(a,iterations=it).astype(a.dtype)
#eroded_image[:,:,i] = ndi.gaussian_laplace(a, sigma=5)
#eroded_image[:,:,i] = ndi.uniform_filter(, size=4, mode='constant')
return eroded_image
else:
return image
@staticmethod
def remove_overlap(mask,scores):
if len(scores) != 0:
x = []
ids = (-scores).argsort() # sort the array from largest to smallest
background = np.zeros((sz,sz))
for i in ids:
tmp = mask[:,:,i]- background
background += np.float32(tmp > 0)
if np.sum(np.float32(tmp > 0))>0: x.append(np.float32(tmp > 0))
no_overlap_mask = np.zeros((sz,sz,len(x)))
for i in range(len(x)):
no_overlap_mask[:,:,i] = x[i]
return no_overlap_mask
else:
return mask
def remove_overlap_erode(self,mask,scores):
image = self.remove_overlap(mask,scores)
return self.erode_image(image)
def visual_tta(self):
plt.figure(figsize=(12,16))
for ind in range(self.num_tta):
img = self.rescaled_tta_masks[ind]
plt.subplot(4,2,ind+1)
plt.imshow(img)
@property
def tta_mean(self):
tta_mean = np.zeros((sz,sz))
for i in range(self.num_tta):
tta_mean += self.rescaled_tta_masks[i]/6
return tta_mean
@staticmethod
def stack3d(img):
stacks = np.zeros((sz,sz))
scale = lambda x: (img.shape[2]+x+1) / (img.shape[2]*2)
for i in range(img.shape[2]):
stacks += img[:,:,i]*scale(i)
return stacks
@staticmethod
def get_drawer_ref(labled,n_objs):
if n_objs != 0:
drawer_ref = {}
ind = 0
for i in range(n_objs):
obj = (labled == i + 1).astype(float)
if(obj.sum() > threshold_obj):
drawer_ref[ind] = obj
ind += 1
else:
drawer_ref = [labled]
return drawer_ref
def put_into_drawer(self,tta_masks, tta_scores, ref):
drawer = {k: [] for k in range(len(ref))}
for ind in range(len(tta_masks)):
tta_mask = tta_masks[ind]
for i in range(tta_mask.shape[2]):
img = tta_mask[:,:,i]
if tta_scores[ind].size != 0:
scale = tta_scores[ind][i]
else:
scale = 1
drawer_ind = self.get_drawer_num(img,ref)
if drawer_ind != -1:
drawer[drawer_ind].append(img*scale)
return drawer
@staticmethod
def open_drawer(drawer):
th = 0.01
all_img_in_drawer = []
all_img_scores = []
for i in range(len(drawer)):
img = np.zeros((sz,sz))
if len(drawer[i]) != 0:
for j in range(len(drawer[i])):
img += drawer[i][j]/6
if not ((img>th)==0).all() :
all_img_scores.append(np.sum(img)/np.sum(img>th))
all_img_in_drawer.append(img>th)
if len(all_img_in_drawer) !=0:
stack = np.zeros((sz,sz,len(all_img_in_drawer)))
else:
stack = np.zeros((sz,sz,1))
for i in range(len(all_img_in_drawer)):
stack[:,:,i] = all_img_in_drawer[i]
return stack,all_img_scores
@staticmethod
def get_drawer_num(img,ref):
score_wrp_ref = []
for i in range(len(ref)):
target = ref[i]
score_wrp_ref.append(np.sum(img*target))
if np.max(score_wrp_ref) != 0:
drawer_ind = np.argmax(score_wrp_ref)
else:
drawer_ind = -1
return drawer_ind
def image_stats(dataset,image_id):
"""Returns a dict of stats for one image."""
image = dataset.load_image(image_id)
mask, _ = dataset.load_mask(image_id)
bbox = utils.extract_bboxes(mask)
# Sanity check
assert mask.shape[:2] == image.shape[:2]
# Return stats dict
return {
"id": image_id,
"shape": list(image.shape),
"bbox": [[b[2] - b[0], b[3] - b[1]]
for b in bbox
# Uncomment to exclude nuclei with 1 pixel width
# or height (often on edges)
# if b[2] - b[0] > 1 and b[3] - b[1] > 1
],
"color": np.mean(image, axis=(0, 1)),
}
stats = []
for i in np.arange(200):
stats.append(image_stats(dataset_train,i))
# Image stats
image_shape = np.array([s['shape'] for s in stats])
image_color = np.array([s['color'] for s in stats])
print("Image Count: ", image_shape.shape[0])
print("Height mean: {:.2f} median: {:.2f} min: {:.2f} max: {:.2f}".format(
np.mean(image_shape[:, 0]), np.median(image_shape[:, 0]),
np.min(image_shape[:, 0]), np.max(image_shape[:, 0])))
print("Width mean: {:.2f} median: {:.2f} min: {:.2f} max: {:.2f}".format(
np.mean(image_shape[:, 1]), np.median(image_shape[:, 1]),
np.min(image_shape[:, 1]), np.max(image_shape[:, 1])))
print("Color mean (RGB): {:.2f} {:.2f} {:.2f}".format(*np.mean(image_color, axis=0)))
# Histograms
fig, ax = plt.subplots(1, 3, figsize=(16, 4))
ax[0].set_title("Height")
_ = ax[0].hist(image_shape[:, 0], bins=20)
ax[1].set_title("Width")
_ = ax[1].hist(image_shape[:, 1], bins=20)
ax[2].set_title("Height & Width")
_ = ax[2].hist2d(image_shape[:, 1], image_shape[:, 0], bins=10, cmap="Blues")
nuclei_per_image = np.array([len(s['bbox']) for s in stats])
print("Ships in image: mean: {:.1f} median: {:.1f} min: {:.1f} max: {:.1f}".format(
nuclei_per_image.mean(), np.median(nuclei_per_image), nuclei_per_image.min(), nuclei_per_image.max()))
plt.hist(nuclei_per_image, bins=10)
# Ships size stats
nucleus_shape = np.array([b for s in stats for b in s['bbox']])
nucleus_area = nucleus_shape[:, 0] * nucleus_shape[:, 1]
print(" Total Ships: ", nucleus_shape.shape[0])
print(" Ships Height. mean: {:.2f} median: {:.2f} min: {:.2f} max: {:.2f}".format(
np.mean(nucleus_shape[:, 0]), np.median(nucleus_shape[:, 0]),
np.min(nucleus_shape[:, 0]), np.max(nucleus_shape[:, 0])))
print(" Ships Width. mean: {:.2f} median: {:.2f} min: {:.2f} max: {:.2f}".format(
np.mean(nucleus_shape[:, 1]), np.median(nucleus_shape[:, 1]),
np.min(nucleus_shape[:, 1]), np.max(nucleus_shape[:, 1])))
print(" Ships Area. mean: {:.2f} median: {:.2f} min: {:.2f} max: {:.2f}".format(
np.mean(nucleus_area), np.median(nucleus_area),
np.min(nucleus_area), np.max(nucleus_area)))
_ = plt.hist(nucleus_shape[:, 1], bins=20)
#plt.xlim(0,50)
# Show 2D histogram
_ = plt.hist2d(nucleus_shape[:, 1], nucleus_shape[:, 0], bins=200, cmap="Reds")
plt.clim(0,2)
# Ship height/width ratio
nucleus_aspect_ratio = nucleus_shape[:, 0] / nucleus_shape[:, 1]
print("Ships Aspect Ratio. mean: {:.2f} median: {:.2f} min: {:.2f} max: {:.2f}".format(
np.mean(nucleus_aspect_ratio), np.median(nucleus_aspect_ratio),
np.min(nucleus_aspect_ratio), np.max(nucleus_aspect_ratio)))
plt.figure(figsize=(15, 5))
_ = plt.hist(nucleus_aspect_ratio, bins=20, range=[0, 5])
# Augmentation
from imgaug import augmenters as iaa
augmentation = iaa.Sometimes(0.8, [
iaa.Fliplr(0.5),
iaa.Flipud(0.5),
iaa.Affine(
scale={"x": (0.9, 1), "y": (0.9, 1)}, # scale images to 80-120% of their size, individually per axis
rotate=(-5, 5), # rotate by -45 to +45 degrees
shear=(-1, 1), # shear by -16 to +16 degrees
)])
#image_id = np.random.choice(dataset_train.image_ids, 1)[0]
# Load the image multiple times to show augmentations
limit = 4
ax = get_ax(rows=2, cols=limit//2)
for i in range(limit):
image, image_meta, class_ids, bbox, mask = modellib.load_image_gt(
dataset_train, inference_config, image_id, use_mini_mask=False, augment=False, augmentation=augmentation)
visualize.display_instances(image, bbox, mask, class_ids,
dataset_train.class_names, ax=ax[i//2, i % 2],
show_mask=True, show_bbox=False)
for i in range(4):
image_id = np.random.choice(dataset_val.image_ids, 1)[0]
v = valid_img(image_id = image_id)
threshold_obj = 4
threshold = 0.35
labled,n_objs = ndi.label(v.tta_mean > (threshold*min(v.tta_mean.max(),1)))
drawer_ref = v.get_drawer_ref(labled,n_objs)
tta_average,tta_average_scores = v.open_drawer(v.put_into_drawer(v.tta_masks,v.tta_scores, drawer_ref))
tta_average = v.remove_overlap(tta_average,np.array(tta_average_scores))
fig = plt.figure(figsize=(21,7))
plt.subplot(1,3,1),plt.imshow(v.original_image),plt.title('Original Image')
plt.subplot(1,3,2),plt.imshow(v.stack3d(v.gt_masks)),plt.title('Ground Truth')
plt.subplot(1,3,3),plt.imshow(v.stack3d(tta_average)),plt.title('Prediction')
fig.savefig(f'seg_val{i}.png')
TEST_DIR = '/scratch/zhang.chi9/test_v2'
class test_img(valid_img):
def __init__(self, fname):
self.fname = fname
self.original_image = plt.imread(os.path.join(TEST_DIR, fname))
results = model.detect([self.original_image], verbose=0)
r = results[0]
self.rois, self.masks, self.class_ids, self.scores = r['rois'], r['masks'], r['class_ids'], r['scores']
self.masks = np.float32(self.masks)
self.num_tta = 6
self.tta()
def visual(self):
visualize.display_instances(self.original_image, self.rois, self.masks, self.class_ids,
dataset_val.class_names, self.scores, ax=get_ax())
df_has_ships = pd.read_csv('ship_detection.csv')
potential_has_ship = list(df_has_ships.id[df_has_ships.mean(axis=1)>0.5])
potential_no_ship = list(df_has_ships.id[df_has_ships.mean(axis=1)<=0.5])
subdf = pd.DataFrame(columns=['ImageId','EncodedPixels','size','maskrcnn_score'])
df_ind = 0
from tqdm import tqdm
for i in tqdm(range(len(potential_has_ship))):
fname = potential_has_ship[i]
t = test_img(fname)
threshold_obj = 4
threshold = 0.35
labled,n_objs = ndi.label(t.tta_mean > (threshold*min(t.tta_mean.max(),1)))
drawer_ref = t.get_drawer_ref(labled,n_objs)
tta_average,tta_average_scores = t.open_drawer(t.put_into_drawer(t.tta_masks,t.tta_scores, drawer_ref))
tta_average = t.remove_overlap(tta_average,np.array(tta_average_scores))
if len(tta_average_scores) == 0:
subdf.loc[df_ind] = [fname, np.nan,np.nan, np.nan]
df_ind += 1
else:
for i in range(tta_average.shape[2]):
p = tta_average[:,:,i]
rle = rle_encode(p)
subdf.loc[df_ind] = [fname, rle, np.sum(tta_average[:,:,i]), tta_average_scores[i]]
df_ind += 1
df_ind = len(subdf)
for j in range(len(potential_no_ship)):
subdf.loc[df_ind] = [potential_no_ship[j], np.nan,np.nan, np.nan]
df_ind += 1
subdf.to_csv('submission.csv',index=False)