Overview

This a computer vision object detection and segmentation problem on kaggle (https://www.kaggle.com/c/airbus-ship-detection#description). In this problem, I build a model that detects all ships in satellite images and generate a mask for each ship. There several deep learning models that works with image detection such as YOLO, R-CNN, Fast R-CNN, Faster R-CNN. For objection segmentation, Unet is a great tools. Recently there is a nice paper on object instance segmentation (https://arxiv.org/abs/1703.06870) called Mask R-CNN.

In this problem, most image (~80%) contains no ships. So my strategy is the following:

  1. I build a classifier to detect if a image has any ships.
  2. Feed the image that contains image detected by the classifier to Mask R-CNN.
In [1]:
from fastai.conv_learner import *
from fastai.dataset import *
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
/home/zhang.chi9/anaconda3/envs/fastai/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d
In [2]:
PATH = './'
TRAIN = '/scratch/zhang.chi9/train/'
TRAIN2 = '/scratch/zhang.chi9/test/'
TEST = '/scratch/zhang.chi9/test_v2/'
SEGMENTATION = '../input/train_ship_segmentations.csv'
exclude_list = ['6384c3e78.jpg','13703f040.jpg', '14715c06d.jpg',  '33e0ff2d5.jpg',
                '4d4e09f2a.jpg', '877691df8.jpg', '8b909bb20.jpg', 'a8d99130e.jpg', 
                'ad55c3143.jpg', 'c8260c541.jpg', 'd6c7f17c7.jpg', 'dc3e7c901.jpg',
                'e44dffe88.jpg', 'ef87bad36.jpg', 'f083256d8.jpg'] #corrupted image

nw = 4   #number of workers for data loader
arch = resnet34 #specify target architecture
segmentation_df = pd.read_csv(SEGMENTATION).set_index('ImageId')
train_names = segmentation_df.index.tolist()
test_names = [f for f in os.listdir(TEST)]
for el in exclude_list:
    if(el in train_names): train_names.remove(el)
    if(el in test_names): test_names.remove(el)

tr_n, val_n = train_test_split(train_names, test_size=0.2, random_state=42)

Build the classifier

In [3]:
class pdFilesDataset(FilesDataset):
    def __init__(self, fnames, path, transform):
        self.segmentation_df = pd.read_csv(SEGMENTATION).set_index('ImageId')
        super().__init__(fnames, transform, path)
    
    def get_x(self, i):
        img = open_image(os.path.join(self.path, self.fnames[i]))
        if self.sz == 768: return img 
        else: return cv2.resize(img, (self.sz, self.sz))
    
    def get_y(self, i):
        if(self.path == TEST): return 0
        masks = self.segmentation_df.loc[self.fnames[i]]['EncodedPixels']
        if(type(masks) == float): return 0 #NAN - no ship 
        else: return 1
    
    def get_c(self): return 2 #number of classes

def get_data(sz,bs):
    #data augmentation
    aug_tfms = [RandomRotate(20, tfm_y=TfmType.NO),
                RandomDihedral(tfm_y=TfmType.NO),
                RandomLighting(0.05, 0.05, tfm_y=TfmType.NO)]
    tfms = tfms_from_model(arch, sz, crop_type=CropType.NO, tfm_y=TfmType.NO, 
                aug_tfms=aug_tfms)
    ds = ImageData.get_ds(pdFilesDataset, (tr_n[:-(len(tr_n)%bs)],TRAIN), 
                (val_n,TRAIN), tfms, test=(test_names,TEST))
    md = ImageData(PATH, ds, bs, num_workers=nw, classes=None)
    return md

Training on small image size 256x256

In [ ]:
sz = 256 #image size
bs = 200  #batch size

md = get_data(sz,bs)
learn = ConvLearner.pretrained(arch, md, ps=0.5) #dropout 50%
learn.opt_fn = optim.Adam

learn.fit(2e-3, 1)

learn.unfreeze()
lr=np.array([1e-4,5e-4,2e-3])

learn.fit(lr, 1, cycle_len=2, use_clr=(20,8))

Change size to 384x384

In [4]:
sz = 384 #image size
bs = 100  #batch size

md = get_data(sz,bs)
learn = ConvLearner.pretrained(arch, md, ps=0.5) #dropout 50%
learn.opt_fn = optim.Adam
learn.unfreeze()
lr=np.array([1e-4,5e-4,2e-3])

learn.fit(lr/2, 1, cycle_len=2, use_clr=(20,8)) #lr is smaller since bs is only 32
learn.save('Resnet34_lable_384_1')
learn.load('Resnet34_lable_384_1')
/home/zhang.chi9/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/initializers.py:6: UserWarning: nn.init.kaiming_normal is now deprecated in favor of nn.init.kaiming_normal_.
  if hasattr(m, 'weight'): init_fn(m.weight)
/home/zhang.chi9/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/initializers.py:6: UserWarning: nn.init.kaiming_normal is now deprecated in favor of nn.init.kaiming_normal_.
  if hasattr(m, 'weight'): init_fn(m.weight)

Classifer validation and prediction using TTA (test time augmentation)

In [11]:
log_preds,y = learn.predict_with_targs()

probs = np.exp(log_preds)

probs = probs[:,1]

preds =1*(probs>0.5)

from sklearn.metrics import accuracy_score
print(f'The accuracy score is {accuracy_score(y, preds)}')
The accuracy score is 0.986071891933145
In [12]:
from sklearn.metrics import accuracy_score
print(f'The accuracy score is {accuracy_score(y, preds)}')

class_names = ['no_ship','has_ship']
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Compute confusion matrix
cnf_matrix = confusion_matrix(y, preds)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()
The accuracy score is 0.986071891933145
Confusion matrix, without normalization
[[14785   181]
 [  184 11056]]
Normalized confusion matrix
[[0.99 0.01]
 [0.02 0.98]]

Make prediction on test dataset

In [ ]:
log_preds,y = learn.TTA(is_test=True)
probs = np.mean(np.exp(log_preds),0)[:,1]
df['p_ship'] = pd.Series(probs, index=test_names)

Build model for image segmeatation using Mask R-CNN

In [23]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from imgaug import augmenters as iaa
from scipy.ndimage import binary_erosion
from scipy.ndimage.morphology import binary_dilation
import pdb
from scipy import ndimage as ndi

%matplotlib inline
import mpld3


# Root directory of the project
ROOT_DIR = os.path.abspath("./Mask_RCNN/")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

def multi_rle_encode(img, **kwargs):
    '''
    Encode connected regions as separated masks
    '''
    labels = label(img)
    if img.ndim > 2:
        return [rle_encode(np.sum(labels==k, axis=2), **kwargs) for k in np.unique(labels[labels>0])]
    else:
        return [rle_encode(labels==k, **kwargs) for k in np.unique(labels[labels>0])]

# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
def rle_encode(img, min_max_threshold=1e-3, max_mean_threshold=None):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    if np.max(img) < min_max_threshold:
        return '' ## no need to encode if it's all zeros
    if max_mean_threshold and np.mean(img) > max_mean_threshold:
        return '' ## ignore overfilled mask
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def rle_decode(mask_rle, shape=(768, 768)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T  # Needed to align to RLE direction

def masks_as_image(in_mask_list):
    # Take the individual ship masks and create a single mask array for all ships
    all_masks = np.zeros((768, 768), dtype = np.uint8)
    for mask in in_mask_list:
        if isinstance(mask, str):
            all_masks |= rle_decode(mask)
    return all_masks

def stack_masks(masks):
    # Take the individual ship masks and create a single mask array for all ships
    all_masks = np.zeros((768, 768))
    for i in range(masks.shape[2]):
        mask = masks[:,:,i]
        all_masks += mask
    return np.array(all_masks, dtype = np.uint8)

def masks_as_color(in_mask_list):
    # Take the individual ship masks and create a color mask array for each ships
    all_masks = np.zeros((768, 768), dtype = np.float)
    scale = lambda x: (len(in_mask_list)+x+1) / (len(in_mask_list)*2) ## scale the heatmap image to shift 
    for i,mask in enumerate(in_mask_list):
        if isinstance(mask, str):
            all_masks[:,:] += scale(i) * rle_decode(mask)
    return all_masks

def get_ax(rows=1, cols=1, size=8):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.
    
    Change the default size attribute to control the size
    of rendered images
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax

PATH = '../Airbus_Ship_Detection'
TRAIN = '/scratch/zhang.chi9/train/'
TEST = '/scratch/zhang.chi9/test/'
SEGMENTATION = '../Airbus_Ship_Detection/input/train_ship_segmentations.csv'
PRETRAINED = '../input/fine-tuning-resnet34-on-ship-detection/models/Resnet34_lable_256_1.h5'
exclude_list = ['6384c3e78.jpg','13703f040.jpg', '14715c06d.jpg',  '33e0ff2d5.jpg',
                '4d4e09f2a.jpg', '877691df8.jpg', '8b909bb20.jpg', 'a8d99130e.jpg', 
                'ad55c3143.jpg', 'c8260c541.jpg', 'd6c7f17c7.jpg', 'dc3e7c901.jpg',
                'e44dffe88.jpg', 'ef87bad36.jpg', 'f083256d8.jpg'] #corrupted images

train_names = [f for f in os.listdir(TRAIN)]
test_names = [f for f in os.listdir(TEST)]
for el in exclude_list:
    if(el in train_names): train_names.remove(el)
    if(el in test_names): test_names.remove(el)
print(f'{len(train_names)} images founded for training')
print(f'{len(test_names)} images founded for testing')

img_df = pd.read_csv(SEGMENTATION)
img_df_test = pd.read_csv(test_SEGMENTATION)

df = img_df
df.drop(df[df['ImageId'].isin(exclude_list)].index,inplace = True)
df['ships'] = df['EncodedPixels'].map(lambda c_row: 1 if isinstance(c_row, str) else 0)
img_ship1 = df.groupby('ImageId', group_keys=False).agg({'ships': 'sum'})
ships_only1 = img_ship1.drop(img_ship1.ships[img_ship1.ships==0].index)
print(f'Out of {len(train_names)} training images, only {len(ships_only1)} images that has ships')

df = img_df_test
df.drop(df[df['ImageId'].isin(exclude_list)].index,inplace = True)
df['ships'] = df['EncodedPixels'].map(lambda c_row: 1 if isinstance(c_row, str) else 0)
img_ship2 = df.groupby('ImageId', group_keys=False).agg({'ships': 'sum'})
ships_only2 = img_ship2.drop(img_ship2.ships[img_ship2.ships==0].index)
print(f'Out of {len(test_names)} training images, only {len(ships_only2)} images that has ships')

class AirbusDataset(utils.Dataset):

    def load_train(self, idx, sz):
        """Generate the requested number of synthetic images.
        count: number of images to generate.
        sz: the size of the generated images.
        """
        # Add classes
        # source = Airbus
        # class = ship
        self.add_class("Airbus", 1, "ship")

        # Add images
        for i in idx:
            if i > 0:
                imagename = ships_only1.index[i]
                rle = img_df.loc[img_df['ImageId']==imagename,'EncodedPixels'].values
                self.add_image("Airbus", image_id=i, fname=imagename, path=TRAIN, sz=sz, rle = rle)
            else:
                imagename = ships_only2.index[-i]
                rle = img_df_test.loc[img_df_test['ImageId']==imagename,'EncodedPixels'].values
                self.add_image("Airbus", image_id=i, fname=imagename, path=TEST, sz=sz, rle = rle)

    def load_image(self, image_id):
        """Generate an image from the specs of the given image ID.
        """
        info = self.image_info[image_id]
        img = plt.imread(info['path']+info['fname'])
        if info['sz'] == 768: return img 
        else: return cv2.resize(img, (info['sz'], info['sz']))
    
    def load_mask(self, image_id):
        """Load instance masks for the given image.
        Returns:
            masks: A bool array of shape [height, width, instance count] with
                a binary mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        info = self.image_info[image_id]
        rle = info['rle']
        mask = np.empty([info['sz'], info['sz'], len(rle)])
        class_ids = np.ones(len(rle))
        
        for i,rle_ in enumerate(rle):
            mask_ = rle_decode(rle_)
            if info['sz'] != 768: mask_ = cv2.resize(mask_, (info['sz'], info['sz']))
            mask[:,:,i] = mask_

        return mask.astype(np.bool), class_ids.astype(np.int32)

import pickle

with open('idx.pickle', 'rb') as f:
    train_idx, val_idx = pickle.load(f)

train_idx = train_idx + list(-np.arange(len(ships_only2)))
sz = 768

# Training dataset 
dataset_train = AirbusDataset()
dataset_train.load_train(train_idx, sz)
dataset_train.prepare()

# Validation dataset
dataset_val = AirbusDataset()
dataset_val.load_train(val_idx, sz)
dataset_val.prepare()

class ShipsConfig(Config):
    """Configuration for training on the nucleus segmentation dataset."""
    # Give the configuration a recognizable name
    NAME = "test_multigpu_ships"

    # Adjust depending on your GPU memory
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # Background + nucleus

    # Number of training and validation steps per epoch
    STEPS_PER_EPOCH = 1000
    VALIDATION_STEPS = 1

    # Don't exclude based on confidence. Since we have two classes
    # then 0.5 is the minimum anyway as it picks between nucleus and BG
    DETECTION_MIN_CONFIDENCE = 0

    # Input image resizing
    # Random crops of size 512x512
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = sz
    IMAGE_MAX_DIM = sz

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (6, 12, 36, 108, 324)
    
    RPN_ANCHOR_RATIOS = [0.5, 1, 3]

    # ROIs kept after non-maximum supression (training and inference)
    POST_NMS_ROIS_TRAINING = 1000
    POST_NMS_ROIS_INFERENCE = 2000

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    RPN_NMS_THRESHOLD = 0.9

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 128

    # Image mean (RGB)
    MEAN_PIXEL = np.array([56.37015, 80.04626, 85.10948])

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    TRAIN_ROIS_PER_IMAGE = 128

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 200

    # Max number of final detections per image
    DETECTION_MAX_INSTANCES = 400
    
config = ShipsConfig()

from imgaug import augmenters as iaa
augmentation = iaa.Sometimes(0.8, [
    iaa.Fliplr(0.5),
    iaa.Flipud(0.5),
    iaa.Affine(
            scale={"x": (0.9, 1), "y": (0.9, 1)}, # scale images to 80-120% of their size, individually per axis
            rotate=(-5, 5), # rotate by -45 to +45 degrees
            shear=(-1, 1), # shear by -16 to +16 degrees
    )])

from keras.callbacks import *

class find_lr(Callback):

    def __init__(self):
        super(find_lr, self).__init__()
        self.trn_iterations = 0.
        self.history = {}
        
    def clr(self):
        
        y = 10**(5e-3*self.trn_iterations - 6)
        return y
    
    def plot_lr(self):
        plt.xlabel('Training Iterations')
        plt.ylabel('Learning Rate')
        plt.title('lr vs. training iterations')
        plt.plot(self.history['iterations'], self.history['lr'])
        
    def plot_loss(self,cut=6):
        plt.plot(self.history['loss'])
        plt.title('loss vs. learning rate')
        plt.ylabel('loss')
        plt.xlabel('learning rate')

    def on_train_begin(self, logs={}):
        logs = logs or {}
        K.set_value(self.model.optimizer.lr, self.clr())        
            
    def on_batch_end(self, epoch, logs=None):
        logs = logs or {}
        self.trn_iterations += 1
        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        
        K.set_value(self.model.optimizer.lr, self.clr())

from keras.callbacks import *

class cyclic_lr(Callback):

    def __init__(self, base_lr=1e-6, max_lr=3e-4, T0 = 1000, cycle_mult = 2):
        super(cyclic_lr, self).__init__()
        self.base_lr = base_lr
        self.max_lr = max_lr
        self.T0 = T0
        self.cycle_mult = 2
        self.trn_iterations = 0.
        self.history = {}

    def get_period(self):
        marker = self.T0*(self.cycle_mult**np.arange(10) -1 )/(self.cycle_mult -1 )
        
        if (self.trn_iterations > marker).any():
            T_ps = np.where(self.trn_iterations > marker)[0].max()
        else: 
            T_ps = 0
        T_new = self.T0*self.cycle_mult**T_ps
        iter_new = self.trn_iterations - marker[T_ps]
        return T_new, iter_new
        
    def clr(self):
        T_new, iter_new = self.get_period()
        y = (self.max_lr - self.base_lr)/2*np.cos(np.pi*iter_new/T_new) + (self.max_lr + self.base_lr)/2
        return y
    
    def plot_lr(self):
        plt.xlabel('Training Iterations')
        plt.ylabel('Learning Rate')
        plt.title('lr vs. training iterations')
        plt.plot(self.history['iterations'], self.history['lr'])
        
    def plot_loss(self,cut=6):
        plt.plot(self.history['loss'])
        plt.title('loss vs. learning rate')
        plt.ylabel('loss')
        plt.xlabel('learning rate')

    def on_train_begin(self, logs={}):
        logs = logs or {}
        K.set_value(self.model.optimizer.lr, self.clr())        
            
    def on_batch_end(self, epoch, logs=None):
        logs = logs or {}
        self.trn_iterations += 1
        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        
        K.set_value(self.model.optimizer.lr, self.clr())

find_lr_object = find_lr()

def init_model(init_mode,*args):
    # imagenet, coco, saved, or last
    init_with = init_mode
    name = args[0]
    if init_with == "imagenet":
        model.load_weights(model.get_imagenet_weights(), by_name=True)
    elif init_with == "coco":
        model.load_weights(COCO_MODEL_PATH, by_name=True,exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])
    elif init_with == 'saved':
        model_path = os.path.join('/home/zhang.chi9/Airbus_segmentaiton/weights', name +'.h5')
        model.load_weights(model_path, by_name=True)
    elif init_with == "last":
        # Load the last model you trained and continue training
        model.load_weights(model.find_last(), by_name=True)
        
def train_model(stage = 1, init = 'last'): 
    # Training - Stage 1
    if stage == 1:
        print("Training network heads")
        init_model(init, '128_stage_3')
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=1,
                    layers='heads',
                    augmentation=augmentation,
                    custom_callbacks=[find_lr_object])
    # Training - Stage 2: Finetune layers from ResNet stage 4 and up
    elif stage == 2:
        print("Fine tune Resnet stage 4 and up")
        init_model(init, str(sz)+'_stage_1')
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=1,
                    layers='4+',
                    augmentation=augmentation,
                    custom_callbacks=[find_lr_object])
    # Training - Stage 3,Fine tune all layers
    elif stage == 3:
        print("Fine tune all layers")
        init_model(init, str(sz)+'_stage_2')
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE / 10,
                    epochs=1,
                    layers='all',
                    augmentation=augmentation,
                    custom_callbacks=[find_lr_object])

    save_model(str(sz)+'_stage_'+str(stage))
    
def save_model(name):
    model_path = os.path.join('/home/zhang.chi9/Airbus_segmentaiton/weights', name +'.h5')
    model.keras_model.save_weights(model_path)

# Create model in training mode

model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

#train_model(stage = 1, init = 'coco')
150430 images founded for training
88486 images founded for testing
Out of 150430 training images, only 29070 images that has ships
Out of 88486 training images, only 13486 images that has ships

Visulization of the results

In [24]:
class InferenceConfig(ShipsConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

model_path = '/home/zhang.chi9/Airbus_segmentaiton/models/mask_rcnn_ships_0259.h5'

init_with = "saved"  # imagenet, coco, or last
print("Loading weights from ", init_with)
if init_with == "saved":
    model.load_weights(model_path, by_name=True)
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)
Loading weights from  saved
In [29]:
class valid_img:
    
    def __init__(self, image_id = random.choice(dataset_val.image_ids)):
        self.image_id = image_id
        self.original_image, self.image_meta, self.gt_class_id, self.gt_bbox, self.gt_masks =\
    modellib.load_image_gt(dataset_val, inference_config, image_id, use_mini_mask=False)
        results = model.detect([self.original_image], verbose=0)
        r = results[0]
        self.rois, self.masks, self.class_ids, self.scores  = r['rois'], r['masks'], r['class_ids'], r['scores']
        if len(self.scores) == 0: self.masks = np.zeros((768, 768 ,1), dtype = np.float)
        self.masks = np.float32(self.masks)
        self.gt_masks = np.float32(self.gt_masks)
        self.gt_num_ships = len(self.gt_class_id)
        self.num_ships = len(self.class_ids)
        self.num_tta = 6
        self.tta()
        
    def visual(self):
        visualize.display_instances(self.original_image, self.gt_bbox, self.gt_masks, self.gt_class_id, 
                            dataset_train.class_names, figsize=(8, 8))
        visualize.display_instances(self.original_image, self.rois, self.masks, self.class_ids, 
                            dataset_val.class_names, self.scores, ax=get_ax())
        
    def visual_gt(self):
        visualize.display_instances(self.original_image, self.gt_bbox, self.gt_masks, self.gt_class_id, 
                            dataset_train.class_names, figsize=(8, 8))

    def visual_masks(self,anymask):
        plt.figure(figsize=(8,8))
        plt.imshow(anymask)
        plt.imshow(self.rescaled_gt_masks,alpha = 0.6)
        
    @property
    def rescaled_gt_masks(self):
        # Take the individual ship masks and create a color mask array for each ships
        rescaled_masks = np.zeros((768, 768), dtype = np.float)
        scale = lambda x: (self.gt_masks.shape[2]+x+1) / (self.gt_masks.shape[2]*2) ## scale the heatmap image to shift 
        for i in range(self.gt_masks.shape[2]):
            mask = self.gt_masks[:,:,i]
            rescaled_masks[:,:] += scale(i) * mask
        return rescaled_masks
    
    @property
    def rescaled_masks(self):
        # Take the individual ship masks and create a color mask array for each ships
        rescaled_masks = np.zeros((768, 768), dtype = np.float)
        scale = lambda x: self.scores[x]
        for i in range(self.masks.shape[2]):
            mask = self.masks[:,:,i]
            rescaled_masks[:,:] += scale(i) * mask
        return rescaled_masks
    
    @property
    def rescaled_tta_masks(self):
        rescaled_tta_masks_list = []
        for ind in range(self.num_tta):
            rescaled_tta_masks = np.zeros((768, 768), dtype = np.float)
            if self.tta_scores[ind].size != 0:
                #scale = lambda x: self.tta_scores[ind][x]
                img = self.tta_erode_masks[ind]
                for i in range(img.shape[2]):
                    mask = img[:,:,i]
                    #rescaled_tta_masks[:,:] += scale(i) * mask
                    rescaled_tta_masks[:,:] += mask
            rescaled_tta_masks_list.append(rescaled_tta_masks)
        return rescaled_tta_masks_list
    
    @property
    def detection_matrix(self):
        n_masks = self.gt_masks.shape[2]
        n_pred = self.masks.shape[2]
        matri = np.zeros((n_pred,n_masks))
        for indi in range(n_pred):
            mask = self.masks[:,:,indi]
            for indj in range(n_masks): 
                p = self.gt_masks[:,:,indj]
                matri[indi,indj] = IoU(p, mask)
        return matri
    
    def show_matrix(self):
        mpld3.disable_notebook()
        ax = sns.heatmap(t.detection_matrix, linewidth=0.5)
        plt.show()
        mpld3.enable_notebook()
        
    @staticmethod
    def IoU(pred, targs):
        intersection = (pred*targs).sum()
        return intersection / ((pred+targs).sum() - intersection + 1.0)
    

    def f2_score(self,pred,gt):
        n_th = 10
        b = 4
        thresholds = [0.5 + 0.05*i for i in range(n_th)]
        n_masks = gt.shape[2]
        n_pred = pred.shape[2]
        ious = []
        score = 0
        for indi in range(n_masks):
            mask = gt[:,:,indi]
            buf = []
            for indj in range(n_pred): 
                p = pred[:,:,indj]
                buf.append(self.IoU(p,mask))
            ious.append(buf)
        for t in thresholds:   
            tp, fp, fn = 0, 0, 0
            for i in range(n_masks):
                match = False
                for j in range(n_pred):
                    if ious[i][j] > t: match = True
                if not match: fn += 1

            for j in range(n_pred):
                match = False
                for i in range(n_masks):
                    if ious[i][j] > t: match = True
                if match: tp += 1
                else: fp += 1
            score += ((b+1)*tp)/((b+1)*tp + b*fn + fp)
        return score/n_th
    
    def tta(self):
        image_augs = [iaa.Fliplr(1),iaa.Flipud(1),iaa.Affine(rotate=(90,90)),
                  iaa.Affine(rotate=(180,180)),iaa.Affine(rotate=(270,270))]
        inverse_image_augs = [iaa.Fliplr(1),iaa.Flipud(1),iaa.Affine(rotate=(-90,-90)),
                      iaa.Affine(rotate=(-180,-180)),iaa.Affine(rotate=(-270,-270))]

        auged_img = [image_aug.augment_image(self.original_image) for image_aug in image_augs]
        
        self.tta_masks = [self.masks]
        self.tta_erode_masks = [self.remove_overlap_erode(self.masks,self.scores)] # 
        self.tta_scores = [self.scores]
        for ind in range(len(auged_img)): 
            img = auged_img[ind]
            results = model.detect([img], verbose=0)
            r = results[0]
            if len(r['scores']) == 0: r['masks'] = np.zeros((768, 768, 1), dtype = np.float)
            self.tta_masks.append(inverse_image_augs[ind].augment_image(r['masks']))
            self.tta_erode_masks.append(self.remove_overlap_erode(inverse_image_augs[ind].augment_image(r['masks']),r['scores'])) #
            self.tta_scores.append(r['scores'])
    
    @staticmethod
    def get_erode_iteration(image):
        percent = []
        for i in range(10):
            image_eroded = binary_erosion(image,iterations=(i+1)).astype(image.dtype)
            percent.append(np.sum(image_eroded)/np.sum(image))
        percent = np.array(percent)
        it = 0
        if len(percent[percent >0.5]) != 0: it= np.argmin(percent[percent >0.5])+1
        return it
    
    def erode_image(self,image):
        if np.sum(image) != 0:
            image = np.float32(image)
            eroded_image = np.zeros(image.shape)
            for i in range(image.shape[2]):
                a = image[:,:,i]
                it = self.get_erode_iteration(a)
                if it == 0:
                    eroded_image[:,:,i] = a
                else:
                    eroded_image[:,:,i] = binary_erosion(a,iterations=it).astype(a.dtype)
                #eroded_image[:,:,i] = ndi.gaussian_laplace(a, sigma=5)
                #eroded_image[:,:,i] = ndi.uniform_filter(, size=4, mode='constant')
            return eroded_image
        else:
            return image
    
    @staticmethod
    def remove_overlap(mask,scores):
        if len(scores) != 0:
            x = []
            ids = (-scores).argsort() # sort the array from largest to smallest
            background = np.zeros((sz,sz))
            for i in ids:
                tmp =  mask[:,:,i]- background
                background += np.float32(tmp > 0)
                if np.sum(np.float32(tmp > 0))>0: x.append(np.float32(tmp > 0))
                
            no_overlap_mask = np.zeros((sz,sz,len(x)))
            for i in range(len(x)):
                no_overlap_mask[:,:,i] = x[i]
            return no_overlap_mask
        else:
            return mask

    def remove_overlap_erode(self,mask,scores):
        image = self.remove_overlap(mask,scores)
        return self.erode_image(image)
    
    def visual_tta(self):
        plt.figure(figsize=(12,16))
        for ind in range(self.num_tta):
            img = self.rescaled_tta_masks[ind]
            plt.subplot(4,2,ind+1)
            plt.imshow(img)
    
    @property
    def tta_mean(self):
        tta_mean = np.zeros((sz,sz))
        for i in range(self.num_tta):
            tta_mean += self.rescaled_tta_masks[i]/6
        return tta_mean
    
    @staticmethod
    def stack3d(img):
        stacks = np.zeros((sz,sz))
        scale = lambda x: (img.shape[2]+x+1) / (img.shape[2]*2)
        for i in range(img.shape[2]):
            stacks += img[:,:,i]*scale(i)
        return stacks
    
    @staticmethod
    def get_drawer_ref(labled,n_objs):
        if n_objs != 0:
            drawer_ref = {}
            ind = 0
            for i in range(n_objs):
                obj = (labled == i + 1).astype(float)
                if(obj.sum() > threshold_obj): 
                    drawer_ref[ind] = obj
                    ind += 1
        else:
            drawer_ref = [labled]
        return drawer_ref


    def put_into_drawer(self,tta_masks, tta_scores, ref):
        drawer = {k: [] for k in range(len(ref))}
        for ind in range(len(tta_masks)):
            tta_mask = tta_masks[ind]
            for i in range(tta_mask.shape[2]):
                img = tta_mask[:,:,i]
                if tta_scores[ind].size != 0:
                    scale = tta_scores[ind][i]
                else:
                    scale = 1
                drawer_ind = self.get_drawer_num(img,ref)
                if drawer_ind != -1:
                    drawer[drawer_ind].append(img*scale)
        return drawer

    @staticmethod
    def open_drawer(drawer):
        th = 0.01
        all_img_in_drawer = []
        all_img_scores = []
        for i in range(len(drawer)):
            img = np.zeros((sz,sz))
            if len(drawer[i]) != 0:
                for j in range(len(drawer[i])):
                    img += drawer[i][j]/6

                if not ((img>th)==0).all() : 
                    all_img_scores.append(np.sum(img)/np.sum(img>th)) 
                    all_img_in_drawer.append(img>th)

        if len(all_img_in_drawer) !=0: 
            stack = np.zeros((sz,sz,len(all_img_in_drawer)))
        else: 
            stack = np.zeros((sz,sz,1))

        for i in range(len(all_img_in_drawer)):
            stack[:,:,i] = all_img_in_drawer[i]


        return stack,all_img_scores

    @staticmethod
    def get_drawer_num(img,ref):
        score_wrp_ref = []
        for i in range(len(ref)):
            target = ref[i]
            score_wrp_ref.append(np.sum(img*target))
        if np.max(score_wrp_ref) != 0:
            drawer_ind = np.argmax(score_wrp_ref) 
        else:
            drawer_ind = -1
        return drawer_ind
In [55]:
def image_stats(dataset,image_id):
    """Returns a dict of stats for one image."""
    image = dataset.load_image(image_id)
    mask, _ = dataset.load_mask(image_id)
    bbox = utils.extract_bboxes(mask)
    # Sanity check
    assert mask.shape[:2] == image.shape[:2]
    # Return stats dict
    return {
        "id": image_id,
        "shape": list(image.shape),
        "bbox": [[b[2] - b[0], b[3] - b[1]]
                 for b in bbox
                 # Uncomment to exclude nuclei with 1 pixel width
                 # or height (often on edges)
                 # if b[2] - b[0] > 1 and b[3] - b[1] > 1
                ],
        "color": np.mean(image, axis=(0, 1)),
    }

stats = []
for i in np.arange(200):
    stats.append(image_stats(dataset_train,i))
In [56]:
# Image stats
image_shape = np.array([s['shape'] for s in stats])
image_color = np.array([s['color'] for s in stats])
print("Image Count: ", image_shape.shape[0])
print("Height  mean: {:.2f}  median: {:.2f}  min: {:.2f}  max: {:.2f}".format(
    np.mean(image_shape[:, 0]), np.median(image_shape[:, 0]),
    np.min(image_shape[:, 0]), np.max(image_shape[:, 0])))
print("Width   mean: {:.2f}  median: {:.2f}  min: {:.2f}  max: {:.2f}".format(
    np.mean(image_shape[:, 1]), np.median(image_shape[:, 1]),
    np.min(image_shape[:, 1]), np.max(image_shape[:, 1])))
print("Color   mean (RGB): {:.2f} {:.2f} {:.2f}".format(*np.mean(image_color, axis=0)))

# Histograms
fig, ax = plt.subplots(1, 3, figsize=(16, 4))
ax[0].set_title("Height")
_ = ax[0].hist(image_shape[:, 0], bins=20)
ax[1].set_title("Width")
_ = ax[1].hist(image_shape[:, 1], bins=20)
ax[2].set_title("Height & Width")
_ = ax[2].hist2d(image_shape[:, 1], image_shape[:, 0], bins=10, cmap="Blues")
Image Count:  200
Height  mean: 768.00  median: 768.00  min: 768.00  max: 768.00
Width   mean: 768.00  median: 768.00  min: 768.00  max: 768.00
Color   mean (RGB): 57.16 81.65 85.47
In [57]:
nuclei_per_image = np.array([len(s['bbox']) for s in stats])

print("Ships in image:  mean: {:.1f}  median: {:.1f}  min: {:.1f}  max: {:.1f}".format(
    nuclei_per_image.mean(), np.median(nuclei_per_image), nuclei_per_image.min(), nuclei_per_image.max()))
plt.hist(nuclei_per_image, bins=10)
Ships in image:  mean: 2.0  median: 1.0  min: 1.0  max: 14.0
Out[57]:
(array([160.,  13.,  11.,   9.,   1.,   2.,   2.,   1.,   0.,   1.]),
 array([ 1. ,  2.3,  3.6,  4.9,  6.2,  7.5,  8.8, 10.1, 11.4, 12.7, 14. ]),
 <a list of 10 Patch objects>)
In [58]:
# Ships size stats

nucleus_shape = np.array([b for s in stats for b in s['bbox']])
nucleus_area = nucleus_shape[:, 0] * nucleus_shape[:, 1]


print("  Total Ships: ", nucleus_shape.shape[0])
print("  Ships Height. mean: {:.2f}  median: {:.2f}  min: {:.2f}  max: {:.2f}".format(
    np.mean(nucleus_shape[:, 0]), np.median(nucleus_shape[:, 0]),
    np.min(nucleus_shape[:, 0]), np.max(nucleus_shape[:, 0])))
print("  Ships Width.  mean: {:.2f}  median: {:.2f}  min: {:.2f}  max: {:.2f}".format(
    np.mean(nucleus_shape[:, 1]), np.median(nucleus_shape[:, 1]),
    np.min(nucleus_shape[:, 1]), np.max(nucleus_shape[:, 1])))
print("  Ships Area.   mean: {:.2f}  median: {:.2f}  min: {:.2f}  max: {:.2f}".format(
    np.mean(nucleus_area), np.median(nucleus_area),
    np.min(nucleus_area), np.max(nucleus_area)))

_ = plt.hist(nucleus_shape[:, 1], bins=20)
#plt.xlim(0,50)
  Total Ships:  393
  Ships Height. mean: 40.62  median: 24.00  min: 3.00  max: 248.00
  Ships Width.  mean: 47.90  median: 26.00  min: 4.00  max: 347.00
  Ships Area.   mean: 3459.94  median: 682.00  min: 15.00  max: 59768.00
In [59]:
# Show 2D histogram
_ = plt.hist2d(nucleus_shape[:, 1], nucleus_shape[:, 0], bins=200, cmap="Reds")
plt.clim(0,2)
In [60]:
# Ship height/width ratio
nucleus_aspect_ratio = nucleus_shape[:, 0] / nucleus_shape[:, 1]
print("Ships Aspect Ratio.  mean: {:.2f}  median: {:.2f}  min: {:.2f}  max: {:.2f}".format(
    np.mean(nucleus_aspect_ratio), np.median(nucleus_aspect_ratio),
    np.min(nucleus_aspect_ratio), np.max(nucleus_aspect_ratio)))
plt.figure(figsize=(15, 5))

_ = plt.hist(nucleus_aspect_ratio, bins=20, range=[0, 5])
Ships Aspect Ratio.  mean: 1.01  median: 0.88  min: 0.18  max: 4.18

visualize the augmentation

In [27]:
# Augmentation 
from imgaug import augmenters as iaa
augmentation = iaa.Sometimes(0.8, [
    iaa.Fliplr(0.5),
    iaa.Flipud(0.5),
    iaa.Affine(
            scale={"x": (0.9, 1), "y": (0.9, 1)}, # scale images to 80-120% of their size, individually per axis
            rotate=(-5, 5), # rotate by -45 to +45 degrees
            shear=(-1, 1), # shear by -16 to +16 degrees
    )])

#image_id = np.random.choice(dataset_train.image_ids, 1)[0]
# Load the image multiple times to show augmentations
limit = 4
ax = get_ax(rows=2, cols=limit//2)
for i in range(limit):
    image, image_meta, class_ids, bbox, mask = modellib.load_image_gt(
        dataset_train, inference_config, image_id, use_mini_mask=False, augment=False, augmentation=augmentation)
    visualize.display_instances(image, bbox, mask, class_ids,
                                dataset_train.class_names, ax=ax[i//2, i % 2],
                                show_mask=True, show_bbox=False)
In [45]:
for i in range(4):
    image_id = np.random.choice(dataset_val.image_ids, 1)[0]
    v = valid_img(image_id = image_id)
    threshold_obj = 4
    threshold = 0.35
    labled,n_objs = ndi.label(v.tta_mean > (threshold*min(v.tta_mean.max(),1)))

    drawer_ref = v.get_drawer_ref(labled,n_objs)

    tta_average,tta_average_scores = v.open_drawer(v.put_into_drawer(v.tta_masks,v.tta_scores, drawer_ref))

    tta_average = v.remove_overlap(tta_average,np.array(tta_average_scores))

    fig = plt.figure(figsize=(21,7))
    plt.subplot(1,3,1),plt.imshow(v.original_image),plt.title('Original Image')
    plt.subplot(1,3,2),plt.imshow(v.stack3d(v.gt_masks)),plt.title('Ground Truth')
    plt.subplot(1,3,3),plt.imshow(v.stack3d(tta_average)),plt.title('Prediction')
    fig.savefig(f'seg_val{i}.png')

Prediction on test dataset

In [ ]:
TEST_DIR = '/scratch/zhang.chi9/test_v2'
class test_img(valid_img):
    
    def __init__(self, fname):
        self.fname = fname
        self.original_image = plt.imread(os.path.join(TEST_DIR, fname))
        results = model.detect([self.original_image], verbose=0)
        r = results[0]
        self.rois, self.masks, self.class_ids, self.scores  = r['rois'], r['masks'], r['class_ids'], r['scores']
        self.masks = np.float32(self.masks)
        self.num_tta = 6
        self.tta()
    
    def visual(self):
        visualize.display_instances(self.original_image, self.rois, self.masks, self.class_ids, 
                            dataset_val.class_names, self.scores, ax=get_ax())
        
df_has_ships = pd.read_csv('ship_detection.csv')

potential_has_ship = list(df_has_ships.id[df_has_ships.mean(axis=1)>0.5])
potential_no_ship = list(df_has_ships.id[df_has_ships.mean(axis=1)<=0.5])

subdf = pd.DataFrame(columns=['ImageId','EncodedPixels','size','maskrcnn_score'])

df_ind = 0

from tqdm import tqdm
for i in tqdm(range(len(potential_has_ship))):
    
    fname = potential_has_ship[i]
    t = test_img(fname)

    threshold_obj = 4
    threshold = 0.35
    labled,n_objs = ndi.label(t.tta_mean > (threshold*min(t.tta_mean.max(),1)))

    drawer_ref = t.get_drawer_ref(labled,n_objs)

    tta_average,tta_average_scores = t.open_drawer(t.put_into_drawer(t.tta_masks,t.tta_scores, drawer_ref))
    tta_average = t.remove_overlap(tta_average,np.array(tta_average_scores))
    
    if len(tta_average_scores) == 0: 
        subdf.loc[df_ind] = [fname, np.nan,np.nan, np.nan]
        df_ind += 1
    else:
        for i in range(tta_average.shape[2]):
            p = tta_average[:,:,i]
            rle = rle_encode(p)
            subdf.loc[df_ind] = [fname, rle, np.sum(tta_average[:,:,i]), tta_average_scores[i]]
            df_ind += 1
        
        

df_ind = len(subdf)
for j in range(len(potential_no_ship)):
    subdf.loc[df_ind] = [potential_no_ship[j], np.nan,np.nan, np.nan]
    df_ind += 1



subdf.to_csv('submission.csv',index=False)