Future Vision Transport : Design an Autonomous Vehicle

Context

"Future Vision Transport" is a company building embedded computer vision systems for autonomous vehicles.

The goal here is to build a model able to classify each pixel of an image into one of the given categories of objects, and expose this model's API as a web service. This problem is known as "Semantic Segmentation" and is a challenge for the autonomous vehicle industry.

We will compare different models (varying architecture, augmentation method and image resizing) and their performances on the Cityscapes dataset. We will use AzureML and Azure App Service to deploy our models.

State of the art

In Computer Vision, the problem of Semantic Segmentation is to classify each pixel of an image into one of the given categories of objects.

Deep Neural Network (DNN) models

The basic principle of DNNs for Semantic Segmentation is the following :

Encoder / Decoder architecture
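The encoder progressively downsamples the image to extract increasingly abstract features, and the decoder upsamples these features back to the input resolution to produce one class prediction per pixel. As a minimal illustrative sketch (not one of the models evaluated below), such an encoder / decoder network could be written in Keras as :

```python
import tensorflow as tf
from tensorflow.keras import layers

def minimal_encoder_decoder(input_shape=(256, 256, 3), n_classes=8):
    inputs = tf.keras.Input(shape=input_shape)
    # Encoder : convolutions + downsampling extract increasingly abstract features
    x = layers.Conv2D(32, 3, padding="same", activation="relu")(inputs)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(64, 3, padding="same", activation="relu")(x)
    x = layers.MaxPooling2D()(x)
    # Decoder : transposed convolutions restore the spatial resolution of the input
    x = layers.Conv2DTranspose(64, 3, strides=2, padding="same", activation="relu")(x)
    x = layers.Conv2DTranspose(32, 3, strides=2, padding="same", activation="relu")(x)
    # One softmax probability per pixel and per class
    outputs = layers.Conv2D(n_classes, 1, activation="softmax")(x)
    return tf.keras.Model(inputs, outputs)
```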

Over time, research teams have produced more sophisticated models, with more layers, connections between layers and different convolution kernels, in order to improve prediction accuracy.

The major models are :

FCN (2015)

U-Net (2015)

PSPNet (2017)

DenseNet (2018)

DeepLabv3+ (2018)

Results on the Cityscapes Dataset

The Cityscapes Dataset is a large dataset of images captured by a car dash camera in the city, with their corresponding labels.

Semantic Segmentation on Cityscapes State of the Art

Project modules

We will use the Python programming language, and present the code and results in this JupyterLab notebook.

We will use the usual libraries for data exploration, modeling and visualisation :

We will also use libraries specific to the goals of this project :

Exploratory data analysis (EDA)

We are going to load the data and analyse the distribution of each variable.

Load data

Let's download the data from the Cityscapes dataset and look at what it contains.

The dataset is composed of the following image files :

We are not going to use the polygon shapes, but we will use the instance ids to build the ground truth images.

There are :

According to the Cityscapes dataset documentation, the images are of size 2048x1024 and are in RGB format.

We will not use the object labels (32 labels), but the 8 label categories : "void", "flat", "construction", "object", "nature", "sky", "human" and "vehicle".
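As a sketch of how this mapping can be built, the official cityscapesscripts helper exposes, for each raw label id, the corresponding category id (the `to_category_mask` helper name is ours) :

```python
import numpy as np
from cityscapesscripts.helpers.labels import labels  # official Cityscapes label definitions

# Lookup table from raw label id (0..33) to category id (0 = "void" ... 7 = "vehicle")
ID_TO_CATEGORY = np.zeros(max(label.id for label in labels) + 1, dtype=np.uint8)
for label in labels:
    if label.id >= 0:  # skip the "license plate" label, which has id -1
        ID_TO_CATEGORY[label.id] = label.categoryId

def to_category_mask(label_id_image):
    """Convert a *_labelIds.png array into an 8-category ground-truth mask."""
    return ID_TO_CATEGORY[label_id_image]
```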

Now that we have an idea of what the dataset contains, let's understand it further by computing statistics.
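One straightforward statistic is the share of pixels belonging to each of the 8 categories over the whole training set; a sketch, reusing the `to_category_mask` helper above (`ground_truth_paths` is a placeholder for the list of *_labelIds.png files) :

```python
from collections import Counter

import numpy as np
from PIL import Image

CATEGORIES = ["void", "flat", "construction", "object", "nature", "sky", "human", "vehicle"]

pixel_counts = Counter()
for mask_path in ground_truth_paths:  # placeholder : list of *_labelIds.png paths
    mask = to_category_mask(np.array(Image.open(mask_path)))
    values, counts = np.unique(mask, return_counts=True)
    pixel_counts.update(dict(zip(values.tolist(), counts.tolist())))

total = sum(pixel_counts.values())
for category_id, name in enumerate(CATEGORIES):
    print(f"{name:>12} : {100 * pixel_counts[category_id] / total:.2f} % of pixels")
```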

We can see that the most important category for an autonomous vehicle ("human") is the least represented in the dataset.

This means the problem is imbalanced, and we have to take into account that the most important class is under-represented.

Models selection and training

We are going to evaluate the performance of different models, with different parameters :

The training experiment will be run in AzureML, and the results will be stored in the AzureML environment.
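For illustration, submitting such a run with the AzureML SDK could look like the sketch below (the compute target, environment file and training script names are assumptions) :

```python
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig

ws = Workspace.from_config()  # reads the workspace config.json

env = Environment.from_conda_specification(
    name="segmentation-env", file_path="environment.yml")  # hypothetical conda file

config = ScriptRunConfig(
    source_directory="./src",
    script="train.py",                 # hypothetical training script
    compute_target="gpu-cluster",      # hypothetical compute target name
    environment=env,
    arguments=["--model", "deeplabv3plus", "--img-size", "256", "--augmentation", "0"],
)

run = Experiment(ws, "semantic-segmentation").submit(config)
run.wait_for_completion(show_output=True)
```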

Selected models architectures

We can observe the architecture of the three selected models :

| Model name | FCN-8 | U-Net | DeepLabv3+ |
| --- | --- | --- | --- |
| Total params | 69,775,768 | 2,060,424 | 2,143,304 |
| Trainable params | 69,773,848 | 2,056,648 | 2,110,216 |
| Non-trainable params | 1,920 | 3,776 | 33,088 |

Loss and metric

There are multiple ways to evaluate the model's performance.

| Loss | Metric | Intersection vs. Union | Confusion Matrix | Pros | Cons |
| --- | --- | --- | --- | --- | --- |
| Sparse Categorical Cross-entropy | Pixel Accuracy | $-$ | $\frac{TP + TN}{TP + FP + TN + FN}$ | Easy to interpret. | Bad with imbalanced target classes. |
| Dice | F1 | $\frac{2 \lvert A \cap B \rvert}{\lvert A \rvert + \lvert B \rvert}$ | $\frac{2 TP}{2 TP + FP + FN}$ | Good with imbalanced target classes. | Not easy to interpret. |
| Jaccard | Intersection over Union (IoU) | $\frac{\lvert A \cap B \rvert}{\lvert A \cup B \rvert}$ | $\frac{TP}{TP + FP + FN}$ | Easy to interpret. Good with imbalanced target classes. | $-$ |

We are going to use the Jaccard Index as the model's loss and the Mean IoU as the model's metric.
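These can be implemented in Keras as follows (a sketch; `model` stands for any of the segmentation models above, and the built-in MeanIoU metric expects class indices, hence the small wrapper) :

```python
import tensorflow as tf

def jaccard_loss(y_true, y_pred, smooth=1.0):
    """Soft Jaccard (1 - IoU) loss, averaged over classes and batch."""
    intersection = tf.reduce_sum(y_true * y_pred, axis=(1, 2))
    union = tf.reduce_sum(y_true + y_pred, axis=(1, 2)) - intersection
    iou = (intersection + smooth) / (union + smooth)
    return 1.0 - tf.reduce_mean(iou)

class MeanIoUFromProbs(tf.keras.metrics.MeanIoU):
    """MeanIoU expects class indices, so argmax the one-hot / softmax tensors first."""
    def update_state(self, y_true, y_pred, sample_weight=None):
        return super().update_state(
            tf.argmax(y_true, axis=-1), tf.argmax(y_pred, axis=-1), sample_weight)

model.compile(optimizer="adam", loss=jaccard_loss,
              metrics=[MeanIoUFromProbs(num_classes=8)])
```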

Training process

The code is available in the train.ipynb file :
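For reference, the on-the-fly image augmentation can be sketched as follows (assuming a tf.data pipeline of (image, mask) pairs with a channel dimension on the mask; the exact transformations used in train.ipynb may differ) :

```python
import tensorflow as tf

def augment(image, mask):
    # Apply the same random horizontal flip to the image and to its ground-truth mask
    flip = tf.random.uniform(()) > 0.5
    image = tf.cond(flip, lambda: tf.image.flip_left_right(image), lambda: image)
    mask = tf.cond(flip, lambda: tf.image.flip_left_right(mask), lambda: mask)
    # Photometric change applied to the image only
    image = tf.image.random_brightness(image, max_delta=0.1)
    return image, mask

# train_ds is the (image, mask) training tf.data.Dataset
train_ds = train_ds.map(augment, num_parallel_calls=tf.data.AUTOTUNE)
```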

Training results

The training results are available in the AzureML experiment.

FCN-8 is slightly faster to train than DeepLabV3+ and U-Net.

DeepLabV3+ performs much better than FCN-8 and U-Net.

Adding image augmentation has only a slight effect on the training time.

Adding image augmentation reduces the models' performance : ~ -20% Mean IoU on average with augmentation vs. without.

This can be explained by the fact that, with augmentation, the models are trained on a dataset that is no longer representative of the validation dataset; the models trained on augmented images therefore underfit the validation dataset.

Changing the input image size doesn't seem to have a significant effect on training time.

Increasing the input image size greatly improves the models' performance, especially for DeepLabV3+.

Overall, we can conclude that the best results are obtained with DeepLabV3+, without image augmentation.

Model deployment and testing

In order to provide the predictions service as requested, we will deploy our model in production. We will use two approaches :

Deployment as AzureML Endpoint

The code is available in the deploy.ipynb file :
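For illustration, deploying a registered model as a web service with the AzureML SDK can be sketched as follows (the model, environment and service names are assumptions) :

```python
from azureml.core import Environment, Workspace
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice

ws = Workspace.from_config()
model = Model(ws, name="deeplabv3plus-256")  # hypothetical registered model name
env = Environment.from_conda_specification(
    name="inference-env", file_path="environment.yml")  # hypothetical conda file

inference_config = InferenceConfig(
    entry_script="score.py",       # hypothetical scoring script
    source_directory="./src",
    environment=env,
)
deploy_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=4)

service = Model.deploy(ws, "segmentation-endpoint", [model],
                       inference_config, deploy_config)
service.wait_for_deployment(show_output=True)
print(service.scoring_uri)
```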

Test the Endpoints

We can test the Endpoint by calling the API with a sample image. The results are visible in the predict.ipynb file :
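Calling the Endpoint can be sketched as follows (the scoring URI, authentication key and JSON payload format depend on the actual scoring script and are assumptions here) :

```python
import base64

import requests

scoring_uri = "https://<endpoint>/score"  # placeholder : URI given by AzureML
api_key = "<endpoint key>"                # placeholder : authentication key

with open("sample_image.png", "rb") as f:
    payload = {"image": base64.b64encode(f.read()).decode("utf-8")}

response = requests.post(
    scoring_uri,
    json=payload,
    headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
)
print(f"prediction time : {response.elapsed.total_seconds():.2f}s")
predicted_mask = response.json()  # per-pixel category ids, format defined by score.py
```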

| Model | Prediction time |
| --- | --- |
| U-Net 64px | 5.76 s |
| U-Net 64px with augmentation | 5.65 s |
| DeepLab 64px | 5.94 s |
| DeepLab 64px with augmentation | 5.97 s |
| U-Net 128px | 9.83 s |
| U-Net 128px with augmentation | 9.55 s |
| DeepLab 128px | 14.38 s |
| DeepLab 128px with augmentation | 11.08 s |
| U-Net 256px | 41.91 s |
| U-Net 256px with augmentation | 32.52 s |
| DeepLab 256px | 27.02 s |
| DeepLab 256px with augmentation | 23.85 s |

We can see that the prediction quality improves with the input size, but the prediction time also increases : from ~6s with 64x64 pixels to more than 40s with 256x256 pixels, until the API fails with a timeout error (60s limit) for inputs > 320 pixels.

We can also see that the DeepLabV3+ model at 256 pixels is the only model to correctly predict the "human" pixels.

Deployment as Flask webapp

We developed a simple web application based on the Flask framework. The code is available in the ../webapp directory :
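A minimal version of the prediction route can be sketched as follows (the model path and route name are assumptions; the actual code in ../webapp may differ) :

```python
import numpy as np
import tensorflow as tf
from flask import Flask, jsonify, request
from PIL import Image

app = Flask(__name__)
model = tf.keras.models.load_model("model.h5", compile=False)  # hypothetical model file

@app.route("/predict", methods=["POST"])
def predict():
    # Read the uploaded image, resize it to the model's input size and normalise it
    image = Image.open(request.files["image"].stream).convert("RGB").resize((256, 256))
    x = np.expand_dims(np.asarray(image) / 255.0, axis=0)
    # Argmax over the softmax output gives one category id per pixel
    mask = np.argmax(model.predict(x)[0], axis=-1)
    return jsonify({"mask": mask.tolist()})

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
```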

Prediction time comparison

Let's compare the time required to predict the segmentation mask for the input image on the two deployment approaches, as well as a local prediction.

We can see that the local prediction is dramatically faster. This is mostly due to the image transfer time required to query the remote Endpoint and API. This is the rationale behind Edge Computing : running predictions locally on IoT devices is much faster than sending the data to a server and running them there.

Conclusion

We have been able to evaluate the performance of different models, with different parameters, and to deploy them in production.

Two main challenges remain :

In order to mitigate those two problems, we could :