Download the data

Download instructions are available in jbrownlee's Datasets repo.

Expert judgements

Load and check the expert judgements

from pandas import read_csv

# Expert annotations: a tab-separated file without a header row; each row
# holds an image file name, a caption id and the scores of three experts.
experts = read_csv(
    "flicker8k/ExpertAnnotations.txt",
    names=["image_file_name", "caption_id", "expert_1", "expert_2", "expert_3"],
    header=None,
    sep="\t",
)
experts.head()
image_file_name caption_id expert_1 expert_2 expert_3
0 1056338697_4f7d7ce270.jpg 2549968784_39bfbe44f9.jpg#2 1 1 1
1 1056338697_4f7d7ce270.jpg 2718495608_d8533e3ac5.jpg#2 1 1 2
2 1056338697_4f7d7ce270.jpg 3181701312_70a379ab6e.jpg#2 1 1 2
3 1056338697_4f7d7ce270.jpg 3207358897_bfa61fa3c6.jpg#2 1 2 2
4 1056338697_4f7d7ce270.jpg 3286822339_5535af6b93.jpg#2 1 1 2
experts.shape
(5822, 5)

Check cases where all experts agree

# Flag the rows on which all three experts gave the same score.
# Comparing whole columns is vectorised: it avoids calling a Python lambda once
# per row and always yields a boolean Series, even for an empty frame.
experts_agreement_bool = (
    (experts["expert_1"] == experts["expert_2"])
    & (experts["expert_2"] == experts["expert_3"])
)
# On the selected rows the three scores are identical, so expert_1 stands in
# for the shared score and is renamed to "expert".
experts_agreement = experts.loc[
    experts_agreement_bool, ["image_file_name", "caption_id", "expert_1"]
].rename(columns={"expert_1": "expert"})
experts_agreement.head()
image_file_name caption_id expert
0 1056338697_4f7d7ce270.jpg 2549968784_39bfbe44f9.jpg#2 1
5 1056338697_4f7d7ce270.jpg 3360930596_1e75164ce6.jpg#2 1
6 1056338697_4f7d7ce270.jpg 3545652636_0746537307.jpg#2 1
8 106490881_5a2dd9b7bd.jpg 1425069308_488e5fcf9d.jpg#2 1
9 106490881_5a2dd9b7bd.jpg 1714316707_8bbaa2a2ba.jpg#2 2
experts_agreement["expert"].value_counts().sort_index()
1    2350
2     580
3     214
4     247
Name: expert, dtype: int64

Load images and captions

import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from textwrap import wrap
def load_image(file_name, relative_folder):
    """Read the image stored as ``file_name`` inside ``relative_folder``.

    The two arguments are joined with ``os.path.join`` and the resulting path
    is handed to ``matplotlib.image.imread``, whose result is returned.
    """
    image_path = os.path.join(relative_folder, file_name)
    return mpimg.imread(image_path)
def get_caption(caption_id, captions):
    """Return the caption text of the first row matching ``caption_id``.

    ``captions`` is a table with ``"caption_id"`` and ``"caption"`` columns.
    When no row matches, indexing the empty result raises ``IndexError``.
    """
    matching_texts = captions.loc[captions["caption_id"] == caption_id, "caption"]
    return matching_texts.values[0]
def plot_images_and_captions(image_names, caption_ids, relative_image_folder, wrap_value=50):
    """Plot four images in a 2x2 grid, each titled with a wrapped caption.

    Args:
        image_names: Four image file names, loaded from ``relative_image_folder``.
        caption_ids: Four caption ids; the i-th caption titles the i-th image.
        relative_image_folder: Folder the image files are read from.
        wrap_value: Line width passed to ``textwrap.wrap`` for the titles.

    Raises:
        AssertionError: If either sequence does not contain exactly 4 items.

    Note:
        Caption text is looked up in the module-level ``captions`` table, so
        that table must be loaded before this function is called.
    """
    assert len(image_names) == len(caption_ids) == 4, "Number of images and captions should be equal to 4"
    fig = plt.figure(figsize=(10, 10))
    for position, (image_file_name, caption_id) in enumerate(zip(image_names, caption_ids), start=1):
        sub = fig.add_subplot(2, 2, position)
        # Draw on the subplot itself instead of pyplot's implicit "current
        # axes", so the image is tied to the axes that receives its title.
        sub.imshow(load_image(image_file_name, relative_folder=relative_image_folder))
        sub.set_title("\n".join(wrap(get_caption(caption_id, captions), wrap_value)))
    fig.tight_layout()
# Caption table: a tab-separated file without a header row, mapping each
# caption id to its caption text.
captions = read_csv(
    "flicker8k/Flickr8k.token.txt",
    names=["caption_id", "caption"],
    header=None,
    sep="\t",
)
captions.head()
caption_id caption
0 1000268201_693b08cb0e.jpg#0 A child in a pink dress is climbing up a set o...
1 1000268201_693b08cb0e.jpg#1 A girl going into a wooden building .
2 1000268201_693b08cb0e.jpg#2 A little girl climbing into a wooden playhouse .
3 1000268201_693b08cb0e.jpg#3 A little girl climbing the stairs to her playh...
4 1000268201_693b08cb0e.jpg#4 A little girl in a pink dress going into a woo...

Sample images

Relevant sample (score = 4)

# Four pairs that every expert scored 4; the fixed seed makes the draw repeatable.
relevant_pairs = (
    experts_agreement[experts_agreement["expert"] == 4]
    .sample(4, random_state=675)
    .loc[:, ["image_file_name", "caption_id"]]
)
relevant_pairs
image_file_name caption_id
1869 2610447973_89227ff978.jpg 2610447973_89227ff978.jpg#2
2906 3071676551_a65741e372.jpg 3071676551_a65741e372.jpg#2
3917 3365783912_e12c3510d8.jpg 3365783912_e12c3510d8.jpg#2
4669 3589367895_5d3729e3ea.jpg 3589367895_5d3729e3ea.jpg#2
# Show the sampled score-4 pairs, each image titled with its paired caption.
relevant_image_names = relevant_pairs["image_file_name"].tolist()
relevant_caption_ids = relevant_pairs["caption_id"].tolist()
plot_images_and_captions(
    relevant_image_names,
    relevant_caption_ids,
    relative_image_folder="flicker8k/Flicker8k_Dataset/",
    wrap_value=50,
)

Irrelevant sample (score = 1)

# Four pairs that every expert scored 1; the fixed seed makes the draw repeatable.
irrelevant_pairs = (
    experts_agreement[experts_agreement["expert"] == 1]
    .sample(4, random_state=675)
    .loc[:, ["image_file_name", "caption_id"]]
)
irrelevant_pairs
image_file_name caption_id
4184 3461041826_0e24cdf597.jpg 150387174_24825cf871.jpg#2
3833 3349451628_4249a21c8f.jpg 1764955991_5e53a28c87.jpg#2
377 1446053356_a924b4893f.jpg 2542662402_d781dd7f7c.jpg#2
5274 468102269_135938e209.jpg 166507476_9be5b9852a.jpg#2
# Show the sampled score-1 pairs, each image titled with its paired caption.
irrelevant_image_names = irrelevant_pairs["image_file_name"].tolist()
irrelevant_caption_ids = irrelevant_pairs["caption_id"].tolist()
plot_images_and_captions(
    irrelevant_image_names,
    irrelevant_caption_ids,
    relative_image_folder="flicker8k/Flicker8k_Dataset/",
    wrap_value=50,
)

Score = 2 (Caption describes minor aspects of the image)

# Sample four pairs that every expert scored 2 (fixed seed) and plot them.
pairs_score_2 = (
    experts_agreement[experts_agreement["expert"] == 2]
    .sample(4, random_state=675)
    .loc[:, ["image_file_name", "caption_id"]]
)
image_names_score_2 = pairs_score_2["image_file_name"].tolist()
caption_ids_score_2 = pairs_score_2["caption_id"].tolist()
plot_images_and_captions(
    image_names_score_2,
    caption_ids_score_2,
    relative_image_folder="flicker8k/Flicker8k_Dataset/",
    wrap_value=50,
)

Score = 3 (Caption almost describes the image, with minor mistakes)

# Sample four pairs that every expert scored 3 (fixed seed) and plot them.
pairs_score_3 = (
    experts_agreement[experts_agreement["expert"] == 3]
    .sample(4, random_state=675)
    .loc[:, ["image_file_name", "caption_id"]]
)
image_names_score_3 = pairs_score_3["image_file_name"].tolist()
caption_ids_score_3 = pairs_score_3["caption_id"].tolist()
plot_images_and_captions(
    image_names_score_3,
    caption_ids_score_3,
    relative_image_folder="flicker8k/Flicker8k_Dataset/",
    wrap_value=50,
)