## Imports
import pandas as pd
import numpy as np
import os
from os import listdir
from os.path import isfile, join
from fastai import *
from fastai.vision import *
# function I will use to create folders for my classes in directory
def createFolder(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Creation is best-effort: an ``OSError`` is reported on stdout rather than
    raised, so the caller keeps running. This includes the case where a
    regular file already occupies the requested path.

    Args:
        directory: Path of the folder to create, as a string.
    """
    try:
        # exist_ok=True makes the call idempotent and removes the window
        # between a separate existence check and the actual creation.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print ('Error: Creating directory. ' + directory)
# Silence every warning from here on.
warnings.filterwarnings("ignore")

# Each entry pairs a category folder name with the name of the text file
# that lists the image paths to download for that category.
folder_paths_list = [
    ['classic', 'classic_art.txt'],
    ['contemporary', 'contemporary_art.txt'],
    ['medieval', 'medieval_art.txt'],
    ['modern', 'modern_art.txt'],
    ['renaissance', 'renaissance_art.txt'],
    ['romanticism', 'romanticism_art.txt'],
]

# Make one sub-folder per category under ./data/art; each one will hold the
# pictures that belong to that category.
for folder_name, _url_file in folder_paths_list:
    createFolder('./data/art/' + folder_name)
Get the image URLs from Google Images by running the following JavaScript in the browser console:
urls = Array.from(document.querySelectorAll('.rg_di .rg_meta')).map(el=>JSON.parse(el.textContent).ou);
window.open('data:text/csv;charset=utf-8,' + escape(urls.join('\n')));
# Fetch the pictures for every category into its own folder. The list of
# image paths is read from the text file located inside that same folder,
# and max_pics=200 is passed to download_images.
path = Path('data/art')
for folder_name, url_file in folder_paths_list:
    dest = path / folder_name
    dest.mkdir(parents=True, exist_ok=True)
    download_images(dest / url_file, dest, max_pics=200)
# Run fastai's image verification on each category folder in turn, printing
# the category name first so the output can be followed.
classes = [folder_name for folder_name, _ in folder_paths_list]
for category in classes:
    print(category)
    # delete=True and max_size=500 are forwarded to verify_images as given.
    verify_images(path / category, delete=True, max_size=500)
# Build the dataset from the class folders under `path`, holding out 20% of
# the images (valid_pct=0.2) as the validation set.
# The NumPy seed is fixed first, presumably so that the random split is the
# same on every run -- TODO confirm.
np.random.seed(42)
data = ImageDataBunch.from_folder(path, train=".", valid_pct=0.2,
ds_tfms=get_transforms(), size=224, num_workers=4).normalize(imagenet_stats)
# Bare expression: only shows the class list when run as a notebook cell.
data.classes
# Preview a batch of images (rows=3).
data.show_batch(rows=3, figsize=(7,8))
# Report the class names, the class count (data.c) and the size of each split.
print("these are my data classes", data.classes)
print('There are in total {} classes'.format(data.c))
print('{} pictures are in my training dataset, {} are in my testing dataset'.format(len(data.train_ds), len(data.valid_ds)))
First, let's create our convolutional neural network from the data we have prepared, using resnet34 as the architecture. We also want to print the error rate after each epoch.
# Create a learner on `data` with the resnet34 architecture, tracking
# error_rate as the metric.
learn = cnn_learner(data, models.resnet34, metrics=error_rate)
Now let's fit the model to the data for n epochs (here n = 4).
# Train with fit_one_cycle, passing 4 as the cycle length.
learn.fit_one_cycle(4)
The table shows that the model is wrong ~37% of the time.
# Save the current model state under the name 'stage-1'.
learn.save('stage-1')
To improve the model, we unfreeze the rest of its layers and run the learning rate finder.
# Unfreeze the model, then run the learning rate finder on it.
learn.unfreeze()
learn.lr_find()
# Plot what the learning rate finder recorded.
learn.recorder.plot()
# Train again with fit_one_cycle (5), giving max_lr as the slice 1e-5..1e-3.
learn.fit_one_cycle(5, max_lr=slice(1e-5,1e-3))
The error rate is now slightly better: the model is wrong ~33% of the time.
# Save the current model state under the name 'stage-2'.
learn.save('stage-2')
# Load the 'stage-2' state back into the learner. The trailing semicolon is
# presumably there to suppress the echoed return value in a notebook.
learn.load('stage-2');
# Build an interpretation object from the learner and fetch the top losses
# together with their indices.
interp = ClassificationInterpretation.from_learner(learn)
losses,idxs = interp.top_losses()
# Sanity check, shown only in a notebook: the validation set, the losses and
# the indices all have the same length.
len(data.valid_ds)==len(losses)==len(idxs)
# Show the 6 images with the highest losses.
interp.plot_top_losses(6, figsize=(15,11))
# Plot the confusion matrix.
interp.plot_confusion_matrix()
The confusion matrix tells us that the most common error our model makes is confusing modern art with contemporary art.
We can do better if the dataset is less noisy.
# import FastAI widgets
from fastai.widgets import *
# Build a databunch over every image under `path` with no train/validation
# split (split_none), labelled by folder name, so that all pictures can be
# reviewed together.
db = (ImageList.from_folder(path)
.split_none()
.label_from_folder()
.transform(get_transforms(), size=224)
.databunch()
)
# Create a resnet34 learner on the unsplit data and load the saved 'stage-2'
# state into it.
learn_cln = cnn_learner(db, models.resnet34, metrics=error_rate)
learn_cln.load('stage-2');
# Open the ImageCleaner widget on the items returned by from_toplosses.
ds, idxs = DatasetFormatter().from_toplosses(learn_cln)
ImageCleaner(ds, idxs, path)
# Open the widget again in duplicates mode on the items from from_similars.
ds, idxs = DatasetFormatter().from_similars(learn_cln)
ImageCleaner(ds, idxs, path, duplicates=True)
Create a folder for the test pictures.
createFolder('./data/test_pics')
# Manually add the pictures you want to classify to the test folder.
path_test = Path('data/test_pics')
# Collect the names of the regular files in the test directory. The names are
# sorted because directory listing order is arbitrary, while later cells pick
# pictures out of this list by position.
pictures = sorted(entry.name for entry in path_test.iterdir() if entry.is_file())
# Bare expression: only shows the list when run as a notebook cell.
pictures
# Export the learner; no arguments are given, so export() uses its defaults.
learn.export()
# Make the CPU the default device, presumably so the predictions below do not
# need a GPU -- TODO confirm.
defaults.device = torch.device('cpu')
Show the test pictures that I have selected.
# Open the first entry of `pictures` from the test folder; the bare `img`
# only displays it when run as a notebook cell.
img = open_image(path_test/pictures[0])
img
A painting by Piero della Francesca entitled "Double Portrait of Battista Sforza, Duchess of Urbino, and Federico da Montefeltro, Duke of Urbino". It shows a husband and wife looking at each other across a frame. This is Northern Renaissance art made with oil on wood.
# Open the second entry of `pictures` from the test folder; the bare `img`
# only displays it when run as a notebook cell.
img = open_image(path_test/pictures[1])
img
Pablo Picasso’s 1937 painting “Femme au Béret et à la Robe Quadrillée (Marie-Thérèse Walter)” is considered modern art, as the modern art period spans from the 1860s to the 1970s.
# Load the exported learner once up front: it does not depend on the picture,
# so loading it inside the loop would only repeat the same work.
learn = load_learner(path)
# Classify every test picture and report the result. enumerate(start=1)
# numbers the pictures by position, which stays correct even if two entries
# share a name and avoids searching the list for each one.
for position, p in enumerate(pictures, start=1):
    img = open_image(path_test/p)
    pred_class,pred_idx,outputs = learn.predict(img)
    print('picture {} is classified as {}'.format(position, pred_class))