from os.path import join
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split
from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense,Flatten,Conv2D,Dropout,MaxPooling2D,ZeroPadding2D,BatchNormalization
from tensorflow.python.keras.preprocessing.image import load_img,img_to_array
from tensorflow.python.keras.applications.resnet50 import preprocess_input
from keras.optimizers import RMSprop
# Directories holding the raw training / test images.
train_dir = '/Users/apple/Desktop/Tools/Projects/deep learning(imagedata)/train'
test_dir = '/Users/apple/Desktop/Tools/Projects/deep learning(imagedata)/test'

# Full path of every image file in each directory.
train_path = [join(train_dir, fname) for fname in os.listdir(train_dir)]
test_path = [join(test_dir, fname) for fname in os.listdir(test_dir)]

# Per-class training paths, keyed on the class substring in the filename.
train_cat = [join(train_dir, fname) for fname in os.listdir(train_dir) if 'cat' in fname]
train_dog = [join(train_dir, fname) for fname in os.listdir(train_dir) if 'dog' in fname]

# Every image is resized to 64x64 so the CNN gets a fixed input shape.
image_size = 64
def read_and_prep_images(img_paths, img_height=image_size, img_width=image_size):
    """Load images and return one normalized 4-D array.

    Each image is resized to (img_height, img_width), converted to a
    float array, and scaled into [0, 1] by dividing by 255.

    Parameters
    ----------
    img_paths : list of str
        Paths of the image files to load.
    img_height, img_width : int
        Target size for every image; default to the module-level
        ``image_size`` (64).

    Returns
    -------
    numpy.ndarray of shape (n_images, img_height, img_width, 3)
    """
    imgs = [load_img(p, target_size=(img_height, img_width)) for p in img_paths]
    img_array = np.array([img_to_array(img) for img in imgs])
    # BUG FIX: reshape with the requested height/width, not the global
    # image_size — the original broke for any non-default size.
    return img_array.reshape(img_array.shape[0], img_height, img_width, 3) / 255
# Load every training image into a single normalized array.
train = read_and_prep_images(train_path)
train.max()  # sanity check: values should already be in [0, 1]

# Label vector derived from the filenames: dog -> 1, cat -> 0.
labels = [1 if 'dog' in path else 0 for path in train_path]

# Visualize the class balance of the training set.
sns.countplot(labels, palette='coolwarm')
train.shape

x = train
# FIX: convert to a numpy array — Keras' validation_split slices the
# targets, and a plain Python list is not reliably sliceable there.
y = np.array(labels)

# plot the first image in the dataset
plt.imshow(train[0])
# check image shape
train[0].shape
input_shape = (64, 64, 3)  # height, width, RGB channels
# Baseline LeNet-style network: two conv/pool stages followed by a dense
# head, ending in a single sigmoid unit for the binary cat-vs-dog output.
cnn1 = Sequential()
cnn1.add(Conv2D(8, kernel_size=(3, 3), activation='relu', padding='same',
                input_shape=input_shape))
cnn1.add(MaxPooling2D(pool_size=(2, 2), strides=2))
cnn1.add(Conv2D(16, kernel_size=(5, 5), activation='relu'))
cnn1.add(MaxPooling2D(pool_size=(2, 2), strides=2))
cnn1.add(Flatten())
cnn1.add(Dense(400, activation='relu'))
cnn1.add(Dense(84, activation='relu'))
cnn1.add(Dense(1, activation='sigmoid'))
# Deeper baseline: three conv/pool stages with doubling filter counts
# (32 -> 64 -> 128), then the same dense head as cnn1 and a sigmoid
# unit for the binary cat-vs-dog output.
cnn2 = Sequential([
Conv2D(32, kernel_size=(3, 3), activation='relu',input_shape=input_shape),
MaxPooling2D(pool_size=(2, 2),strides=2),
Conv2D(64, kernel_size=(3, 3), activation='relu'),
MaxPooling2D(pool_size=(2, 2),strides=2),
Conv2D(128, kernel_size=(3, 3), activation='relu'),
MaxPooling2D(pool_size=(2, 2),strides=2),
Flatten(),
Dense(400, activation='relu'),
Dense(84, activation='relu'),
Dense(1, activation='sigmoid')
])
# VGG-style model: stacked pairs of 3x3 convolutions with doubling
# filter counts (16 -> 32 -> 64 -> 128), each pair followed by 2x2
# max-pooling, then a 512-unit dense layer with dropout and a sigmoid
# output for the binary cat-vs-dog decision.
cnn3 = Sequential([
Conv2D(16, kernel_size=(3, 3), activation='relu',padding='same',input_shape=input_shape),
Conv2D(16, kernel_size=(3, 3), activation='relu',padding='same'),
MaxPooling2D(pool_size=(2, 2),strides=2),
Conv2D(32, kernel_size=(3, 3), activation='relu'),
Conv2D(32, kernel_size=(3, 3), activation='relu'),
MaxPooling2D(pool_size=(2, 2),strides=2),
Conv2D(64, kernel_size=(3,3), padding="same", activation='relu'),
Conv2D(64, kernel_size=(3,3), padding="same", activation='relu'),
# NOTE: strides defaults to pool_size, so these pools behave exactly
# like the explicit strides=2 ones above.
MaxPooling2D(pool_size=(2, 2)),
Conv2D(128, kernel_size=(3,3), padding="same", activation='relu'),
Conv2D(128, kernel_size=(3,3), padding="same", activation='relu'),
MaxPooling2D(pool_size=(2, 2)),
Flatten(),
Dense(512, activation='relu'),
Dropout(0.5),  # regularize the dense head before the output layer
Dense(1, activation='sigmoid')
])
# Print the layer-by-layer architecture and parameter counts.
cnn3.summary()
def Compile_model(model):
    """Compile *model* for binary classification and return it.

    Uses binary cross-entropy loss, the RMSprop optimizer with default
    settings, and tracks accuracy during training. The model is
    compiled in place; it is also returned so calls can be chained.
    """
    model.compile(
        optimizer='RMSprop',
        loss=keras.losses.binary_crossentropy,
        metrics=['accuracy'],
    )
    return model
# BUG FIX: model1 and model2 were commented out but are still fitted
# below, which raised NameError. Compile all three models.
model1 = Compile_model(cnn1)
model2 = Compile_model(cnn2)
model3 = Compile_model(cnn3)

# Train each model, holding out 20% of the data for validation.
model1.fit(x, y,
           batch_size=100,
           epochs=3,
           validation_split=0.2)
model2.fit(x, y,
           batch_size=100,
           epochs=10,
           validation_split=0.2)
model3.fit(x, y,
           batch_size=100,
           epochs=10,
           validation_split=0.2)
# Persist the best model: weights alone, and full architecture+weights.
model3.save_weights('model3_weights.h5')
model3.save('model3.h5')

# Score the test images. predict() returns the sigmoid probabilities
# directly; predict_proba() is deprecated/removed in newer Keras.
test = read_and_prep_images(test_path)
y_test = model3.predict(test)
y_test[0]

# Build the submission frame: one row per test image, 1-based ids.
test_id = range(1, len(test) + 1)
# FIX: flatten the (n, 1) prediction array with ravel() instead of
# abusing the pandas .str accessor on non-string data, keeping the
# probabilities as floats throughout.
label = y_test.ravel().astype(float)
output = pd.DataFrame({'id': test_id, 'label': label})
output.head()
# Peek at the most dog-like predictions.
output.sort_values(by='label', ascending=False).head()
# FIX: drop takes a bool — the original passed the string 'True',
# which only worked because non-empty strings are truthy.
output = output.set_index('id', drop=True)
output.head()
output.to_csv('outputcatdog.csv')