AI 이론/딥러닝 이론

[이미지 딥러닝 모델링] zipfile, optimizers, ImageDataGenerator

jasonshin 2021. 12. 2. 17:36

정리 1. 

import tensorflow as tf
import os
import zipfile
 
구글 코랩에서 인터넷에 있는 파일을 다운로드하는 방법 (wget 명령에는 다운로드할 파일의 URL을 함께 적어야 한다)
!wget --no-check-certificate \
    -O "/tmp/happy-or-sad.zip"
 
 
파이썬으로 압축파일 푸는 방법 
 
import os
import zipfile
# Unpack the downloaded archive. The context manager closes the zip file
# even if extraction raises, which the manual close() did not guarantee.
filename = '/tmp/happy-or-sad.zip'
with zipfile.ZipFile(filename, 'r') as zip_ref:
    zip_ref.extractall('/tmp/happy-or-sad')
 
 
# One sub-folder per class under the extraction directory.
train_happy_dir, train_sad_dir = '/tmp/happy-or-sad/happy', '/tmp/happy-or-sad/sad'

# Entry names (not full paths) found in each class folder.
train_happy_names, train_sad_names = (
    os.listdir(class_dir) for class_dir in (train_happy_dir, train_sad_dir)
)
 
## 콜백 함수를 만든다.
class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that stops training once accuracy reaches 0.999."""

    def on_epoch_end(self, epoch, logs=None):
        """Request an early stop when the finished epoch's accuracy is high enough.

        Args:
            epoch: Index of the epoch that just ended (not used here).
            logs: Metrics dict supplied by Keras; only 'accuracy' is read.
        """
        # `logs` can be None or lack 'accuracy'; treat both as "threshold not met"
        # instead of raising.
        accuracy = (logs or {}).get('accuracy')
        # NOTE: the threshold is 0.999 even though the message below says 99%.
        if accuracy is not None and accuracy >= 0.999:
            print('\nReached 99% accuracy so cancelling training!')
            self.model.stop_training = True


callbacks = myCallback()
 
 
jpg, png와 같은 이미지 파일을 학습 데이터로 만드는 방법 (텐서플로우의 ImageDataGenerator)
 
## 모델링을 하고, 컴파일 코드를 작성한다.

import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten, Conv2D, MaxPool2D
# name에러가 뜨면 import안했구나 생각하면 됨.
# Three Conv2D + MaxPool2D stages followed by a dense classifier head.
model = tf.keras.models.Sequential([
    # Input is (height, width, channels): 300x300 images with 3 colour channels.
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu', input_shape=(300, 300, 3)),
    MaxPool2D(pool_size=(2, 2), strides=2),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=2),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=2),

    Flatten(),

    Dense(units=512, activation='relu'),

    # Single sigmoid unit: one probability for the two-class problem.
    Dense(units=1, activation='sigmoid'),
])
 

from tensorflow.keras.optimizers import RMSprop

# Configure training: RMSprop optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

 

from tensorflow.keras.preprocessing.image import ImageDataGenerator


# rescale multiplies every pixel value by 1/255.
train_datagen = ImageDataGenerator(rescale=1/255.0)

# target_size is a (height, width) tuple and must match the model's input size.
train_generator = train_datagen.flow_from_directory(
    '/tmp/happy-or-sad',
    target_size=(300, 300),
    class_mode='binary',
)

# Keras documents `callbacks` as a list of Callback instances, so wrap the
# single callback in a list.
history = model.fit(train_generator, epochs=15, callbacks=[callbacks])

정리 2.

(모델링 함수이용)

압축 풀기

import os
import zipfile
# Unpack the archive. The context manager closes the zip file even if
# extraction raises, which the manual close() did not guarantee.
filename = '/tmp/cats_and_dogs_filtered.zip'
with zipfile.ZipFile(filename, 'r') as zip_ref:
    zip_ref.extractall('/tmp/cats_and_dogs_filtered')

 

데이터에 액세스할 경로를 만든다.

# Class folders for the training and validation splits. The dogs paths are
# needed by the later os.listdir / os.path.join calls, which previously
# referenced them without any definition.
train_cats_dir = '/tmp/cats_and_dogs_filtered/cats_and_dogs_filtered/train/cats'
train_dogs_dir = '/tmp/cats_and_dogs_filtered/cats_and_dogs_filtered/train/dogs'

validation_cats_dir = '/tmp/cats_and_dogs_filtered/cats_and_dogs_filtered/validation/cats'
validation_dogs_dir = '/tmp/cats_and_dogs_filtered/cats_and_dogs_filtered/validation/dogs'

파일명을 찍어본다.

# Entry names (not full paths) found in each class folder, per split.
train_cats_names, train_dogs_names = (
    os.listdir(train_cats_dir),
    os.listdir(train_dogs_dir),
)
validation_cats_names, validation_dogs_names = (
    os.listdir(validation_cats_dir),
    os.listdir(validation_dogs_dir),
)

 

개와 고양이 8개씩 화면에 이미지 표시해보자

%matplotlib inline

 

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

 

# Parameters for our graph; we'll output images in a 4x4 configuration
nrows = 4
ncols = 4

pic_index = 0  # Index for iterating over images

# Set up matplotlib fig, and size it to fit 4x4 pics
fig = plt.gcf()
fig.set_size_inches(ncols*4, nrows*4)

# Advance the index by 8 so the slices below select 8 files per class.
pic_index += 8

next_cat_pix = [os.path.join(train_cats_dir, fname)
                for fname in train_cats_names[pic_index-8:pic_index]
               ]

next_dog_pix = [os.path.join(train_dogs_dir, fname)
                for fname in train_dogs_names[pic_index-8:pic_index]
               ]

for i, img_path in enumerate(next_cat_pix + next_dog_pix):
    # Set up subplot; subplot indices start at 1
    sp = plt.subplot(nrows, ncols, i + 1)
    sp.axis('Off')  # Don't show axes (or gridlines)

    img = mpimg.imread(img_path)
    plt.imshow(img)

plt.show()

Building a Small Model from Scratch to Get to ~72% Accuracy

이미지의 사이즈를 300x300, 칼라(rgb)로 처리하자.

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
 
def build_model(input_shape=(300, 300, 3)):
    """Build the (uncompiled) convolutional binary classifier.

    Args:
        input_shape: (height, width, channels) of the input images.
            Defaults to 300x300 with 3 colour channels.

    Returns:
        A Sequential model: three Conv2D + MaxPool2D stages, then Flatten,
        a 512-unit ReLU Dense layer and a single sigmoid output unit.
    """
    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu',
                     input_shape=input_shape))
    model.add(MaxPool2D(pool_size=(2, 2), strides=2))

    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=2))

    model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=2))

    model.add(Flatten())

    model.add(Dense(units=512, activation='relu'))

    # One sigmoid unit: a single probability for the two-class problem.
    model.add(Dense(units=1, activation='sigmoid'))

    return model


model = build_model()

 

서머리 해보자

# Print the model's summary.
model.summary()

 

RMSprop 으로 컴파일한다.

from tensorflow.keras.optimizers import RMSprop, Adam, Adagrad, Adadelta

# Configure training: binary cross-entropy loss, RMSprop optimizer, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)
 
 

Data Preprocessing

# 파일로 되어 있는 이미지를 학습에 사용하려면 넘파이 배열로 바꿔줘야 한다.
# 실제로는 복잡한 작업을 해야 하지만, 텐서플로우에서 쉽게 처리할 수 있도록 라이브러리를 제공한다.

# ImageDataGenerator

ImageDataGenerator 사용하기

from tensorflow.keras.preprocessing.image import ImageDataGenerator
# 파일로 되어 있는 이미지의 피쳐 스케일링을 한다. ==> 255.0으로 나누는 것!
 
# rescale multiplies every pixel value by 1/255 (feature scaling of the image files).
train_datagen = ImageDataGenerator(rescale=1/255.0)
validation_datagen = ImageDataGenerator(rescale=1/255.0)

# Give the directory that holds the files, the image size, and the label mode.
# target_size is ours to choose, but it must match the height/width of the
# model's input_shape. It is a (height, width) tuple.
# class_mode: 'binary' for 2 classes, 'categorical' for 3 or more.
train_generator = train_datagen.flow_from_directory(
    '/tmp/cats_and_dogs_filtered/cats_and_dogs_filtered/train',
    target_size=(300, 300),
    class_mode='binary',
)
# train_generator supplies both the images (X_train) and the labels (y_train).
# Labels come from the sub-folder names: the folders are sorted alphabetically
# and numbered from 0.
validation_generator = validation_datagen.flow_from_directory(
    '/tmp/cats_and_dogs_filtered/cats_and_dogs_filtered/validation',
    target_size=(300, 300),
    class_mode='binary',
)

# Total number of samples = batch_size * steps_per_epoch,
# e.g. 1000 = 20 * 50.
# NOTE(review): no batch_size is passed above, so the generator default applies;
# steps_per_epoch=8 presumably covers only part of the training set per epoch —
# confirm this is intended.
epoch_history = model.fit(
    train_generator,
    epochs=15,
    steps_per_epoch=8,
    validation_data=validation_generator,
)

Running the Model

픽사베이에서 무료 사진 다운로드 받아서, 실제로 예측해 보자

import numpy as np

from google.colab import files
from keras.preprocessing import image

# Ask the user to upload image files; the keys of the result are file names.
uploaded = files.upload()

for fn in uploaded:
    # predicting images
    path = '/content/' + fn
    # target_size is a (height, width) tuple and must match the model input.
    img = image.load_img(path, target_size=(300, 300))

    x = image.img_to_array(img)
    # The training generators rescale pixels by 1/255, so the prediction
    # input needs the same scaling to be comparable.
    x = x / 255.0
    # Add a leading batch axis: the model predicts on batches of images.
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])

    classes = model.predict(images, batch_size=10)

    print(classes[0])

    # Outputs above 0.5 are reported as dog, everything else as cat.
    if classes[0] > 0.5:
        print(fn + " is a dog")
    else:
        print(fn + " is a cat")

Evaluating Accuracy and Loss for the Model

training/validation accuracy 와 loss 를 차트로 시각화 한다.

# Evaluate on the validation data, then chart accuracy per epoch.
model.evaluate(validation_generator)
plt.plot(epoch_history.history['accuracy'])
plt.plot(epoch_history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
# Two separate labels; without the comma the adjacent string literals are
# concatenated into the single label 'TrainValidation'.
plt.legend(['Train', 'Validation'])
plt.show()

 

# Chart training and validation loss per epoch.
plt.plot(epoch_history.history['loss'])
plt.plot(epoch_history.history['val_loss'])
# This chart shows loss, so the title must not repeat 'Model Accuracy'.
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('loss')
# Two separate labels; without the comma the adjacent string literals are
# concatenated into the single label 'TrainValidation'.
plt.legend(['Train', 'Validation'])
plt.show()
반응형