
Splitting the Data and Coding Image Augmentation

jasonshin 2021. 12. 6. 18:00
Using the Cats-v-dogs dataset, we split the data, apply image augmentation, and train a model.
# To organize the data for training, create a cats-v-dogs directory
# under /tmp, with training and testing directories beneath it,
# and cats and dogs directories inside each of those.
import os

try:
    #YOUR CODE GOES HERE
    os.mkdir('/tmp/cats-v-dogs')

    os.mkdir('/tmp/cats-v-dogs/training')
    os.mkdir('/tmp/cats-v-dogs/testing')

    os.mkdir('/tmp/cats-v-dogs/training/cats')
    os.mkdir('/tmp/cats-v-dogs/training/dogs')

    os.mkdir('/tmp/cats-v-dogs/testing/cats')
    os.mkdir('/tmp/cats-v-dogs/testing/dogs')
except OSError:
    pass
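
As a side note, the same directory tree can be built more robustly with os.makedirs, which creates parent directories as needed and, with exist_ok=True, does not fail on reruns. A minimal equivalent sketch:

# Equivalent setup with os.makedirs (creates parents, safe to rerun)
import os

for split in ['training', 'testing']:
    for label in ['cats', 'dogs']:
        os.makedirs(os.path.join('/tmp/cats-v-dogs', split, label), exist_ok=True)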
## Write a Python function called split_data that takes:

# SOURCE directory
# TRAINING directory
# TESTING directory
# SPLIT SIZE

# It should shuffle the files in the source directory,
# split them into training and testing portions by the split size,
# and copy them into the training and testing directories
# (saving only files whose size is greater than 0).

# Hints:
# os.listdir(DIRECTORY) gives you a listing of the contents of that directory
# os.path.getsize(PATH) gives you the size of the file
# copyfile(source, destination) copies a file from source to destination
# random.sample(list, len(list)) shuffles a list




import random
from shutil import copyfile

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
# YOUR CODE STARTS HERE
    # 1. Shuffle the files.
    file_names = os.listdir(SOURCE)
    shuffled_names = random.sample(file_names, len(file_names))

    # 2. Split into a training set and a testing set.
    # First compute the index where the split happens.
    index = int(len(shuffled_names) * SPLIT_SIZE)

    training_names = shuffled_names[0:index]
    testing_names = shuffled_names[index:]

    # 3. Copy the files from the source directory into the
    #    training and testing directories, skipping empty files.
    for file_name in training_names:
        if os.path.getsize(SOURCE + file_name) > 0:
            copyfile(SOURCE + file_name, TRAINING + file_name)
        else:
            print(file_name + " is zero length, so ignoring")

    for file_name in testing_names:
        if os.path.getsize(SOURCE + file_name) > 0:
            copyfile(SOURCE + file_name, TESTING + file_name)
        else:
            print(file_name + " is zero length, so ignoring")





# YOUR CODE ENDS HERE


CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
TRAINING_CATS_DIR = "/tmp/cats-v-dogs/training/cats/"
TESTING_CATS_DIR = "/tmp/cats-v-dogs/testing/cats/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"
TRAINING_DOGS_DIR = "/tmp/cats-v-dogs/training/dogs/"
TESTING_DOGS_DIR = "/tmp/cats-v-dogs/testing/dogs/"

split_size = .9
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

# Expected output
# 666.jpg is zero length, so ignoring
# 11702.jpg is zero length, so ignoring
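
Before building the model, it is worth sanity-checking the split by counting the files that actually landed in each directory; with split_size = .9 the training/testing counts should come out at roughly 90/10 per class. A quick check using the directory constants defined above:

# Verify the split: file counts per directory
for d in [TRAINING_CATS_DIR, TESTING_CATS_DIR,
          TRAINING_DOGS_DIR, TESTING_DOGS_DIR]:
    print(d, len(os.listdir(d)))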
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.optimizers import RMSprop

# Build a model with at least 3 CONVOLUTION LAYERS
model = tf.keras.models.Sequential([
# YOUR CODE HERE
    Conv2D(32, (3,3), activation='relu', input_shape=(300, 300, 3)),
    MaxPool2D((2,2), 2), 
    Conv2D(64, (3,3), activation='relu'),
    MaxPool2D((2,2), 2), 
    Conv2D(128, (3,3), activation='relu'),
    MaxPool2D((2,2), 2), 

    Flatten(), 
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid')

])

model.compile(optimizer=RMSprop(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
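
Note that the single sigmoid output pairs with the binary_crossentropy loss and the class_mode='binary' generators below. Calling model.summary() is a quick way to confirm that the three Conv/MaxPool stages shrink the 300x300 input to a manageable size before the Flatten layer:

# Inspect output shapes and parameter counts per layer
model.summary()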
 
# Write the code with image augmentation applied as well.

from tensorflow.keras.preprocessing.image import ImageDataGenerator

TRAINING_DIR = '/tmp/cats-v-dogs/training'
train_datagen = ImageDataGenerator(rescale=1/255.0, rotation_range=20, width_shift_range=0.2,
                                   height_shift_range=0.2, shear_range=0.2, horizontal_flip=True,
                                   fill_mode='nearest')
train_generator = train_datagen.flow_from_directory(TRAINING_DIR, target_size=(300, 300), class_mode='binary')

VALIDATION_DIR = '/tmp/cats-v-dogs/testing'
validation_datagen = ImageDataGenerator(rescale=1/255.0)
validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR, target_size=(300, 300), class_mode='binary')
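
To see what the augmentation actually produces, it can help to pull one batch from the training generator and display a few transformed images. A minimal sketch, assuming matplotlib is available:

# Preview a few augmented training images (matplotlib assumed installed)
import matplotlib.pyplot as plt

images, labels = next(train_generator)   # one augmented batch
fig, axes = plt.subplots(1, 4, figsize=(16, 4))
for ax, img in zip(axes, images[:4]):
    ax.imshow(img)    # pixels already rescaled to [0, 1]
    ax.axis('off')
plt.show()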



# Expected Output:
# Found 22498 images belonging to 2 classes.
# Found 2500 images belonging to 2 classes.

Found 22496 images belonging to 2 classes.
Found 2502 images belonging to 2 classes.

The actual counts can differ slightly from the expected output, since the shuffled split (and where the zero-length files land) varies between runs.

 

# Train the model.

history = model.fit(train_generator,
                    epochs=10,
                    verbose=1,
                    validation_data=validation_generator)
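
After training, the returned history object holds the per-epoch metrics, and plotting them is the usual way to check for overfitting (training accuracy rising while validation accuracy plateaus). A minimal sketch, again assuming matplotlib:

# Plot training vs. validation accuracy over epochs
import matplotlib.pyplot as plt

plt.plot(history.history['accuracy'], label='train acc')
plt.plot(history.history['val_accuracy'], label='val acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()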

 
