Skip to content

Latest commit

Β 

History

History
108 lines (87 loc) · 4.89 KB

pytorch_vision_deeplabv3_resnet101.md

File metadata and controls

108 lines (87 loc) · 4.89 KB
layout background-class body-class title summary category image author tags github-link github-id featured_image_1 featured_image_2 accelerator demo-model-link order
hub_detail
hub-background
hub
Deeplabv3
DeepLabV3 models with ResNet-50, ResNet-101 and MobileNet-V3 backbones
researchers
deeplab2.png
Pytorch Team
vision
scriptable
pytorch/vision
deeplab1.png
deeplab2.png
cuda-optional
1
import torch

# Load pretrained DeepLabV3 weights from the pinned torchvision hub release and
# put the network into evaluation mode right away (`eval()` returns the module).
# Other entrypoints that can be passed as the second argument instead:
#   'deeplabv3_resnet101'
#   'deeplabv3_mobilenet_v3_large'
model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True).eval()

사전 훈련된 모든 모델들은 동일한 방식으로 정규화된 입력 이미지를 기대합니다. 즉, (N, 3, H, W) 모양의 3채널 RGB 이미지의 미니 배치를 기대하며, 여기서 N 은 이미지의 개수이고 H 와 W 는 각각 최소 224 픽셀이어야 합니다. 이미지는 [0, 1] 범위로 로드한 다음 mean = [0.485, 0.456, 0.406] 과 std = [0.229, 0.224, 0.225] 를 사용하여 정규화를 진행합니다.

모델은 입력 Tensor와 높이와 너비가 같지만 21개의 클래스가 있는 두 개의 텐서가 있는 OrderedDict를 반환합니다. output['out']에는 의미론적 마스크를 포함하고 있고, output['aux']에는 픽셀 당 보조 손실(auxiliary loss) 값을 포함하고 있습니다. 추론 모드에서는, output['aux']는 유용하지 않습니다. 따라서, output['out']은 (N, 21, H, W)과 같은 모양을 가집니다. 좀 더 자세한 정보는 이곳에서 확인할 수 있습니다.

# Download an example image from the pytorch/hub GitHub repository.
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` has been loaded.
import urllib.request

url, filename = ("https://github.com/pytorch/hub/raw/master/images/deeplab1.png", "deeplab1.png")
# The legacy Python 2 `urllib.URLopener` attempt and its bare `except:` were
# dropped; on Python 3 that attempt always failed and only the call below ran.
urllib.request.urlretrieve(url, filename)
# Run the sample on the downloaded image (requires torchvision).
from PIL import Image
from torchvision import transforms

# Open the file and force a three-channel RGB image.
input_image = Image.open(filename).convert("RGB")

# Convert to a tensor, then normalize with the per-channel mean/std
# that the pretrained models expect.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
preprocess = transforms.Compose([transforms.ToTensor(), normalize])

input_tensor = preprocess(input_image)
# Add a leading batch dimension to build the mini-batch the model expects.
input_batch = torch.unsqueeze(input_tensor, 0)

# When a GPU is available, move both the model and the input there for speed.
if torch.cuda.is_available():
    model.to('cuda')
    input_batch = input_batch.to('cuda')

# Inference only: no gradients are needed.
with torch.no_grad():
    batch_scores = model(input_batch)['out']
    output = batch_scores[0]  # scores for the single image in the batch
# Pick the highest-scoring class index at every pixel.
output_predictions = torch.argmax(output, dim=0)

여기서 출력은 (21, H, W) 형태이며, 각 위치에서는 클래스마다 예측에 해당하는 정규화되지 않은 확률이 있습니다. 각 클래스의 최대 예측값을 얻은 다음 다운스트림 작업에 사용하려면, output_predictions = output.argmax(0)를 수행합니다.

λ‹€μŒμ€ 각각 ν΄λž˜μŠ€λ§ˆλ‹€ 색상이 ν• λ‹Ήλœ μ˜ˆμΈ‘μ„ λ‚˜νƒ€λ‚΄λŠ” μž‘μ€ μ‘°κ°μž…λ‹ˆλ‹€.

# Build a color palette with one RGB triple for each of the 21 classes.
# Every class index is multiplied by three fixed per-channel constants and
# reduced modulo 255, so the mapping from class to color is deterministic.
palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
class_ids = torch.arange(21)  # class indices 0..20
colors = class_ids[:, None] * palette  # (21, 1) * (3,) broadcasts to (21, 3)
colors = (colors % 255).numpy().astype("uint8")

# Plot the semantic segmentation prediction, one palette color per class.
class_map = output_predictions.byte().cpu().numpy()  # per-pixel class indices as uint8 on the CPU
r = Image.fromarray(class_map)
r = r.resize(input_image.size)  # match the original image size
r.putpalette(colors)

import matplotlib.pyplot as plt
plt.imshow(r)
# plt.show()

모델 설명

Deeplabv3-ResNet은 ResNet-50 또는 ResNet-101 백본이 있는 Deeplabv3 모델로 구성되어 있습니다. Deeplabv3-MobileNetV3-Large는 MobileNetV3 large 백본이 있는 DeepLabv3 모델로 구성되어 있습니다. 사전 훈련된 모델은 Pascal VOC 데이터 세트에 있는 20개 카테고리에 대해 COCO train2017의 일부분 데이터 셋에 대해 훈련되었습니다.

COCO val2017 데이터 셋에서 평가된 사전 훈련된 모델의 정확도는 다음과 같습니다.

| Model structure | Mean IOU | Global Pixelwise Accuracy |
| --- | --- | --- |
| deeplabv3_resnet50 | 66.4 | 92.4 |
| deeplabv3_resnet101 | 67.4 | 92.4 |
| deeplabv3_mobilenet_v3_large | 60.3 | 91.2 |

참조