import torch
import torchvision
import torchvision.models as models
class VGGPerceptualLoss(torch.nn.Module):
    """Perceptual loss computed from intermediate VGG16 feature maps.

    Passes the model output and the ground truth through the first four
    convolutional blocks of a pretrained VGG16 and accumulates L1
    distances between the resulting feature maps (content loss) and,
    optionally, between their Gram matrices (style loss), following
    Johnson et al., "Perceptual Losses for Real-Time Style Transfer and
    Super-Resolution" (2016).
    """

    def __init__(self, path='/content/drive/MyDrive/VGGPerceptualLoss/vgg16.pth', resize=True):
        """
        Args:
            path: Filesystem path of a VGG16 ``state_dict`` checkpoint.
            resize: If True, bilinearly resize inputs to 224x224 (VGG16's
                native training resolution) before feature extraction.
        """
        super(VGGPerceptualLoss, self).__init__()
        # Use the GPU when available, otherwise fall back to the CPU.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.vgg = models.vgg16()
        # map_location is required: without it, a checkpoint saved on a GPU
        # machine fails to load on a CPU-only machine (bug fix).
        self.vgg.load_state_dict(torch.load(path, map_location=device))
        self.vgg.eval()  # evaluation mode: fixes dropout/batch-norm behaviour
        # Slice the feature extractor into four blocks so forward() can tap
        # intermediate activations after each block.
        blocks = [
            self.vgg.features[:4],
            self.vgg.features[4:9],
            self.vgg.features[9:16],
            self.vgg.features[16:23],
        ]
        # Freeze ALL of VGG, not just the sliced blocks: this module is a
        # loss function and must never train the network (bug fix — the
        # original left the classifier and later layers trainable).
        for p in self.vgg.parameters():
            p.requires_grad = False
        self.blocks = torch.nn.ModuleList(blocks)
        self.transform = torch.nn.functional.interpolate
        self.resize = resize
        # ImageNet channel statistics, shaped (1, 3, 1, 1) for NCHW broadcast.
        self.register_buffer("mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer("std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))
        # Move the entire module (VGG and the buffers above) in one call.
        # The original moved only self.vgg, leaving mean/std on the CPU and
        # raising a device mismatch for CUDA inputs (bug fix).
        self.to(device)

    def forward(self, input, target, feature_layers=(0, 1, 2, 3), style_layers=()):
        """Compute the perceptual (and optional style) loss.

        Args:
            input: Model output, shape (N, C, H, W); C == 1 is tiled to 3.
            target: Ground-truth image, same shape as ``input``.
            feature_layers: Block indices contributing the L1 feature loss.
            style_layers: Block indices contributing the Gram-matrix loss.

        Returns:
            A scalar tensor holding the accumulated loss.
        """
        # Tile grayscale images up to the three channels VGG16 expects.
        if input.shape[1] != 3:
            input = input.repeat(1, 3, 1, 1)   # model output
            target = target.repeat(1, 3, 1, 1)  # ground truth
        # Standardise pixel values with ImageNet statistics.
        input = (input - self.mean) / self.std
        target = (target - self.mean) / self.std
        # Resize to the resolution VGG16 was trained at.
        if self.resize:
            input = self.transform(input, mode='bilinear', size=(224, 224), align_corners=False)
            target = self.transform(target, mode='bilinear', size=(224, 224), align_corners=False)
        loss = 0.0
        x = input
        y = target
        # Forward propagation through each VGG block in turn.
        for i, block in enumerate(self.blocks):
            x = block(x)
            y = block(y)
            if i in feature_layers:
                # Content term: L1 distance between intermediate feature maps.
                loss += torch.nn.functional.l1_loss(x, y)
            if i in style_layers:
                # Style term: L1 distance between Gram matrices of the
                # flattened activations.
                act_x = x.reshape(x.shape[0], x.shape[1], -1)
                act_y = y.reshape(y.shape[0], y.shape[1], -1)
                gram_x = act_x @ act_x.permute(0, 2, 1)
                gram_y = act_y @ act_y.permute(0, 2, 1)
                loss += torch.nn.functional.l1_loss(gram_x, gram_y)
        return loss
import torchvision.models as models
# モデルをロード
if torch.cuda.is_available():
self.vgg = models.vgg16().to('cuda')
else:
self.vgg = models.vgg16().to('cpu')
# パラメータをロード
self.vgg.load_state_dict((torch.load(path)))
self.vgg.eval() # モデルを評価モードにする。
blocks = [
self.vgg.features[:4],
self.vgg.features[4:9],
self.vgg.features[9:16],
self.vgg.features[16:23]
]
# No grad to prevent back propagation
for bl in blocks:
for p in bl.parameters():
p.requires_grad = False
# Combine the blocks into ModuleList
self.blocks = torch.nn.ModuleList(blocks)
Forwardで順伝搬
ここで、x および y はそれぞれモデルの出力画像と正解画像(前処理済)です。これらを self.blocks に格納された VGG16 の各ブロックに順番に通し、特徴マップを抽出します。
x = block(x) や y = block(y) によって得られるのが特徴マップであり、これらの差を L1損失として加算し、Perceptual Loss を構成します。
# 順伝搬 (forward propagation)
for i, block in enumerate(self.blocks):
x = block(x)
y = block(y)
if i in feature_layers:
# add loss by using the feature map in the middle of the model
loss += torch.nn.functional.l1_loss(x, y)
この処理により、画像の高次特徴がどれだけ似ているかを損失として捉えることができます。
Style Transfer
Perceptual Loss の原論文では、Style Transfer(スタイル変換) という応用技術も提案されています。これは、出力画像の「内容」は保持しつつ、「スタイル(画風)」だけを別の画像に似せるというもので、例として写真をアニメ風や油絵風に変換することができます。
if i in style_layers:
# add loss by using the "style layer" calculated from the feature maps
act_x = x.reshape(x.shape[0], x.shape[1], -1)
act_y = y.reshape(y.shape[0], y.shape[1], -1)
gram_x = act_x @ act_x.permute(0, 2, 1)
gram_y = act_y @ act_y.permute(0, 2, 1)
loss += torch.nn.functional.l1_loss(gram_x, gram_y)
import torch
import torchvision
import torchvision.models as models
class VGGPerceptualLoss(torch.nn.Module):
    """L1 perceptual loss measured on VGG16 intermediate activations.

    Both images are normalised with ImageNet statistics, optionally
    resized to 224x224, then run through four slices of VGG16's feature
    extractor; per-slice L1 feature distances (and, when requested,
    Gram-matrix distances for style) are summed into a scalar loss.
    """

    def __init__(self, path='/content/drive/MyDrive/VGGPerceptualLoss/vgg16.pth', resize=True):
        super(VGGPerceptualLoss, self).__init__()
        # Place VGG16 on the GPU when one is present, on the CPU otherwise.
        target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.vgg = models.vgg16().to(target_device)
        # Restore the trained weights from the checkpoint.
        self.vgg.load_state_dict((torch.load(path)))
        self.vgg.eval()  # put the model in evaluation mode
        # Cut the feature extractor at fixed layer boundaries so that
        # forward() can read out intermediate activations slice by slice.
        feature_stack = self.vgg.features
        cut_points = [(0, 4), (4, 9), (9, 16), (16, 23)]
        segments = [feature_stack[a:b] for a, b in cut_points]
        # Disable gradients: the loss must never update VGG's weights.
        for segment in segments:
            for weight in segment.parameters():
                weight.requires_grad = False
        # Register the slices so they travel with the module.
        self.blocks = torch.nn.ModuleList(segments)
        self.transform = torch.nn.functional.interpolate
        self.resize = resize
        # ImageNet channel statistics, broadcastable over NCHW tensors.
        self.register_buffer("mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer("std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def forward(self, input, target, feature_layers=[0, 1, 2, 3], style_layers=[]):
        # Tile single-channel images to the three channels VGG expects.
        if input.shape[1] != 3:
            input = input.repeat(1, 3, 1, 1)   # model output
            target = target.repeat(1, 3, 1, 1)  # ground truth
        # Standardise both images with the ImageNet mean and std.
        input = (input - self.mean) / self.std
        target = (target - self.mean) / self.std
        if self.resize:
            # VGG16 was trained on 224x224 inputs; match that resolution.
            input = self.transform(input, mode='bilinear', size=(224, 224), align_corners=False)
            target = self.transform(target, mode='bilinear', size=(224, 224), align_corners=False)
        loss = 0.0
        x, y = input, target
        # Forward both images through each VGG slice in turn.
        for index, segment in enumerate(self.blocks):
            x = segment(x)
            y = segment(y)
            if index in feature_layers:
                # Content term: L1 gap between the two activation maps.
                loss += torch.nn.functional.l1_loss(x, y)
            if index in style_layers:
                # Style term: L1 gap between the Gram matrices of the
                # flattened activations.
                flat_x = x.reshape(x.shape[0], x.shape[1], -1)
                flat_y = y.reshape(y.shape[0], y.shape[1], -1)
                loss += torch.nn.functional.l1_loss(
                    flat_x @ flat_x.permute(0, 2, 1),
                    flat_y @ flat_y.permute(0, 2, 1),
                )
        return loss
import torchvision.models as models
# モデルをロード
if torch.cuda.is_available():
self.vgg = models.vgg16().to('cuda')
else:
self.vgg = models.vgg16().to('cpu')
# パラメータをロード
self.vgg.load_state_dict((torch.load(path)))
self.vgg.eval() # モデルを評価モードにする。
blocks = [
self.vgg.features[:4],
self.vgg.features[4:9],
self.vgg.features[9:16],
self.vgg.features[16:23]
]
# No grad to prevent back propagation
for bl in blocks:
for p in bl.parameters():
p.requires_grad = False
# Combine the blocks into ModuleList
self.blocks = torch.nn.ModuleList(blocks)
Forwardで順伝搬
ここで、x および y はそれぞれモデルの出力画像と正解画像(前処理済)です。これらを self.blocks に格納された VGG16 の各ブロックに順番に通し、特徴マップを抽出します。
x = block(x) や y = block(y) によって得られるのが特徴マップであり、これらの差を L1損失として加算し、Perceptual Loss を構成します。
# 順伝搬 (forward propagation)
for i, block in enumerate(self.blocks):
x = block(x)
y = block(y)
if i in feature_layers:
# add loss by using the feature map in the middle of the model
loss += torch.nn.functional.l1_loss(x, y)
この処理により、画像の高次特徴がどれだけ似ているかを損失として捉えることができます。
Style Transfer
Perceptual Loss の原論文では、Style Transfer(スタイル変換) という応用技術も提案されています。これは、出力画像の「内容」は保持しつつ、「スタイル(画風)」だけを別の画像に似せるというもので、例として写真をアニメ風や油絵風に変換することができます。
if i in style_layers:
# add loss by using the "style layer" calculated from the feature maps
act_x = x.reshape(x.shape[0], x.shape[1], -1)
act_y = y.reshape(y.shape[0], y.shape[1], -1)
gram_x = act_x @ act_x.permute(0, 2, 1)
gram_y = act_y @ act_y.permute(0, 2, 1)
loss += torch.nn.functional.l1_loss(gram_x, gram_y)
import torch
import torchvision
import torchvision.models as models
class VGGPerceptualLoss(torch.nn.Module):
    """Perceptual loss built on intermediate feature maps of VGG16.

    The module loads pretrained VGG16 weights from ``path``, splits the
    feature extractor into four slices, and in ``forward`` sums L1
    distances between the slices' activations for the two images
    (plus optional Gram-matrix style terms).
    """

    def __init__(self, path='/content/drive/MyDrive/VGGPerceptualLoss/vgg16.pth', resize=True):
        # path: checkpoint file holding a VGG16 state_dict.
        # resize: when True, inputs are resized to 224x224 in forward().
        super(VGGPerceptualLoss, self).__init__()
        # load trained model
        # load vgg into cuda if available, load into cpu if cuda not available
        if torch.cuda.is_available():
            self.vgg = models.vgg16().to('cuda')
        else:
            self.vgg = models.vgg16().to('cpu')
        # NOTE(review): torch.load without map_location assumes the
        # checkpoint's saved device is available here — confirm for
        # CPU-only deployments.
        self.vgg.load_state_dict((torch.load(path)))
        self.vgg.eval()  # put the model in evaluation mode
        # preparation to get feature maps from the middle of the model
        blocks = [
            self.vgg.features[:4],
            self.vgg.features[4:9],
            self.vgg.features[9:16],
            self.vgg.features[16:23]
        ]
        # No grad to prevent back propagation
        for bl in blocks:
            for p in bl.parameters():
                p.requires_grad = False
        # Combine the blocks into ModuleList
        self.blocks = torch.nn.ModuleList(blocks)
        self.transform = torch.nn.functional.interpolate
        self.resize = resize
        # ImageNet mean/std, shaped (1, 3, 1, 1) to broadcast over NCHW.
        self.register_buffer("mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer("std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def forward(self, input, target, feature_layers=[0, 1, 2, 3], style_layers=[]):
        """Return the summed perceptual (and optional style) loss.

        input/target: (N, C, H, W) tensors; single-channel images are
        tiled to 3 channels. feature_layers/style_layers select which
        block indices contribute each loss term.
        """
        # Adjust the shape: tile grayscale up to 3 channels.
        if input.shape[1] != 3:
            input = input.repeat(1, 3, 1, 1)  # model output
            target = target.repeat(1, 3, 1, 1)  # ground truth
        # Standardise pixel values with the ImageNet statistics.
        input = (input-self.mean) / self.std
        target = (target-self.mean) / self.std
        # Adjust the image size to VGG16's native resolution.
        if self.resize:
            input = self.transform(input, mode='bilinear', size=(224, 224), align_corners=False)
            target = self.transform(target, mode='bilinear', size=(224, 224), align_corners=False)
        loss = 0.0
        x = input
        y = target
        # Forward propagation through each VGG slice.
        for i, block in enumerate(self.blocks):
            x = block(x)
            y = block(y)
            if i in feature_layers:
                # add loss by using the feature map in the middle of the model
                loss += torch.nn.functional.l1_loss(x, y)
            if i in style_layers:
                # add loss by using the "style layer" calculated from the feature maps
                act_x = x.reshape(x.shape[0], x.shape[1], -1)
                act_y = y.reshape(y.shape[0], y.shape[1], -1)
                gram_x = act_x @ act_x.permute(0, 2, 1)
                gram_y = act_y @ act_y.permute(0, 2, 1)
                loss += torch.nn.functional.l1_loss(gram_x, gram_y)
        return loss
import torchvision.models as models
# モデルをロード
if torch.cuda.is_available():
self.vgg = models.vgg16().to('cuda')
else:
self.vgg = models.vgg16().to('cpu')
# パラメータをロード
self.vgg.load_state_dict((torch.load(path)))
self.vgg.eval() # モデルを評価モードにする。
blocks = [
self.vgg.features[:4],
self.vgg.features[4:9],
self.vgg.features[9:16],
self.vgg.features[16:23]
]
# No grad to prevent back propagation
for bl in blocks:
for p in bl.parameters():
p.requires_grad = False
# Combine the blocks into ModuleList
self.blocks = torch.nn.ModuleList(blocks)
Forwardで順伝搬
ここで、x および y はそれぞれモデルの出力画像と正解画像(前処理済)です。これらを self.blocks に格納された VGG16 の各ブロックに順番に通し、特徴マップを抽出します。
x = block(x) や y = block(y) によって得られるのが特徴マップであり、これらの差を L1損失として加算し、Perceptual Loss を構成します。
# 順伝搬 (forward propagation)
for i, block in enumerate(self.blocks):
x = block(x)
y = block(y)
if i in feature_layers:
# add loss by using the feature map in the middle of the model
loss += torch.nn.functional.l1_loss(x, y)
この処理により、画像の高次特徴がどれだけ似ているかを損失として捉えることができます。
Style Transfer
Perceptual Loss の原論文では、Style Transfer(スタイル変換) という応用技術も提案されています。これは、出力画像の「内容」は保持しつつ、「スタイル(画風)」だけを別の画像に似せるというもので、例として写真をアニメ風や油絵風に変換することができます。
if i in style_layers:
# add loss by using the "style layer" calculated from the feature maps
act_x = x.reshape(x.shape[0], x.shape[1], -1)
act_y = y.reshape(y.shape[0], y.shape[1], -1)
gram_x = act_x @ act_x.permute(0, 2, 1)
gram_y = act_y @ act_y.permute(0, 2, 1)
loss += torch.nn.functional.l1_loss(gram_x, gram_y)
# Standard supervised training loop. Relies on num_epochs, train_loader,
# optimizer, model, criterion and device being defined elsewhere in the
# program (not visible in this excerpt).
for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Move the mini-batch onto the compute device.
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Forward pass, then compute the loss against the targets.
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # Backpropagate and update the model parameters.
        loss.backward()
        optimizer.step()