Paint Transformerで、高速に１枚の画像から油絵を描く

1.はじめに

　以前、１枚の画像から油絵を描くプロセスを作成する技術をご紹介しました。今回ご紹介するのは、その技術を高速化したPaint Transformerという技術です。

＊この論文は、2021.8に提出されました。

2.Paint Transformer とは？

　下記が学習のフレームワークで、速さの秘密は複数のストロークを並行して予測してキャンバス画像とターゲット画像の差を最小化するようなパラメータを求める構成にあります。

　まず、背景ストロークセットSb（Background Stroke_Sb）と前景ストロークセットSf（Foreground Stroke_Sf）をランダムにサンプリングします。次に、Stroke Rendererを使って、空のキャンバス（Blank Canvas）にSbをレンダリングしてキャンバス画像Ic（Canvas Image Ic）を生成し、キャンバス画像IcにSfをレンダリングしてターゲット画像It（Target Image It）を生成します。

　そして、IcとItからStroke PredictorでストロークSr（Prediction Stroke）を予測し、Stroke Rendererで予測画像Irを生成します。ここで、学習するのはStroke Predictorのみで、Stroke Rendererは微分可能なモジュールです。ロスは、ストロークレベルとピクセルレベルの２つから構成されています。

　早速、コードを動かしてみましょう。

3.コード

　コードはGoogle Colabで動かす形にしてGithubに上げてありますので、それに沿って説明して行きます。自分で動かしてみたい方は、この「リンク」をクリックし表示されたノートブックの先頭にある「Colab on Web」ボタンをクリックすると動かせます。今回コードは非表示にしてあり、コードを確認したい場合は「コードの表示」をクリックすると見ることが出来ます。

　まず、セットアップを行います。

# Fetch the code from GitHub, then move into the repository's inference folder
!git clone https://github.com/cedro3/PaintTransformer.git
%cd PaintTransformer/inference

# Fetch the code from GitHub, then move into the repository's inference folder

!git clone https://github.com/cedro3/PaintTransformer.git

%cd PaintTransformer/inference

　最初に、１枚の画像から油絵を作成するプロセスを動画にしてみましょう。まず、inference/inputフォルダーの中にある画像（jpg）を指定します。自分の画像でやってみたい方は、inference/inputにドラッグ＆ドロップで画像をアップロードして指定して下さい。

# Select the input image (file name only; later cells prepend 'input/')
input_img = 'yui.jpg'

# Select the input image (file name only; later cells prepend 'input/')
input_img = 'yui.jpg'

　画像から油絵を作成するプロセスを作成します。プロセスは、inference/output/yui に連番の静止画（jpg）として保存され、最終画像はinference/output/yui.jpgで保存されます。

# --- Create the painting process ---
from inference import run_inference
import os
import shutil

# Reset the output folder: remove it if it already exists
if os.path.isdir('output'):
    shutil.rmtree('output')

# Base name of the input image (file name without its extension).
# splitext is used instead of slicing off the last 4 characters so that
# extensions of any length (e.g. ".jpeg") are handled as well.
name = os.path.splitext(input_img)[0]

# Run the painting process on the selected image
run_inference(input_path='input/'+input_img,
              model_path='model.pth',
              output_dir='output/',
              need_animation=True,  # whether need intermediate results for animation.
              resize_h=None,        # resize original input to this size. None means do not resize.
              resize_w=None,        # resize original input to this size. None means do not resize.
              serial=True)          # if need animation, serial must be True.

# --- Create the painting process ---
from inference import run_inference
import os
import shutil

# Reset the output folder: remove it if it already exists
if os.path.isdir('output'):
    shutil.rmtree('output')

# Base name of the input image (file name without its extension).
# splitext is used instead of slicing off the last 4 characters so that
# extensions of any length (e.g. ".jpeg") are handled as well.
name = os.path.splitext(input_img)[0]

# Run the painting process on the selected image
run_inference(input_path='input/'+input_img,
              model_path='model.pth',
              output_dir='output/',
              need_animation=True,  # whether need intermediate results for animation.
              resize_h=None,        # resize original input to this size. None means do not resize.
              resize_w=None,        # resize original input to this size. None means do not resize.
              serial=True)          # if need animation, serial must be True.

　保存されたプロセスからgifとmp4を作成します。gifはinference/output/process.gifとして、mp4はinference/output/process.mp4として保存されます。

# --- プロセスからgifとmp4を作成 ---
import glob
from PIL import Image

# Set to dir with output images
in_dir = 'output/'+name+'/*.jpg'
out_path = 'output/process.gif'

img, *imgs = [Image.open(f) for f in sorted(glob.glob(in_dir))]
img.save(fp=out_path, format='GIF', append_images=imgs,
          save_all=True, duration=100, loop=0)

! ffmpeg -i output/process.gif -pix_fmt yuv420p output/process.mp4

# --- プロセスからgifとmp4を作成 ---

import glob

from PIL import Image

# Set to dir with output images

in_dir = 'output/'+name+'/*.jpg'

out_path = 'output/process.gif'

img, *imgs = [Image.open(f) for f in sorted(glob.glob(in_dir))]

img.save(fp=out_path, format='GIF', append_images=imgs,

save_all=True, duration=100, loop=0)

! ffmpeg -i output/process.gif -pix_fmt yuv420p output/process.mp4

　mp4を再生してみましょう。

# --- Play the mp4 ---
from IPython.display import HTML
from base64 import b64encode

# Read the video through a context manager so the file handle is closed
# as soon as the bytes have been read.
with open('./output/process.mp4', 'rb') as video_file:
    mp4 = video_file.read()
# Embed the bytes as a base64 data URL inside a <video> element
data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()
HTML(f"""
<video width="70%" height="70%" controls>
      <source src="{data_url}" type="video/mp4">
</video>""")

# --- Play the mp4 ---
from IPython.display import HTML
from base64 import b64encode

# Read the video through a context manager so the file handle is closed
# as soon as the bytes have been read.
with open('./output/process.mp4', 'rb') as video_file:
    mp4 = video_file.read()
# Embed the bytes as a base64 data URL inside a <video> element
data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()
HTML(f"""
<video width="70%" height="70%" controls>
      <source src="{data_url}" type="video/mp4">
</video>""")

　次に、高速化処理が可能になったので、ついでに動画の油絵化をやってみましょう。まず、動画を静止画にバラシます。

# --- Split the video into still images ---
import os
import shutil
import cv2
import math

# Reset the images folder: remove it if it exists, then create it again
if os.path.isdir('images'):
    shutil.rmtree('images')
os.makedirs('images', exist_ok=True)
 
def video_2_images(video_file= './input/yui.mp4',   # video to split
                   image_dir='./images/',
                   image_file='%s.jpg',
                   interval=3,
                   length=120):
    """Save every ``interval``-th frame of a video as a numbered image.

    Frames are read in order through ``cv2.VideoCapture``. Frame number ``i``
    (counted from 0) is written when ``i % interval == 0``. The output path is
    ``image_dir + image_file % index``, where ``index`` is the count of images
    written so far, zero-padded to 6 digits. Reading stops when the capture
    returns no frame or after ``length`` images have been written.

    Args:
        video_file: Path of the video to read.
        image_dir: Prefix prepended to each image file name (a folder path
            ending with a separator by default).
        image_file: ``%``-format pattern that receives the 6-digit index.
        interval: Keep one frame out of every ``interval`` frames.
        length: Maximum number of images to write.

    Raises:
        ValueError: If ``interval`` is smaller than 1.

    Prints ``images_number = `` followed by the number of images implied by
    the frames read. If the capture does not open, nothing is read and the
    printed number is 0.
    """
    if interval < 1:
        raise ValueError('interval must be at least 1')

    i = 0  # number of frames read so far
    cap = cv2.VideoCapture(video_file)
    try:
        while cap.isOpened():
            flag, frame = cap.read()
            if not flag:
                # No frame was returned: stop reading
                break
            if i == length*interval:
                # The maximum number of images has been written
                break
            if i % interval == 0:
                cv2.imwrite(image_dir + image_file % str(i // interval).zfill(6), frame)
            i += 1
    finally:
        # Release the capture even if reading or writing raised
        cap.release()
    print('images_number = ', math.ceil(i/interval))

def main():
    """Call video_2_images() with its default arguments."""
    video_2_images()
    
# Run main() when this code is executed as the main module
if __name__ == '__main__':
    main()

# --- Split the video into still images ---
import os
import shutil
import cv2
import math

# Reset the images folder: remove it if it exists, then create it again
if os.path.isdir('images'):
    shutil.rmtree('images')
os.makedirs('images', exist_ok=True)

def video_2_images(video_file= './input/yui.mp4',   # video to split
                   image_dir='./images/',
                   image_file='%s.jpg',
                   interval=3,
                   length=120):
    """Save every ``interval``-th frame of a video as a numbered image.

    Frames are read in order through ``cv2.VideoCapture``. Frame number ``i``
    (counted from 0) is written when ``i % interval == 0``. The output path is
    ``image_dir + image_file % index``, where ``index`` is the count of images
    written so far, zero-padded to 6 digits. Reading stops when the capture
    returns no frame or after ``length`` images have been written.

    Args:
        video_file: Path of the video to read.
        image_dir: Prefix prepended to each image file name (a folder path
            ending with a separator by default).
        image_file: ``%``-format pattern that receives the 6-digit index.
        interval: Keep one frame out of every ``interval`` frames.
        length: Maximum number of images to write.

    Raises:
        ValueError: If ``interval`` is smaller than 1.

    Prints ``images_number = `` followed by the number of images implied by
    the frames read. If the capture does not open, nothing is read and the
    printed number is 0.
    """
    if interval < 1:
        raise ValueError('interval must be at least 1')

    i = 0  # number of frames read so far
    cap = cv2.VideoCapture(video_file)
    try:
        while cap.isOpened():
            flag, frame = cap.read()
            if not flag:
                # No frame was returned: stop reading
                break
            if i == length*interval:
                # The maximum number of images has been written
                break
            if i % interval == 0:
                cv2.imwrite(image_dir + image_file % str(i // interval).zfill(6), frame)
            i += 1
    finally:
        # Release the capture even if reading or writing raised
        cap.release()
    print('images_number = ', math.ceil(i/interval))

def main():
    """Call video_2_images() with its default arguments."""
    video_2_images()

# Run main() when this code is executed as the main module
if __name__ == '__main__':
    main()

　バラシた静止画を油絵に変換し、inference/frameフォルダーに保存します。所要時間は、GPUがP100の時に、1280×720で1分/枚、640×360で15秒/枚位かかります。但し、フルHD1920×1080まで大きくすると、cuda out of memory で落ちますのでご注意を。

# --- Convert the still images into oil paintings ---
from inference import run_inference
import os
import shutil
import glob
from tqdm import tqdm

# Reset the frame folder: remove it if it is already there
if os.path.isdir('frame'):
    shutil.rmtree('frame')

# Paths of the extracted jpg images, in sorted order
files = sorted(glob.glob('./images/*.jpg'))

# Call run_inference on each image in turn, with a tqdm progress bar
for file in tqdm(files):
    run_inference(
        input_path=file,
        model_path='model.pth',
        output_dir='frame/',
        need_animation=False,  # whether need intermediate results for animation.
        resize_h=None,         # resize original input to this size. None means do not resize.
        resize_w=None,         # resize original input to this size. None means do not resize.
        serial=True,           # if need animation, serial must be True.
    )

# --- Convert the still images into oil paintings ---
from inference import run_inference
import os
import shutil
import glob
from tqdm import tqdm

# Reset the frame folder: remove it if it is already there
if os.path.isdir('frame'):
    shutil.rmtree('frame')

# Paths of the extracted jpg images, in sorted order
files = glob.glob('./images/*.jpg')
files.sort()

# Call run_inference on each image in turn, with a tqdm progress bar
for file in tqdm(files):
    run_inference(input_path=file,
         model_path='model.pth',
         output_dir='frame/',
         need_animation=False,  # whether need intermediate results for animation.
         resize_h=None,         # resize original input to this size. None means do not resize.
         resize_w=None,         # resize original input to this size. None means do not resize.
         serial=True)           # if need animation, serial must be True.

　変換した油絵の静止画からmp4を作成します。

# --- Build the mp4 ---
# Reset output.mp4: delete it if it already exists
if os.path.exists('./output.mp4'):
   os.remove('./output.mp4')

# Convert the numbered images in the frame folder into an mp4
!ffmpeg -r 10 -i frame/%06d.jpg -vcodec libx264 -pix_fmt yuv420p output.mp4

# --- mp4作成 ---

# output.mp4をリセット

if os.path.exists('./output.mp4'):

os.remove('./output.mp4')

# frameフォルダーの画像をmp4に変換

!ffmpeg -r 10 -i frame/%06d.jpg -vcodec libx264 -pix_fmt yuv420p output.mp4

# --- Play the mp4 ---
from IPython.display import HTML
from base64 import b64encode

# Read the video through a context manager so the file handle is closed
# as soon as the bytes have been read.
with open('./output.mp4', 'rb') as video_file:
    mp4 = video_file.read()
# Embed the bytes as a base64 data URL inside a <video> element
data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()
HTML(f"""
<video width="70%" height="70%" controls>
      <source src="{data_url}" type="video/mp4">
</video>""")

# --- Play the mp4 ---
from IPython.display import HTML
from base64 import b64encode

# Read the video through a context manager so the file handle is closed
# as soon as the bytes have been read.
with open('./output.mp4', 'rb') as video_file:
    mp4 = video_file.read()
# Embed the bytes as a base64 data URL inside a <video> element
data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()
HTML(f"""
<video width="70%" height="70%" controls>
      <source src="{data_url}" type="video/mp4">
</video>""")

　ちょっと時間はかかりますが、動画が油絵化できると映像の表現力がアップしそうですね。

　では、また。

2022.6 colab リンク追加

　写真から油絵と作成動画を２段階の解像度で生成できるリンクを追加しました。

（オリジナルgithub）https://github.com/Huage001/PaintTransformer

（twitter投稿）

Paint Transformerで、高速に１枚の画像から油絵を描く

キャンバスと写真の差が最も小さくなるストローク（最大８回）を学習した予測器とレンダラーを使って、画面を1, 1/4, 1/16, 1/64, …と分割しながら並列処理することで、高速処理を可能にしています。

ブログ：https://t.co/zXXXUqejGx pic.twitter.com/ck9vH9GCWb
— cedro (@jun40vn) June 22, 2022

Paint Transformerで、高速に１枚の画像から油絵を描く

1.はじめに

2.Paint Transformer とは？

3.コード

2022.6 colab リンク追加

コメントを残すコメントをキャンセル

ABOUTこの記事をかいた人

NEW POSTこのライターの最新記事

Animate Anyoneで、１枚の画像から動画を生成する

SVDで静止画から動画を生成する

DiffMorpherを使って、拡散モデルでモーフィングを行う

Domo AIで、実写動画をアニメ化する

最近の投稿

最近のコメント

アーカイブ

カテゴリー

メタ情報

1.はじめに

2.Paint Transformer とは？

3.コード

2022.6 colab リンク追加

コメントを残す コメントをキャンセル

RECOMMENDこちらの記事も人気です。

ECONで、１枚の写真から３Dモデルを推定する

PyTorch GPT-2でサクッと文章生成してみる

BERTで文章のネガポジ判定と根拠の可視化をやってみる

NNabla DCGAN 顔画像のモーフィングをやってみる

SadTalkerを使って、音声で顔画像を動かす

DLFSで、人間の年齢による顔の変化をシミュレーションする

keras seq2seq でチャットボットをやってみる

PyTorch 次に Alexnet を作ってみる

ABOUTこの記事をかいた人

NEW POSTこのライターの最新記事

Animate Anyoneで、１枚の画像から動画を生成する

SVDで静止画から動画を生成する

DiffMorpherを使って、拡散モデルでモーフィングを行う

Domo AIで、実写動画をアニメ化する

最近の投稿

最近のコメント

アーカイブ

カテゴリー

メタ情報

コメントを残すコメントをキャンセル