#!/usr/bin/python3
import os
import math          # Only needed for the zoom videos
import numpy as np   # Only needed for the zoom videos
import cv2           # Only needed to turn images into a video; install it first: pip install opencv-python
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline
from PIL import Image, ImageOps

# Create a folder "input" and a folder "output" next to this script.
# See https://huggingface.co/blog/stable_diffusion on how to install the software, get your token and accept the license.
# pip install --upgrade diffusers
# The NSFW filter(s) in the pipelines can easily be commented out, see
# https://www.reddit.com/r/StableDiffusion/comments/wxba44/disable_hugging_face_nsfw_filter_in_three_step/
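# A minimal sketch of switching the filter off in code instead of editing the
# library files (an assumption based on the pipelines exposing their checker as
# the attribute "safety_checker"; put it right after from_pretrained() below):
#
#   pipe.safety_checker = lambda images, **kwargs: (images, False)

def make_video(prompt):
    image_folder = "output/" + prompt + "/"
    video_name = image_folder + "video.mp4"
    images = []
    for img in os.listdir(image_folder):
        if ("out" in img) and ("_" in img) and ("jpg" in img):
            images.append(img)
    images.sort()  # the names are zero-padded, so lexicographic order is frame order
    frame = cv2.imread(os.path.join(image_folder, images[0]))
    height, width, layers = frame.shape
    frames_per_second = 7
    video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*"mp4v"), frames_per_second, (width, height))
    for image in images:
        video.write(cv2.imread(os.path.join(image_folder, image)))
    video.release()
    cv2.destroyAllWindows()

def read_video(videofile, prompt):
    # Split a video into frames and make each frame 512x512 (scale the shorter
    # side to 512, then center-crop), the size Stable Diffusion expects.
    vidcap = cv2.VideoCapture(videofile)
    success, image = vidcap.read()
    number = 0
    while success:
        number += 1
        filename = "in_" + str(number).zfill(4) + ".jpg"
        cv2.imwrite("output/" + prompt + "/" + filename, image)
        im = Image.open("output/" + prompt + "/" + filename)
        width, height = im.size
        if width < height:
            zoom_factor = 512 / width
        else:
            zoom_factor = 512 / height
        im = im.resize((round(width * zoom_factor), round(height * zoom_factor)))
        width, height = im.size
        if width < height:
            im = im.crop((0, height // 2 - 256, 512, height // 2 + 256))
        else:
            im = im.crop((width // 2 - 256, 0, width // 2 + 256, 512))
        #im = im.rotate(180)
        im.save("output/" + prompt + "/" + filename, "JPEG")
        print("Read pic " + str(number))
        success, image = vidcap.read()

def save_pic(image, prompt, seed, number):
    if number == 0:
        filename = "out" + str(seed) + ".jpg"
    else:
        filename = "out" + str(seed) + "_" + str(number).zfill(4) + ".jpg"
    image.save("output/" + prompt + "/" + filename)
    print("--> " + filename)

def move_pic(image, number):
    # Zoom in a little and drift sideways, up/down and in rotation, with slow
    # sine waves so the virtual camera meanders instead of moving linearly.
    zoom_factor = 1.1
    side_shift = 30 * math.sin(number / 5) * math.sin(number / 7)
    height_shift = 20 * math.cos(number / 6) * math.cos(number / 9)
    rotation_degree = 3 * math.sin(number / 10)
    width, height = image.size
    width_extra = round(width * (zoom_factor - 1) / 2)
    height_extra = round(height * (zoom_factor - 1) / 2)
    image = image.resize((round(width * zoom_factor), round(height * zoom_factor)))
    image = image.rotate(rotation_degree)
    image = image.crop((width_extra - side_shift, height_extra - height_shift,
                        width + width_extra - side_shift, height + height_extra - height_shift))
    return image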
def add_noise(image):
    # Multiplicative Gaussian noise; gives img2img something to work with in flat areas.
    image = np.array(image)
    row, col, ch = image.shape
    mean = 0
    var = 0.2
    sigma = var ** 0.5
    gauss = np.random.normal(mean, sigma, (row, col, ch))
    noisy = image + image * gauss / 2
    return Image.fromarray(np.uint8(np.clip(noisy, 0, 255)))

def change_color(image):
    #r, g, b = image.split()
    #grey_image = ImageOps.grayscale(image)
    #image = Image.new("RGB", grey_image.size).paste(grey_image)
    inv_image = ImageOps.invert(image)
    ir, ig, ib = inv_image.split()
    return Image.merge("RGB", (ig, ib, ir))

def merge(image1, image2, weight):
    # weight 0.0 uses only image1 .. 1.0 only image2
    return Image.blend(image1, image2, weight)

def normalize(image):
    # Stretch each RGB channel to the full 0..255 range.
    arr = np.array(image).astype("float")
    for i in range(3):
        minval = arr[..., i].min()
        maxval = arr[..., i].max()
        if minval != maxval:
            arr[..., i] -= minval
            arr[..., i] *= 255.0 / (maxval - minval)
    return Image.fromarray(arr.astype("uint8"), "RGB")
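# Quick offline check of the helpers above (no GPU needed; the file names are
# just placeholders, any RGB image from the input folder works):
#
#   im = Image.open("input/coyote.jpg").convert("RGB")
#   im = move_pic(im, 1)   # slight zoom, drift and rotation for frame 1
#   im = add_noise(im)     # multiplicative Gaussian noise
#   im = normalize(im)     # stretch each channel back to the full 0..255 range
#   im.save("output/helper_test.jpg")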
if __name__ == "__main__":
    token = "Your personal token generated on huggingface"

    # Inputs:
    prompt = "photograph of a cute friendly animal"
    pic = "coyote.jpg"      # or None
    video = "coyote.mp4"    # or None
    mask = "something.jpg"  # or None
    calc_mode = "normal"    # Either "normal" or "vary_guidance_scale" or "vary_strength" or "video" or "mask" or "zoom"
    calc_variant_number = 1
    calc_seed = 1              # Seed used by the vary_*, "video" and "zoom" modes
    calc_guidance_scale = 6.8  # 6.8 typically works well
    calc_strength = 0.5        # 0.5 .. 0.75 typically works well
    calc_num_inference_steps = 100

    print("Initializing ...")
    if pic:
        in_image = Image.open("input/" + pic).convert("RGB")
    if video:
        in_videofile = "input/" + video
    if mask:
        mask_image = Image.open("input/" + mask).convert("RGB")
    if calc_mode == "zoom":
        brown_image = Image.open("input/brown.jpg").convert("RGB")
    os.makedirs("output/" + prompt, exist_ok=True)
    torch.cuda.empty_cache()

    if calc_mode == "mask":
        pipe = StableDiffusionInpaintPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=token).to("cuda")
    elif pic or video or calc_mode == "zoom":
        pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=token).to("cuda")
    else:
        pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=token).to("cuda")
    #pipe.enable_attention_slicing()  # Helps when the graphics card has too little VRAM

    print("Generating ...")
    with autocast("cuda"):
        if calc_mode == "normal":
            for seed in range(1, 1000):
                generator = torch.Generator("cuda").manual_seed(seed)
                if pic:
                    image = pipe(prompt=prompt, generator=generator,
                                 num_inference_steps=calc_num_inference_steps,
                                 guidance_scale=calc_guidance_scale,
                                 init_image=in_image, strength=calc_strength)["sample"][0]
                else:
                    image = pipe(prompt=prompt, generator=generator,
                                 num_inference_steps=calc_num_inference_steps,
                                 guidance_scale=calc_guidance_scale)["sample"][0]
                save_pic(image, prompt, seed, number=0)

        elif calc_mode == "mask":
            for seed in range(1, 1000):
                generator = torch.Generator("cuda").manual_seed(seed)
                if pic:
                    image = pipe(prompt=prompt, generator=generator,
                                 num_inference_steps=calc_num_inference_steps,
                                 init_image=in_image, mask_image=mask_image,
                                 strength=calc_strength)["sample"][0]
                    save_pic(image, prompt, seed, number=0)

        elif calc_mode == "vary_guidance_scale":
            for x in range(1, 20):
                generator = torch.Generator("cuda").manual_seed(calc_seed)
                guidance_scale = 4.0 + 0.3 * x
                if pic:
                    image = pipe(prompt=prompt, generator=generator, init_image=in_image,
                                 num_inference_steps=calc_num_inference_steps,
                                 guidance_scale=guidance_scale, strength=calc_strength)["sample"][0]
                else:
                    image = pipe(prompt=prompt, generator=generator,
                                 num_inference_steps=calc_num_inference_steps,
                                 guidance_scale=guidance_scale)["sample"][0]
                save_pic(image, prompt, calc_seed, round(guidance_scale * 10))
            make_video(prompt)

        elif calc_mode == "vary_strength":
            for x in range(1, 35):
                generator = torch.Generator("cuda").manual_seed(calc_seed)
                strength = 0.3 + 0.02 * x
                image = pipe(prompt=prompt, generator=generator, init_image=in_image,
                             num_inference_steps=calc_num_inference_steps,
                             guidance_scale=calc_guidance_scale, strength=strength)["sample"][0]
                save_pic(image, prompt, calc_seed, round(strength * 1000))
            make_video(prompt)

        elif calc_mode == "video":
            read_video(in_videofile, prompt)
            image_folder = "output/" + prompt + "/"
            frame_images = sorted(f for f in os.listdir(image_folder) if "in_" in f)
            for x, frame_file in enumerate(frame_images, start=1):
                in_image = Image.open(image_folder + frame_file).convert("RGB")
                generator = torch.Generator("cuda").manual_seed(calc_seed)
                image = pipe(prompt=prompt, generator=generator, init_image=in_image,
                             num_inference_steps=calc_num_inference_steps,
                             guidance_scale=calc_guidance_scale,
                             strength=calc_strength)["sample"][0]
                save_pic(image, prompt, calc_seed, x)
            make_video(prompt)

        elif calc_mode == "zoom":
            # Feedback loop: each generated frame becomes the init image of the
            # next one, after being zoomed/shifted, blended with the previous
            # frame for stability and pulled slightly towards a neutral brown.
            prev_image = None
            for x in range(1, 400):
                in_image = move_pic(in_image, x)
                #in_image = change_color(in_image)
                #in_image = add_noise(in_image)
                seed = calc_seed + math.floor(x / 40)  # switch to a new seed every 40 frames
                if prev_image is not None:
                    in_image = merge(in_image, prev_image, 0.9)
                in_image = normalize(in_image)
                in_image = merge(in_image, brown_image, 0.10)
                generator = torch.Generator("cuda").manual_seed(seed)
                image = pipe(prompt=prompt, generator=generator, init_image=in_image,
                             num_inference_steps=calc_num_inference_steps,
                             guidance_scale=calc_guidance_scale,
                             strength=calc_strength)["sample"][0]
                image = normalize(image)
                prev_image = in_image
                save_pic(image, prompt, 0, x)
                in_image = image
            make_video(prompt)
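# Typical run (assuming a CUDA GPU, a valid token above and the input files in
# place; the script name is whatever you saved this file as):
#
#   python3 stable_diffusion_play.py
#
# Images appear in output/<prompt>/ as out<seed>_<number>.jpg; the vary_*,
# "video" and "zoom" modes additionally render output/<prompt>/video.mp4.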