Image-to-Image
Diffusers
StableDiffusionInpaintPipeline
stable-diffusion
stable-diffusion-diffusers
text-guided-to-image-inpainting
endpoints-template
Instructions to use philschmid/stable-diffusion-2-inpainting-endpoint with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use philschmid/stable-diffusion-2-inpainting-endpoint with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("philschmid/stable-diffusion-2-inpainting-endpoint", dtype=torch.bfloat16, device_map="cuda")

prompt = "Turn this cat into a dog"
input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png")

image = pipe(image=input_image, prompt=prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
| from typing import Dict, List, Any | |
| import torch | |
| from diffusers import DPMSolverMultistepScheduler, StableDiffusionInpaintPipeline | |
| from PIL import Image | |
| import base64 | |
| from io import BytesIO | |
# Pick the compute device. This handler is only meant to be served from a
# CUDA GPU, so importing the module fails immediately when none is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
    raise ValueError("need to run on GPU")
class EndpointHandler:
    """Request handler that serves a Stable Diffusion inpainting pipeline.

    The pipeline is loaded once in ``__init__``; every call to the instance
    runs a single inference and returns the first generated image.
    """

    def __init__(self, path=""):
        """Load the inpainting pipeline and move it to the module-level ``device``.

        :param path: Location handed to
            ``StableDiffusionInpaintPipeline.from_pretrained``.
        """
        # load StableDiffusionInpaintPipeline pipeline in half precision
        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(path, torch_dtype=torch.float16)
        # swap in DPMSolverMultistepScheduler, built from the loaded scheduler's config
        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(self.pipe.scheduler.config)
        # move to device
        self.pipe = self.pipe.to(device)

    def __call__(self, data: Any) -> "Image.Image":
        """Run one inpainting request.

        :param data: Request payload (a dict). Recognised keys, all of which
            are popped from ``data``:

            * ``inputs`` -- the prompt forwarded to the pipeline. When the key
              is absent, ``data`` itself is forwarded instead.
            * ``image`` / ``mask_image`` -- base64-encoded images. They must
              be supplied together.
            * ``num_inference_steps`` (default 25), ``guidance_scale``
              (default 7.5), ``negative_prompt``, ``height``, ``width`` --
              forwarded to the pipeline unchanged.
        :return: The first image generated by the pipeline (``out.images[0]``).
        :raises ValueError: If only one of ``image`` / ``mask_image`` is given.
        """
        inputs = data.pop("inputs", data)
        encoded_image = data.pop("image", None)
        encoded_mask_image = data.pop("mask_image", None)

        # hyperparameters
        num_inference_steps = data.pop("num_inference_steps", 25)
        guidance_scale = data.pop("guidance_scale", 7.5)
        negative_prompt = data.pop("negative_prompt", None)
        height = data.pop("height", None)
        width = data.pop("width", None)

        # A half-specified request used to have its one supplied image silently
        # discarded; reject it up front with an explicit message instead.
        if (encoded_image is None) != (encoded_mask_image is None):
            raise ValueError("`image` and `mask_image` must be provided together")

        # process image
        if encoded_image is not None:
            image = self.decode_base64_image(encoded_image)
            mask_image = self.decode_base64_image(encoded_mask_image)
        else:
            # Neither was supplied: forward None and let the pipeline decide.
            image = None
            mask_image = None

        # run inference pipeline
        out = self.pipe(
            inputs,
            image=image,
            mask_image=mask_image,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            num_images_per_prompt=1,
            negative_prompt=negative_prompt,
            height=height,
            width=width,
        )

        # return first generated PIL image
        return out.images[0]

    # helper to decode input image
    def decode_base64_image(self, image_string):
        """Decode a base64 string and open the resulting bytes as a PIL image.

        :param image_string: Base64-encoded image file contents.
        :return: The object returned by ``Image.open`` on the decoded bytes.
        """
        base64_image = base64.b64decode(image_string)
        buffer = BytesIO(base64_image)
        image = Image.open(buffer)
        return image