Source: Tomas Sobek on Unsplash
Multiprocessing in Python allows you to execute code concurrently by creating separate processes, each with its own Python interpreter and memory space. This enables true parallelism, where different parts of your program run simultaneously on multiple CPU cores.
It's crucial to understand the difference between multiprocessing and multithreading:
Key differences: multiprocessing runs each task in a separate process with its own memory space and Python interpreter, giving true parallelism for CPU-bound work, while multithreading runs threads inside one process sharing memory, and is constrained by the Global Interpreter Lock (GIL) for CPU-bound tasks.
Here's a simple example demonstrating the performance gains of multiprocessing:
import multiprocessing
import time


def do_something(seconds):
    """Sleep for *seconds* seconds, printing start/finish messages.

    Used as the worker target for both the sequential and the
    multiprocessing timing comparison below.
    """
    print(f"Sleeping for {seconds} second(s)...")
    time.sleep(seconds)
    print("Done sleeping!")


# The __main__ guard is required for multiprocessing: on platforms that
# spawn child processes (Windows, and macOS since Python 3.8), every child
# re-imports this module, and without the guard the top-level code would
# run again in each child (raising a RuntimeError / forking endlessly).
if __name__ == "__main__":
    start = time.perf_counter()

    # Running the function sequentially
    do_something(1)
    do_something(1)

    finish = time.perf_counter()
    print(f"Finished in {round(finish-start, 2)} second(s)")  # Approximately 2 seconds

    start = time.perf_counter()

    # Running the function in parallel using multiprocessing.
    # args must be an iterable of positional arguments; a 1-tuple is the
    # conventional form.
    p1 = multiprocessing.Process(target=do_something, args=(1,))
    p2 = multiprocessing.Process(target=do_something, args=(1,))
    p1.start()
    p2.start()
    p1.join()  # Wait for p1 to complete
    p2.join()  # Wait for p2 to complete

    finish = time.perf_counter()
    print(f"Finished in {round(finish-start, 2)} second(s)")  # Approximately 1 second
The `ProcessPoolExecutor` from the `concurrent.futures` module provides a convenient, higher-level way to manage a pool of worker processes.
import concurrent.futures
import time


def do_something(seconds):
    """Sleep for *seconds* seconds and return a completion message.

    Unlike the plain-multiprocessing example, the worker returns a value
    so the parent can retrieve it via Future.result().
    """
    print(f"Sleeping for {seconds} second(s)...")
    time.sleep(seconds)
    return f"Done sleeping...{seconds}"


# The __main__ guard is required: ProcessPoolExecutor spawns worker
# processes that re-import this module, so unguarded top-level code would
# re-run in every worker (and raises RuntimeError on Windows/macOS).
if __name__ == "__main__":
    start = time.perf_counter()

    with concurrent.futures.ProcessPoolExecutor() as executor:
        f1 = executor.submit(do_something, 1)
        f2 = executor.submit(do_something, 1)
        print(f1.result())  # Waits for the result of f1
        print(f2.result())  # Waits for the result of f2

    finish = time.perf_counter()
    print(f"Finished in {round(finish-start, 2)} second(s)")

    # Using as_completed to get results as they become available:
    start = time.perf_counter()

    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [executor.submit(do_something, 1) for _ in range(10)]
        for future in concurrent.futures.as_completed(futures):
            print(future.result())

    finish = time.perf_counter()
    print(f"Finished in {round(finish-start, 2)} second(s)")
A common use case for multiprocessing is image processing. The following example shows how to download and process images concurrently:
import time
import concurrent.futures

import requests
from PIL import Image, ImageFilter  # Requires Pillow library: pip install pillow

image_urls = [
    "https://images.unsplash.com/photo-1518791841217-8f162f1e1131?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=500&q=60",
    "https://images.unsplash.com/photo-1493663284031-b7e3a1a05820?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=500&q=60",
    "https://images.unsplash.com/photo-1522038992700-c151c4f47122?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=500&q=60",
    # Add more image URLs here
]


def download_image(image_url):
    """Download *image_url* into the current directory; return the local filename."""
    img_bytes = requests.get(image_url).content
    # Strip the query string so the filename is valid on every platform:
    # the raw URL tail contains '?' and '&', which Windows rejects in
    # filenames.
    img_name = image_url.split('/')[-1].split('?')[0]
    with open(img_name, 'wb') as img_file:
        img_file.write(img_bytes)
    print(f"{img_name} downloaded successfully.")
    return img_name


def process_image(image_name):
    """Apply a Gaussian blur to *image_name* and save as processed_<name>."""
    try:
        img = Image.open(image_name)
        img = img.filter(ImageFilter.GaussianBlur(15))
        img.save(f"processed_{image_name}")
        print(f"{image_name} processed.")
    except Exception as e:
        # Best-effort: report and continue with the remaining images.
        print(f"Error processing {image_name}: {e}")


# The __main__ guard is required: ProcessPoolExecutor workers re-import
# this module, and without the guard the download/process pipeline would
# run again in every worker.
if __name__ == "__main__":
    start = time.perf_counter()

    with concurrent.futures.ProcessPoolExecutor() as executor:
        # executor.map returns a lazy iterator (always truthy, so a bare
        # `if image_names:` check would never be False). Materialize it so
        # all downloads complete — and any download error is raised here —
        # before processing begins.
        image_names = list(executor.map(download_image, image_urls))

        if image_names:  # Only process if at least one image was downloaded
            # Consume the map results so exceptions raised in worker
            # processes surface instead of being silently discarded.
            list(executor.map(process_image, image_names))

    finish = time.perf_counter()
    print(f"Finished downloading and processing images in {round(finish - start, 2)} seconds")
The `ProcessPoolExecutor` helps manage these worker processes cleanly, handling pool creation, task distribution, and shutdown for you.