[Python Practical] Image Recognition Secrets: Master AI as Your Assistant!
Hello everyone, I'm Xiao K, a deep learning engineer! Do you remember the excitement of implementing image recognition with Python for the first time? A few lines of code can make a computer “understand” images — it’s simply magical! Today, let’s explore the wonderful world of image recognition together and see what it can actually do through practical examples!🔍
1. Environment Setup: Summoning AI Tools
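First, we need to install some necessary libraries (the OCR and face recognition sections below additionally require `pytesseract` — plus the Tesseract OCR engine itself — and `face_recognition`):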
# Install basic libraries
pip install opencv-python
pip install pillow
pip install numpy
pip install tensorflow
# Import commonly used libraries
import cv2
import numpy as np
from PIL import Image
import tensorflow as tf
2. Basic Image Processing: Refreshing Images
<div style="width: 100%; background-color: #f8f9fa; padding: 20px; border-radius: 10px;">
<div style="display: flex; justify-content: space-around; flex-wrap: wrap;">
<div style="text-align: center; margin: 10px;">
<canvas id="originalImage" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
<p>Original Image</p>
</div>
<div style="text-align: center; margin: 10px;">
<canvas id="processedImage" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
<p>Processed Effect</p>
</div>
</div>
</div>
<script>
function drawDemoImage() {
  // Look up the 2D contexts of the "before" and "after" demo canvases.
  const [beforeCtx, afterCtx] = ['originalImage', 'processedImage'].map(
    (canvasId) => document.getElementById(canvasId).getContext('2d')
  );

  // "Before" canvas: solid black text on a white background.
  beforeCtx.fillStyle = '#fff';
  beforeCtx.fillRect(0, 0, 200, 200);
  beforeCtx.fillStyle = '#000';
  beforeCtx.font = '20px Arial';
  beforeCtx.fillText('Hello World', 50, 100);

  // "After" canvas: the same text drawn as a blue outline only, which
  // imitates what an edge detector would produce.
  afterCtx.fillStyle = '#fff';
  afterCtx.fillRect(0, 0, 200, 200);
  afterCtx.strokeStyle = '#00f';
  afterCtx.lineWidth = 2;
  afterCtx.strokeText('Hello World', 50, 100);
}
drawDemoImage();
</script>
Basic Image Processing Code
def process_image(image_path):
    """Run the basic preprocessing pipeline on an image file.

    The image is read with OpenCV, converted to grayscale, smoothed with a
    5x5 Gaussian blur and passed through Canny edge detection (thresholds
    100 and 200). Independently, the original colour image is resized
    with ``cv2.resize(img, (800, 600))``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A tuple ``(edges, resized)``: the Canny edge map of the blurred
        grayscale image, and the resized copy of the original image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising;
    # without this check the error would surface later inside cvtColor
    # with a message that does not mention the path.
    if img is None:
        raise FileNotFoundError(
            f"Image not found or unreadable: {image_path!r}"
        )

    # Edge detection works on a smoothed single-channel image.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blur, 100, 200)

    # The resize is applied to the original colour image, not to the edges.
    resized = cv2.resize(img, (800, 600))
    return edges, resized
def enhance_image(image_path, brightness=1.2, contrast=1.1):
    """Open an image and boost its brightness, then its contrast.

    Args:
        image_path: Path of the image file to open with Pillow.
        brightness: Factor passed to ``ImageEnhance.Brightness.enhance``.
            Defaults to 1.2, the value this function always used.
        contrast: Factor passed to ``ImageEnhance.Contrast.enhance``.
            Defaults to 1.1, the value this function always used.

    Returns:
        The enhanced image produced by the contrast enhancer.
    """
    from PIL import ImageEnhance

    # The enhancers return new images, so the source file handle can be
    # released as soon as the result has been computed.
    with Image.open(image_path) as img:
        brightened = ImageEnhance.Brightness(img).enhance(brightness)
        # Contrast is applied on top of the already-brightened image.
        return ImageEnhance.Contrast(brightened).enhance(contrast)
3. Practical Case: OCR Text Recognition
Let’s implement a practical text recognition system:
import pytesseract
from PIL import Image
import cv2
class DocumentScanner:
    """OCR helper that cleans up an image and reads its text with Tesseract."""

    def __init__(self, langs='chi_sim+eng'):
        """Create a scanner.

        Args:
            langs: Language string passed to ``pytesseract`` as ``lang``.
                Defaults to ``'chi_sim+eng'`` (Simplified Chinese + English).
        """
        self.langs = langs

    def preprocess_image(self, image):
        """Binarise and denoise an image before OCR.

        Args:
            image: Image array as returned by ``cv2.imread``; it is
                converted with ``cv2.COLOR_BGR2GRAY``.

        Returns:
            The denoised, adaptively-thresholded image.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Gaussian adaptive threshold with block size 11 and constant 2.
        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )
        return cv2.fastNlMeansDenoising(binary)

    def extract_text(self, image_path):
        """Read an image file and return the text recognised in it.

        Args:
            image_path: Path of the image file to scan.

        Returns:
            The recognised text with leading/trailing whitespace removed.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``image_path``.
        """
        image = cv2.imread(image_path)
        # cv2.imread returns None on failure instead of raising; fail here
        # with the offending path rather than deep inside preprocessing.
        if image is None:
            raise FileNotFoundError(
                f"Image not found or unreadable: {image_path!r}"
            )
        processed = self.preprocess_image(image)
        text = pytesseract.image_to_string(processed, lang=self.langs)
        return text.strip()
4. Face Recognition: Building an Intelligent Attendance System
import face_recognition
import cv2
import numpy as np
class FaceAttendanceSystem:
    """Minimal attendance tracker built on the ``face_recognition`` library.

    Attributes are plain dictionaries:
      * ``known_faces`` maps a person's name to their registered encoding.
      * ``attendance_log`` maps a name to the time they were last seen.
    """

    def __init__(self):
        self.known_faces = {}
        self.attendance_log = {}

    def register_face(self, name, image_path):
        """Register (or overwrite) the face encoding stored under ``name``.

        Args:
            name: Key under which the encoding is stored.
            image_path: Path of an image showing the person's face.
        """
        image = face_recognition.load_image_file(image_path)
        # Only the first encoding is kept.
        # NOTE(review): the [0] raises IndexError when face_encodings returns
        # an empty list (presumably when no face is detected) - confirm.
        encoding = face_recognition.face_encodings(image)[0]
        self.known_faces[name] = encoding

    def mark_attendance(self, image_path):
        """Record the current time for every known face found in an image.

        Args:
            image_path: Path of the image to check.

        Returns:
            The ``attendance_log`` dictionary (the same object held by the
            instance), updated with ``datetime.now()`` for each match.
        """
        # Imported locally: the original code used ``datetime`` without
        # importing it, so the first successful match raised NameError.
        from datetime import datetime

        image = face_recognition.load_image_file(image_path)
        face_locations = face_recognition.face_locations(image)
        face_encodings = face_recognition.face_encodings(
            image, face_locations
        )

        # Snapshot names and encodings once; both lists share the dict's
        # order, so an index into ``matches`` is also an index into ``names``.
        names = list(self.known_faces.keys())
        known_encodings = list(self.known_faces.values())

        for encoding in face_encodings:
            matches = face_recognition.compare_faces(
                known_encodings, encoding
            )
            if True in matches:
                # The first matching registered face wins.
                name = names[matches.index(True)]
                self.attendance_log[name] = datetime.now()
        return self.attendance_log
5. Product Recognition: Intelligent Retail System
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing import image
class ProductRecognition:
    """Recognise a product in a photo and look up its display name and price.

    Classification uses MobileNetV2 with ImageNet weights; the top-1 label
    is then looked up in ``categories``.
    """

    def __init__(self):
        self.model = MobileNetV2(weights='imagenet')
        # Keys are compared against the class name returned by
        # decode_predictions; values are [display name, price].
        self.categories = {
            'apple': ['Apple', 3.5],
            # The ImageNet label set has no plain 'apple' class; its apple
            # class is named 'Granny_Smith', so map that label as well.
            'Granny_Smith': ['Apple', 3.5],
            'banana': ['Banana', 2.5],
            'orange': ['Orange', 4.0],
        }

    def recognize_product(self, image_path):
        """Classify the image and return the matching catalogue entry.

        Args:
            image_path: Path of the product image.

        Returns:
            A dict with ``'name'``, ``'price'`` and ``'confidence'`` when the
            top-1 label is a key of ``categories``; otherwise ``None``.
        """
        # Load at 224x224 and add a leading batch axis before preprocessing.
        img = image.load_img(image_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = tf.keras.applications.mobilenet_v2.preprocess_input(x)

        predictions = self.model.predict(x)
        # [0] selects the results for the single image in the batch.
        decoded = tf.keras.applications.mobilenet_v2.decode_predictions(
            predictions, top=1
        )[0]

        # Each decoded entry is indexed as [1] -> label, [2] -> score.
        product = decoded[0][1]
        if product not in self.categories:
            return None

        name, price = self.categories[product]
        return {
            'name': name,
            'price': price,
            'confidence': float(decoded[0][2])
        }
6. Practical Project: Intelligent Document Management System
class DocumentManager:
    """Scan documents, classify them by keyword and keep them in memory.

    ``db`` is a plain dict mapping a generated document id to a record with
    the keys ``'type'``, ``'info'``, ``'text'`` and ``'path'``.
    """

    # Keywords whose presence in the OCR text identifies a document type.
    # Types are checked in this order; the first type with any hit wins.
    DOC_KEYWORDS = {
        'Invoice': ['发票', '金额', '税号'],
        'Contract': ['合同', '甲方', '乙方'],
        'Report': ['报告', '结论', '建议']
    }

    def __init__(self):
        self.scanner = DocumentScanner()
        self.db = {}

    def process_document(self, image_path):
        """OCR, classify and store one document.

        Args:
            image_path: Path of the scanned document image.

        Returns:
            The id (a UUID4 string) under which the record was stored.
        """
        # Imported locally: the original code used ``uuid`` without
        # importing it, so every call failed with NameError.
        import uuid

        # 1. Text recognition
        text = self.scanner.extract_text(image_path)
        # 2. Document classification
        doc_type = self.classify_document(text)
        # 3. Information extraction
        info = self.extract_info(text, doc_type)
        # 4. Storage
        doc_id = str(uuid.uuid4())
        self.db[doc_id] = {
            'type': doc_type,
            'info': info,
            'text': text,
            'path': image_path
        }
        return doc_id

    def classify_document(self, text):
        """Return the first document type with a keyword present in ``text``.

        Args:
            text: OCR text of the document.

        Returns:
            ``'Invoice'``, ``'Contract'`` or ``'Report'``; ``'Others'`` when
            no keyword is found.
        """
        for doc_type, words in self.DOC_KEYWORDS.items():
            if any(word in text for word in words):
                return doc_type
        return 'Others'

    def extract_info(self, text, doc_type):
        """Return basic information extracted from ``text``.

        ``process_document`` called this method but it was never defined.
        This is a deliberately minimal default that reports which keywords
        of the detected type occur in the text; override it for real field
        extraction (amounts, parties, dates, ...).

        Args:
            text: OCR text of the document.
            doc_type: Type returned by ``classify_document``.

        Returns:
            A dict ``{'keywords': [...]}`` listing the matched keywords; the
            list is empty for unknown types such as ``'Others'``.
        """
        candidates = self.DOC_KEYWORDS.get(doc_type, ())
        return {'keywords': [word for word in candidates if word in text]}
🌟 Success Stories:
<div style="width: 100%; background-color: #f8f9fa; padding: 20px; border-radius: 10px;">
<h3 style="text-align: center; color: #333;">Intelligent Document Recognition System Application Cases</h3>
<div style="display: flex; justify-content: space-around; flex-wrap: wrap;">
<div style="text-align: center; margin: 10px;">
<canvas id="case1" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
<p>Automatic Invoice Entry</p>
</div>
<div style="text-align: center; margin: 10px;">
<canvas id="case2" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
<p>Face Attendance System</p>
</div>
<div style="text-align: center; margin: 10px;">
<canvas id="case3" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
<p>Automatic Product Recognition</p>
</div>
</div>
</div>
<script>
function drawCases() {
  // [canvas id, caption] pairs, rendered in this order.
  const showcase = [
    ['case1', 'Invoice Recognition System'],
    ['case2', 'Attendance System'],
    ['case3', 'Product Recognition'],
  ];

  for (const [canvasId, caption] of showcase) {
    const ctx = document.getElementById(canvasId).getContext('2d');

    // White background covering the whole 200x200 canvas.
    ctx.fillStyle = '#fff';
    ctx.fillRect(0, 0, 200, 200);

    // Caption text in dark grey.
    ctx.fillStyle = '#333';
    ctx.font = '14px Arial';
    ctx.fillText(caption, 20, 100);

    // Blue circular "icon" above the caption.
    ctx.beginPath();
    ctx.arc(100, 50, 20, 0, Math.PI * 2);
    ctx.fillStyle = '#007bff';
    ctx.fill();
  }
}
drawCases();
</script>
🎯 Practical Exercises:
- Develop a business card recognition system
- Implement license plate recognition functionality
- Create an image classification model
- Build a document scanning and archiving system
Dear friends, after reading this tutorial, do you have a new understanding of image recognition? It can not only improve work efficiency but also open up endless possibilities! If you run into problems in practice, feel free to discuss them in the comments section. In the next issue, we will delve into more AI application techniques. Let’s make progress together!✨
Practical Tip: When processing a large number of images, it is recommended to use multithreading or asynchronous processing to improve efficiency!
#Python Image Processing #AI Applications #OCR #Face Recognition #Deep Learning