Image Recognition Secrets: Master AI as Your Assistant

[Python Practical] Image Recognition Secrets: Master AI as Your Assistant!

Hello everyone, I am Deep Learning Engineer Xiao K! Do you remember the excitement of implementing image recognition with Python for the first time? A few lines of code can make a computer “understand” images — it’s simply magical! Today, let’s explore the wonderful world of image recognition together and see what it can actually do through practical examples!🔍

1. Environment Setup: Summoning AI Tools

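First, we need to install the necessary libraries. (The later sections also use pytesseract, which additionally requires the Tesseract OCR engine to be installed on your system, and face_recognition; install them with pip in the same way.)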

# Install basic libraries
pip install opencv-python
pip install pillow
pip install numpy
pip install tensorflow

# Import commonly used libraries
import cv2
import numpy as np
from PIL import Image
import tensorflow as tf

2. Basic Image Processing: Refreshing Images

<div style="width: 100%; background-color: #f8f9fa; padding: 20px; border-radius: 10px;">
    <div style="display: flex; justify-content: space-around; flex-wrap: wrap;">
        <div style="text-align: center; margin: 10px;">
            <canvas id="originalImage" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
            <p>Original Image</p>
        </div>
        <div style="text-align: center; margin: 10px;">
            <canvas id="processedImage" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
            <p>Processed Effect</p>
        </div>
    </div>
</div>

<script>
function drawDemoImage() {
    // Renders the two demo canvases: filled text on the "original" canvas
    // and outlined text on the "processed" canvas, which stands in for an
    // edge-detection result.
    const contextOf = (canvasId) => document.getElementById(canvasId).getContext('2d');
    const paintWhiteBackground = (ctx) => {
        ctx.fillStyle = '#fff';
        ctx.fillRect(0, 0, 200, 200);
    };

    const originalCtx = contextOf('originalImage');
    const processedCtx = contextOf('processedImage');

    // Original image: solid black text on white
    paintWhiteBackground(originalCtx);
    originalCtx.fillStyle = '#000';
    originalCtx.font = '20px Arial';
    originalCtx.fillText('Hello World', 50, 100);

    // Processed image: blue text outline on white (simulated edges)
    paintWhiteBackground(processedCtx);
    processedCtx.strokeStyle = '#00f';
    processedCtx.lineWidth = 2;
    processedCtx.strokeText('Hello World', 50, 100);
}

drawDemoImage();
</script>

Basic Image Processing Code

def process_image(image_path):
    """Run basic preprocessing on an image file.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur
    and passed through Canny edge detection (thresholds 100 and 200).
    Independently, the original colour image is resized to 800x600.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A tuple ``(edges, resized)``: the Canny edge map of the blurred
        grayscale image, and the original image resized to 800x600.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    import os

    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # cv2.imread reports failure by returning None instead of raising, so
    # check explicitly rather than crashing later inside cvtColor.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    # Image preprocessing
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Grayscale conversion
    blur = cv2.GaussianBlur(gray, (5, 5), 0)      # Gaussian blur
    edges = cv2.Canny(blur, 100, 200)             # Edge detection

    # Resize the original (colour) image, not the edge map
    resized = cv2.resize(img, (800, 600))

    return edges, resized

def enhance_image(image_path):
    """Return a brighter, higher-contrast version of the image at image_path.

    Brightness is scaled by a factor of 1.2 first; contrast is then scaled
    by a factor of 1.1 on the brightened result.
    """
    source = Image.open(image_path)

    from PIL import ImageEnhance

    # Step 1: brightness boost
    brightened = ImageEnhance.Brightness(source).enhance(1.2)

    # Step 2: contrast boost, applied to the brightened image
    return ImageEnhance.Contrast(brightened).enhance(1.1)

3. Practical Case: OCR Text Recognition

Let’s implement a practical text recognition system:

import pytesseract
from PIL import Image
import cv2

class DocumentScanner:
    """Extract text from document images with OpenCV preprocessing and Tesseract.

    Attributes:
        langs: Language specification passed to ``pytesseract`` as ``lang``.
    """

    def __init__(self, langs='chi_sim+eng'):
        """Create a scanner.

        Args:
            langs: Tesseract language string. The default recognises
                Simplified Chinese and English.
        """
        self.langs = langs

    def preprocess_image(self, image):
        """Binarise and denoise an image before OCR.

        Args:
            image: Image array as returned by ``cv2.imread`` (converted
                here with ``COLOR_BGR2GRAY``).

        Returns:
            The grayscale image after adaptive thresholding and non-local
            means denoising.
        """
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Adaptive thresholding (Gaussian-weighted, block size 11, C = 2)
        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

        # Denoising
        denoised = cv2.fastNlMeansDenoising(binary)

        return denoised

    def extract_text(self, image_path):
        """Run OCR on an image file and return the recognised text.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The recognised text with leading and trailing whitespace removed.

        Raises:
            FileNotFoundError: If ``image_path`` does not point to a file.
            ValueError: If the file exists but OpenCV cannot decode it.
        """
        import os

        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # cv2.imread returns None on failure instead of raising
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not decode image: {image_path}")

        processed = self.preprocess_image(image)

        # OCR recognition
        text = pytesseract.image_to_string(
            processed, lang=self.langs
        )

        return text.strip()

4. Face Recognition: Building an Intelligent Attendance System

import face_recognition
import cv2
import numpy as np

class FaceAttendanceSystem:
    """Attendance tracker that matches faces in photos against registered people.

    Attributes:
        known_faces: Maps a registered name to that person's face encoding.
        attendance_log: Maps a name to the time that person was most
            recently recognised by ``mark_attendance``.
    """

    def __init__(self):
        self.known_faces = {}
        self.attendance_log = {}

    def register_face(self, name, image_path):
        """Register a person from a photo.

        Args:
            name: Key under which the face encoding is stored. Registering
                the same name again replaces the earlier encoding.
            image_path: Path of a photo containing the person's face.

        Raises:
            ValueError: If no face is detected in the photo.
        """
        image = face_recognition.load_image_file(image_path)
        encodings = face_recognition.face_encodings(image)
        if not encodings:
            # Without this check the [0] lookup fails with a bare IndexError
            raise ValueError(f"No face found in image: {image_path}")
        # Only the first detected face is used
        self.known_faces[name] = encodings[0]

    def mark_attendance(self, image_path):
        """Record attendance for every registered person recognised in a photo.

        Args:
            image_path: Path of the photo to scan for faces.

        Returns:
            The cumulative attendance log (the same dict object stored on
            the instance), updated with the current time for each match.
        """
        from datetime import datetime

        # Read image
        image = face_recognition.load_image_file(image_path)
        face_locations = face_recognition.face_locations(image)
        face_encodings = face_recognition.face_encodings(
            image, face_locations
        )

        # Snapshot names and encodings once so their indices stay aligned
        known_names = list(self.known_faces.keys())
        known_encodings = list(self.known_faces.values())

        # Identify faces
        for encoding in face_encodings:
            matches = face_recognition.compare_faces(
                known_encodings, encoding
            )
            if True in matches:
                # The first registered face that matches wins
                name = known_names[matches.index(True)]
                self.attendance_log[name] = datetime.now()

        return self.attendance_log

5. Product Recognition: Intelligent Retail System

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing import image

class ProductRecognition:
    """Recognise products in photos with an ImageNet-pretrained MobileNetV2.

    Attributes:
        model: The MobileNetV2 classifier loaded with ImageNet weights.
        categories: Maps a predicted label to ``[display name, price]``.
    """

    # Predicted label -> [display name, price].
    # ImageNet has no plain 'apple' class; the apple label produced by
    # decode_predictions is 'Granny_Smith'. The original 'apple' key is kept
    # so existing lookups continue to work.
    DEFAULT_CATEGORIES = {
        'Granny_Smith': ['Apple', 3.5],
        'apple': ['Apple', 3.5],
        'banana': ['Banana', 2.5],
        'orange': ['Orange', 4.0],
    }

    def __init__(self):
        self.model = MobileNetV2(weights='imagenet')
        # Per-instance copy so edits do not leak into the class-level table
        self.categories = {
            label: list(entry)
            for label, entry in self.DEFAULT_CATEGORIES.items()
        }

    def recognize_product(self, image_path):
        """Classify the image and look the top prediction up in the catalogue.

        Args:
            image_path: Path of the product photo.

        Returns:
            A dict with keys ``'name'``, ``'price'`` and ``'confidence'``
            when the top-1 label is in ``self.categories``; otherwise
            ``None``.
        """
        # Load and preprocess image (resized to 224x224 on load)
        img = image.load_img(image_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add a leading batch axis
        x = tf.keras.applications.mobilenet_v2.preprocess_input(x)

        # Predict
        predictions = self.model.predict(x)
        decoded = tf.keras.applications.mobilenet_v2.decode_predictions(
            predictions, top=1
        )[0]

        # Each decoded entry is (class id, label, score)
        _, product, score = decoded[0]
        if product not in self.categories:
            return None

        name, price = self.categories[product]
        return {
            'name': name,
            'price': price,
            'confidence': float(score)
        }

6. Practical Project: Intelligent Document Management System

class DocumentManager:
    """Scan, classify and store documents in an in-memory dictionary.

    Attributes:
        scanner: The ``DocumentScanner`` used for OCR.
        db: Maps a generated document id to the stored record.
    """

    # Keywords whose presence in the OCR text identifies a document type.
    # Types are checked in this order and the first one with a hit wins.
    KEYWORDS = {
        'Invoice': ['发票', '金额', '税号'],
        'Contract': ['合同', '甲方', '乙方'],
        'Report': ['报告', '结论', '建议']
    }

    def __init__(self):
        self.scanner = DocumentScanner()
        self.db = {}

    def process_document(self, image_path):
        """Run OCR on a document image, classify it and store the result.

        Args:
            image_path: Path of the document image.

        Returns:
            The id (a random UUID string) under which the record was
            stored in ``self.db``.
        """
        import uuid

        # 1. Text recognition
        text = self.scanner.extract_text(image_path)

        # 2. Document classification
        doc_type = self.classify_document(text)

        # 3. Information extraction
        info = self.extract_info(text, doc_type)

        # 4. Storage
        doc_id = str(uuid.uuid4())
        self.db[doc_id] = {
            'type': doc_type,
            'info': info,
            'text': text,
            'path': image_path
        }

        return doc_id

    def classify_document(self, text):
        """Classify a document by simple keyword matching.

        Args:
            text: The recognised document text.

        Returns:
            ``'Invoice'``, ``'Contract'`` or ``'Report'`` for the first
            type that has any keyword in ``text``, otherwise ``'Others'``.
        """
        for doc_type, words in self.KEYWORDS.items():
            if any(word in text for word in words):
                return doc_type
        return 'Others'

    def extract_info(self, text, doc_type):
        """Return the classification keywords of ``doc_type`` found in ``text``.

        This is a minimal placeholder so that ``process_document`` runs end
        to end; replace it with real field extraction as needed.

        Args:
            text: The recognised document text.
            doc_type: A type name as returned by ``classify_document``.

        Returns:
            A dict ``{'keywords': [...]}`` listing the matching keywords;
            the list is empty for unknown types such as ``'Others'``.
        """
        words = self.KEYWORDS.get(doc_type, [])
        return {'keywords': [word for word in words if word in text]}

🌟 Successful case display:

<div style="width: 100%; background-color: #f8f9fa; padding: 20px; border-radius: 10px;">
    <h3 style="text-align: center; color: #333;">Intelligent Document Recognition System Application Cases</h3>
    <div style="display: flex; justify-content: space-around; flex-wrap: wrap;">
        <div style="text-align: center; margin: 10px;">
            <canvas id="case1" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
            <p>Automatic Invoice Entry</p>
        </div>
        <div style="text-align: center; margin: 10px;">
            <canvas id="case2" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
            <p>Face Attendance System</p>
        </div>
        <div style="text-align: center; margin: 10px;">
            <canvas id="case3" width="200" height="200" style="border: 1px solid #ddd;"></canvas>
            <p>Automatic Product Recognition</p>
        </div>
    </div>
</div>

<script>
function drawCases() {
    // Each showcase canvas gets a white background, a caption and a blue
    // circular icon.
    const showcase = [
        ['case1', 'Invoice Recognition System'],
        ['case2', 'Attendance System'],
        ['case3', 'Product Recognition'],
    ];

    for (const [canvasId, caption] of showcase) {
        const ctx = document.getElementById(canvasId).getContext('2d');

        // Background
        ctx.fillStyle = '#fff';
        ctx.fillRect(0, 0, 200, 200);

        // Caption text
        ctx.fillStyle = '#333';
        ctx.font = '14px Arial';
        ctx.fillText(caption, 20, 100);

        // Icon: filled circle above the caption
        ctx.beginPath();
        ctx.arc(100, 50, 20, 0, Math.PI * 2);
        ctx.fillStyle = '#007bff';
        ctx.fill();
    }
}

drawCases();
</script>

🎯 Practical Exercises:

  1. Develop a business card recognition system

  2. Implement license plate recognition functionality

  3. Create an image classification model

  4. Build a document scanning archiving system

Dear friends, after reading this tutorial, do you have a new understanding of image recognition? It can not only improve work efficiency but also open up endless possibilities! If you run into problems in practice, feel free to discuss them in the comments section. In the next issue, we will delve into more AI application techniques. Let’s keep improving together!✨

Practical Tip: When processing a large number of images, it is recommended to use multithreading or asynchronous processing to improve efficiency!

#Python Image Processing #AI Applications #OCR #Face Recognition #Deep Learning

Leave a Comment