# Face_Recognition/features_extraction_to_csv.py

# Copyright (C) 2018-2021 coneypo
# SPDX-License-Identifier: MIT

import os
import dlib
import csv
import numpy as np
import logging
import cv2
from PIL import Image

# Path of the directory that holds the face images to read
path_images_from_camera = "data/data_faces"

# Dlib models, loaded once at import time: the landmark shape predictor and
# the face recognition model used to compute face descriptors
predictor = dlib.shape_predictor('models/dlib/shape_predictor_68_face_landmarks.dat')
face_reco_model = dlib.face_recognition_model_v1("models/dlib/dlib_face_recognition_resnet_model_v1.dat")


class DNNFaceDetector:
    """Face detector backed by an OpenCV DNN model, with a Haar cascade fallback.

    Backends are tried in this order: TensorFlow model, Caffe model, Haar
    cascade. The loaded backend is stored in ``self.net``; ``use_tensorflow``
    and ``use_haar`` record which kind it is. When nothing can be loaded,
    ``self.net`` is ``None`` and ``detect_faces`` returns an empty list.
    """

    # Class-level defaults so the flags always exist, whichever backend
    # (if any) ends up being loaded.
    use_tensorflow = False
    use_haar = False

    def __init__(self, confidence_threshold=0.7):
        """Store the confidence threshold and load a detection backend.

        Args:
            confidence_threshold: DNN detections whose confidence is not
                strictly greater than this value are discarded.
        """
        self.confidence_threshold = confidence_threshold
        self.net = self.load_face_detection_model()

    def load_face_detection_model(self):
        """Load the first available face detection backend.

        Returns:
            The loaded network / cascade classifier, or ``None`` when no
            model file could be found or loaded.
        """
        try:
            # Option 1: TensorFlow model
            model_path = "models/opencv_face_detector_uint8.pb"
            config_path = "models/opencv_face_detector.pbtxt"

            if os.path.exists(model_path) and os.path.exists(config_path):
                net = cv2.dnn.readNetFromTensorflow(model_path, config_path)
                print("使用 TensorFlow 人脸检测模型")
                self.use_tensorflow = True
                return net
        except Exception as e:
            # Best effort: report the failure and fall through to the next backend.
            print(f"TensorFlow模型加载失败: {e}")

        try:
            # Option 2: Caffe model
            proto_path = "models/deploy.prototxt"
            model_path = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel"

            if os.path.exists(proto_path) and os.path.exists(model_path):
                net = cv2.dnn.readNetFromCaffe(proto_path, model_path)
                print("使用 Caffe 人脸检测模型")
                self.use_tensorflow = False
                return net
        except Exception as e:
            print(f"Caffe模型加载失败: {e}")

        # Option 3: Haar cascade fallback
        print("使用 Haar Cascade 作为备选检测器")
        cascade_candidates = (
            'models/haarcascade_frontalface_default.xml',
            '/usr/share/opencv4/haarcascades/haarcascade_frontalface_default.xml',
            '/usr/share/opencv/haarcascades/haarcascade_frontalface_default.xml',
            'haarcascade_frontalface_default.xml',
        )
        for path in cascade_candidates:
            if not os.path.exists(path):
                continue
            cascade = cv2.CascadeClassifier(path)
            if cascade.empty():
                # The file exists but did not load as a cascade; try the next one.
                continue
            # Only flag the Haar backend once a classifier is actually loaded,
            # so detect_faces never dispatches to a missing cascade.
            self.use_haar = True
            return cascade

        print("错误: 未找到任何人脸检测模型")
        return None

    def detect_faces(self, image):
        """Detect faces in ``image`` with the loaded backend.

        Args:
            image: Image array; its first two dimensions are read as
                (height, width). The Haar path converts it with
                ``cv2.COLOR_BGR2GRAY``.

        Returns:
            A list of ``(dlib.rectangle, confidence)`` tuples; empty when no
            backend is loaded or nothing passes the threshold.
        """
        # Must be checked before dispatching to the Haar path, otherwise a
        # missing cascade would be dereferenced.
        if self.net is None:
            return []

        if self.use_haar:
            return self.detect_faces_haar(image)

        h, w = image.shape[:2]

        # Build the input blob for the network
        if self.use_tensorflow:
            blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123], False, False)
        else:
            # Caffe model
            blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                         (300, 300), (104.0, 177.0, 123.0))

        self.net.setInput(blob)
        detections = self.net.forward()

        faces = []
        for i in range(detections.shape[2]):
            confidence = float(detections[0, 0, i, 2])
            if confidence <= self.confidence_threshold:
                continue

            # Both DNN backends are read the same way: entries 3..6 are the
            # box corners, scaled here by the image width / height.
            x1 = int(detections[0, 0, i, 3] * w)
            y1 = int(detections[0, 0, i, 4] * h)
            x2 = int(detections[0, 0, i, 5] * w)
            y2 = int(detections[0, 0, i, 6] * h)

            # Clamp the box to the image bounds
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(w, x2), min(h, y2)

            # Keep only non-degenerate boxes, converted to dlib rectangles
            if x2 > x1 and y2 > y1:
                faces.append((dlib.rectangle(x1, y1, x2, y2), confidence))

        return faces

    def detect_faces_haar(self, image):
        """Fallback detection using the Haar cascade stored in ``self.net``.

        Returns:
            A list of ``(dlib.rectangle, 0.9)`` tuples. The cascade call used
            here yields no confidence, so a fixed 0.9 is reported.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        boxes = self.net.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30)
        )

        # Cast to plain ints: the boxes come back as numpy integers.
        return [
            (dlib.rectangle(int(x), int(y), int(x + w), int(y + h)), 0.9)
            for (x, y, w, h) in boxes
        ]


# Global detector instance shared by the functions below
dnn_detector = DNNFaceDetector(confidence_threshold=0.7)

def return_128d_features(path_img):
    """Return the face descriptor of the most confident face in one image.

    The image is read with OpenCV, faces are located with ``dnn_detector``,
    and the best face is passed to the dlib shape predictor and face
    recognition model.

    Args:
        path_img: Path of the image file to process.

    Returns:
        The descriptor returned by ``face_reco_model.compute_face_descriptor``,
        or ``None`` when the image cannot be read, no face is detected, or any
        step raises.
    """
    try:
        # Read with OpenCV directly (no PIL conversion).
        # NOTE(review): cv2.imread yields BGR data which is handed to dlib
        # unchanged - confirm the recognition side uses the same channel order.
        img_rd = cv2.imread(path_img)
        if img_rd is None:
            logging.error("无法读取图像: %s", path_img)
            return None

        # For large images, detect on a downscaled copy to speed things up
        h, w = img_rd.shape[:2]
        if w > 1000 or h > 1000:
            scale = 1000 / max(w, h)
            # max(1, ...) keeps extreme aspect ratios from producing a zero size
            small_size = (max(1, int(w * scale)), max(1, int(h * scale)))
            small_img = cv2.resize(img_rd, small_size)

            # Map the detected boxes back to the original image coordinates
            face_results = [
                (
                    dlib.rectangle(
                        int(rect.left() / scale),
                        int(rect.top() / scale),
                        int(rect.right() / scale),
                        int(rect.bottom() / scale)
                    ),
                    confidence,
                )
                for rect, confidence in dnn_detector.detect_faces(small_img)
            ]
        else:
            face_results = dnn_detector.detect_faces(img_rd)

        if not face_results:
            logging.warning("未检测到人脸: %s", path_img)
            return None

        # Keep the face with the highest confidence
        face_rect, best_confidence = max(face_results, key=lambda item: item[1])

        logging.info("%-40s %-20s", "检测到人脸的图像:", path_img)
        logging.info("检测置信度: %.3f", best_confidence)

        # Landmarks and descriptor are computed by dlib on the full-size image
        shape = predictor(img_rd, face_rect)
        return face_reco_model.compute_face_descriptor(img_rd, shape)

    except Exception as e:
        # Per-image boundary: log and skip so one bad file does not abort a batch
        logging.error("处理图像时出错 %s: %s", path_img, e)
        return None

def return_features_mean_personX(path_face_personX):
    """Return the mean face descriptor over all images of one person.

    Args:
        path_face_personX: Directory containing that person's face images.
            Only files ending in .jpg, .jpeg, .png or .bmp (case-insensitive)
            are processed.

    Returns:
        A numpy array holding the element-wise mean of the extracted
        descriptors, or ``np.zeros(128)`` when no descriptor was extracted.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
    features_list_personX = []
    photos_list = os.listdir(path_face_personX)

    if not photos_list:
        logging.warning("文件夹为空: %s", path_face_personX)

    for photo in photos_list:
        # Only image files are processed
        if not photo.lower().endswith(image_suffixes):
            continue

        photo_path = os.path.join(path_face_personX, photo)
        logging.info("正在读取图像: %s", photo_path)

        features_128d = return_128d_features(photo_path)
        if features_128d is not None:
            features_list_personX.append(features_128d)
        else:
            logging.warning("无法从图像中提取特征: %s", photo_path)

    # Average the collected descriptors
    if features_list_personX:
        features_mean_personX = np.array(features_list_personX).mean(axis=0)
        logging.info("成功提取 %d 张人脸特征", len(features_list_personX))
    else:
        # All-zero vector signals "nothing extracted" to the caller
        features_mean_personX = np.zeros(128, dtype=np.float64)
        logging.warning("未提取到任何特征: %s", path_face_personX)

    return features_mean_personX


def get_person_name_from_folder(folder_name):
    """Derive a display name from a person folder name.

    A leading ``person_``, ``face_`` or ``user_`` prefix is stripped, unless
    the remainder is empty or purely numeric; in that case the full folder
    name is kept so the result stays meaningful.

    Args:
        folder_name: Name of the folder (not a full path).

    Returns:
        The derived name as a string.
    """
    # Common folder prefixes
    prefixes = ('person_', 'face_', 'user_')

    for prefix in prefixes:
        if folder_name.startswith(prefix):
            name_part = folder_name[len(prefix):]
            # An empty or all-digit remainder is not a usable name
            if not name_part or name_part.isdigit():
                return folder_name
            return name_part

    return folder_name

def main():
    """Extract the mean face descriptor of every person folder into a CSV file.

    Each sub-folder of ``path_images_from_camera`` is treated as one person.
    For every folder that yields a non-zero mean descriptor, one row is
    written to ``data/features_all.csv``: the person name followed by the
    descriptor values.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    # Make sure the source folder exists
    if not os.path.exists(path_images_from_camera):
        logging.error("人脸图像文件夹不存在: %s", path_images_from_camera)
        return

    # Collect the person folders in sorted order
    person_list = sorted(os.listdir(path_images_from_camera))

    if not person_list:
        logging.error("没有人脸文件夹可处理")
        return

    logging.info("找到 %d 个人脸文件夹: %s", len(person_list), person_list)

    # Write the CSV file
    with open("data/features_all.csv", "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)

        successful_count = 0
        for person_folder in person_list:
            folder_path = os.path.join(path_images_from_camera, person_folder)

            if not os.path.isdir(folder_path):
                logging.warning("跳过非文件夹: %s", person_folder)
                continue

            logging.info("=" * 60)
            logging.info("处理文件夹: %s", person_folder)

            # Extract the mean descriptor
            features_mean = return_features_mean_personX(folder_path)

            # Derive a meaningful name
            person_name = get_person_name_from_folder(person_folder)
            logging.info("使用姓名: %s", person_name)

            # An all-zero vector means no descriptor could be extracted
            if np.all(features_mean == 0):
                logging.warning("特征提取失败，跳过: %s", person_folder)
                continue

            # Row layout: name followed by the descriptor values
            writer.writerow([person_name] + features_mean.tolist())

            successful_count += 1
            logging.info("完成: %s", person_name)
            logging.info("-" * 50)

        logging.info("=" * 60)
        logging.info("处理完成: 成功 %d/%d 个人脸文件夹", successful_count, len(person_list))
        logging.info("特征数据已保存到: data/features_all.csv")


# Run the feature extraction only when this file is executed as a script
if __name__ == '__main__':
    main()