HTTPS证书配置

HTTPS证书是保证通信安全的基础。移动端爬虫要通过代理抓取HTTPS流量,就必须让设备信任代理所使用的CA证书;自Android 7起,应用默认只信任系统证书目录中的CA,因此通常需要把证书安装到系统分区。

证书安装到系统分区(需要ROOT)

# Android CA certificate installation walkthrough

# 1. Generate the CA private key and a self-signed CA certificate (run on the host machine)
openssl genrsa -out ca.key 2048
openssl req -new -x509 -key ca.key -out ca.crt -days 365 -subj "/CN=AppCrawler CA/O=AppCrawler/C=US"

# 2. Convert to the Android naming scheme: <subject_hash_old>.0
#    -noout suppresses the PEM dump so only the hash line is printed.
CERT_HASH=$(openssl x509 -inform PEM -subject_hash_old -noout -in ca.crt)
#    Copy instead of move so the original ca.crt stays available for the proxy tool.
cp ca.crt "${CERT_HASH}.0"

# 3. Push into the system certificate directory (requires ROOT)
adb root
adb remount
adb push "${CERT_HASH}.0" /system/etc/security/cacerts/
adb shell chmod 644 "/system/etc/security/cacerts/${CERT_HASH}.0"
adb reboot

# 4. Verify the installation
#    The device is unreachable while it reboots; block until adb sees it again.
adb wait-for-device
adb shell ls -la /system/etc/security/cacerts/ | grep "${CERT_HASH}"

证书配置Python脚本

import hashlib
import os
import shlex
import shutil
import ssl
import subprocess
import tempfile
from typing import Optional, Tuple

import requests
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes

class CertificateManager:
    """Generate, convert and install the CA certificate used for HTTPS interception.

    Attributes:
        ca_cert_path: Path of the generated CA certificate, or None until
            generate_ca_certificate() has succeeded.
        ca_key_path: Path of the generated CA private key, or None until
            generate_ca_certificate() has succeeded.
        device_cert_installed: Set to True once a certificate has been copied
            into the device's system certificate directory.
        certificates_dir: Working directory for generated files; created on
            construction if it does not exist.
    """

    def __init__(self):
        self.ca_cert_path: Optional[str] = None
        self.ca_key_path: Optional[str] = None
        self.device_cert_installed = False
        self.certificates_dir = "certificates"
        os.makedirs(self.certificates_dir, exist_ok=True)

    def _reserve_temp_path(self, suffix: str) -> str:
        """Create an empty temp file inside certificates_dir and return its path."""
        fd, path = tempfile.mkstemp(suffix=suffix, dir=self.certificates_dir)
        os.close(fd)
        return path

    def generate_ca_certificate(self, common_name: str = "AppCrawler Root CA",
                                organization: str = "AppCrawler Organization",
                                valid_days: int = 365) -> Tuple[Optional[str], Optional[str]]:
        """Generate a self-signed CA certificate and private key with OpenSSL.

        Args:
            common_name: Value of the CN field of the certificate subject.
            organization: Value of the O field of the certificate subject.
            valid_days: Validity period passed to ``openssl -days``.

        Returns:
            ``(key_path, cert_path)`` on success, ``(None, None)`` on any
            failure (the reason is printed).
        """
        final_key_path = os.path.join(self.certificates_dir, "ca.key")
        final_cert_path = os.path.join(self.certificates_dir, "ca.crt")
        key_filename = None
        crt_filename = None

        try:
            os.makedirs(self.certificates_dir, exist_ok=True)

            # Temp files live next to their final destination so the later
            # os.replace() never crosses a filesystem boundary.
            key_filename = self._reserve_temp_path('.key')
            crt_filename = self._reserve_temp_path('.crt')

            # Let OpenSSL create an unencrypted key plus a self-signed certificate.
            openssl_cmd = [
                'openssl', 'req', '-new', '-x509', '-keyout', key_filename,
                '-out', crt_filename, '-days', str(valid_days), '-nodes',
                '-subj', f'/CN={common_name}/O={organization}/C=US'
            ]

            result = subprocess.run(openssl_cmd,
                                    capture_output=True,
                                    text=True,
                                    timeout=30)

            if result.returncode != 0:
                print(f"❌ 证书生成失败: {result.stderr}")
                return None, None

            # os.replace overwrites an existing target on every platform.
            os.replace(key_filename, final_key_path)
            os.replace(crt_filename, final_cert_path)

            self.ca_key_path = final_key_path
            self.ca_cert_path = final_cert_path

            print(f"✅ CA证书生成成功: {final_cert_path}")
            print(f"🔑 私钥文件: {final_key_path}")

            return final_key_path, final_cert_path

        except subprocess.TimeoutExpired:
            print("❌ 证书生成超时")
            return None, None
        except FileNotFoundError:
            print("❌ 未找到OpenSSL,请确保已安装OpenSSL并添加到PATH")
            return None, None
        except Exception as e:
            print(f"❌ 证书生成异常: {e}")
            return None, None
        finally:
            # Remove whatever temp file was not promoted to its final name,
            # regardless of which path led out of the try block.
            for leftover in (key_filename, crt_filename):
                if leftover and os.path.exists(leftover):
                    try:
                        os.unlink(leftover)
                    except OSError:
                        pass

    @staticmethod
    def _subject_hash_old(subject_der: bytes) -> str:
        """Return OpenSSL's ``-subject_hash_old`` value for a DER-encoded name.

        The value is the first four bytes of the MD5 digest read as a
        little-endian 32-bit integer, printed as eight lowercase hex digits.
        """
        digest = hashlib.md5(subject_der).digest()
        return f"{int.from_bytes(digest[:4], 'little'):08x}"

    def get_certificate_hash(self, cert_path: str) -> Optional[str]:
        """Return the hash used to name the certificate in Android's system store.

        The hash is first computed in-process from the parsed certificate
        subject; if that fails for any reason the ``openssl`` command line
        tool is tried instead.

        Args:
            cert_path: Path to a PEM-encoded certificate.

        Returns:
            The eight-hex-digit hash, or None when neither method succeeds.
        """
        try:
            with open(cert_path, 'rb') as f:
                cert_data = f.read()

            cert = x509.load_pem_x509_certificate(cert_data, default_backend())
            subject_bytes = cert.subject.public_bytes(default_backend())
            return self._subject_hash_old(subject_bytes)
        except Exception as e:
            print(f"❌ 获取证书哈希失败: {e}")

        # Fallback: ask the OpenSSL CLI; -noout keeps the output to the hash line.
        try:
            result = subprocess.run([
                'openssl', 'x509', '-inform', 'PEM',
                '-subject_hash_old', '-noout', '-in', cert_path
            ], capture_output=True, text=True, timeout=10)
        except (OSError, subprocess.SubprocessError):
            return None

        if result.returncode == 0:
            output_lines = result.stdout.strip().splitlines()
            if output_lines and output_lines[0].strip():
                return output_lines[0].strip()
        return None

    def prepare_android_certificate(self, cert_path: str) -> Optional[str]:
        """Copy the certificate to ``<hash>.0`` inside certificates_dir.

        Args:
            cert_path: Path to a PEM-encoded certificate.

        Returns:
            Path of the renamed copy, or None when the hash cannot be computed.
        """
        cert_hash = self.get_certificate_hash(cert_path)
        if not cert_hash:
            print("❌ 无法获取证书哈希")
            return None

        android_cert_name = f"{cert_hash}.0"
        android_cert_path = os.path.join(self.certificates_dir, android_cert_name)

        shutil.copy2(cert_path, android_cert_path)

        print(f"✅ Android证书准备完成: {android_cert_path}")
        return android_cert_path

    @staticmethod
    def _build_system_install_command(remote_temp_path: str, system_cert_path: str) -> list:
        """Build the adb argv that copies the pushed file into the system store.

        The whole device-side script is passed to ``su -c`` as ONE quoted
        argument, so ``&&`` is evaluated by the root shell on the device and
        never by a shell on the host.
        """
        src = shlex.quote(remote_temp_path)
        dst = shlex.quote(system_cert_path)
        device_script = (
            f'mount -o rw,remount /system && '
            f'cp {src} {dst} && '
            f'chmod 644 {dst}'
        )
        return ['adb', 'shell', 'su', '-c', shlex.quote(device_script)]

    def _device_has_root(self) -> bool:
        """Return True when ``su -c id`` on the device reports uid 0."""
        root_check = subprocess.run([
            'adb', 'shell', 'su', '-c', 'id'
        ], capture_output=True, text=True, timeout=10)
        return 'uid=0' in root_check.stdout

    def _install_to_system_store(self, cert_path: str, remote_temp_path: str) -> bool:
        """Copy the already-pushed certificate into the system store as root.

        Returns True only when the device-side copy succeeded.
        """
        cert_hash = self.get_certificate_hash(cert_path)
        if not cert_hash:
            return False

        system_cert_name = f"/system/etc/security/cacerts/{cert_hash}.0"
        install_result = subprocess.run(
            self._build_system_install_command(remote_temp_path, system_cert_name),
            capture_output=True,
            text=True
        )

        if install_result.returncode != 0:
            print(f"⚠️  系统目录安装失败: {install_result.stderr}")
            print("ℹ️  请手动安装证书或使用非ROOT模式")
            return False

        print(f"✅ 证书已安装到系统目录: {system_cert_name}")
        self.device_cert_installed = True

        # The staging copy on /sdcard is no longer needed.
        subprocess.run(['adb', 'shell', 'rm', shlex.quote(remote_temp_path)],
                       capture_output=True)
        return True

    def install_cert_to_device(self, cert_path: str, use_adb: bool = True) -> bool:
        """Install the certificate on the connected device.

        The file is pushed to ``/sdcard``. If the device grants root, it is
        then copied into the system certificate directory; otherwise manual
        installation instructions are printed.

        Args:
            cert_path: Local path of the certificate to install.
            use_adb: When False, nothing is pushed and only the certificate
                location is printed for manual transfer.

        Returns:
            False when pushing the file fails or an unexpected error occurs.
            True otherwise, including the case where the user is expected
            to finish the installation manually.
        """
        try:
            if not use_adb:
                print("ℹ️  手动安装模式 - 请将证书传输到设备并手动安装")
                print(f"📍 证书位置: {cert_path}")
                return True

            # Stage the certificate on shared storage first.
            remote_temp_path = f'/sdcard/{os.path.basename(cert_path)}'
            push_result = subprocess.run([
                'adb', 'push', cert_path, remote_temp_path
            ], capture_output=True, text=True)

            if push_result.returncode != 0:
                print(f"❌ 推送证书失败: {push_result.stderr}")
                return False

            print(f"✅ 证书已推送至设备: {remote_temp_path}")

            # On Android 7+ apps only trust CAs from the system directory,
            # which can only be written with root.
            try:
                if self._device_has_root():
                    print("🔐 设备已ROOT,尝试安装到系统目录")
                    if self._install_to_system_store(cert_path, remote_temp_path):
                        return True
            except subprocess.TimeoutExpired:
                print("⏰ ROOT权限检查超时")
            except Exception as e:
                print(f"⚠️  ROOT安装过程中出错: {e}")

            # Root installation was not possible: guide the user instead.
            print("\n📋 手动安装证书步骤:")
            print("1. 打开设备上的 '设置' -> '安全' -> '加密与凭据' -> '从存储设备安装'")
            print("2. 选择证书文件进行安装")
            print("3. 设置证书名称和用途(一般选择VPN和应用)")

            return True  # the remaining steps are left to the user

        except Exception as e:
            print(f"❌ 安装证书失败: {e}")
            return False

    def setup_proxy_certificate(self, proxy_port: int = 8888) -> bool:
        """Generate a CA certificate, convert it for Android and install it.

        Args:
            proxy_port: Proxy port shown in the progress message; it is not
                used for anything else here.

        Returns:
            True when every step succeeded (see install_cert_to_device for
            what counts as success), False otherwise.
        """
        print(f"🔧 开始配置代理证书 (端口: {proxy_port})")

        key_path, cert_path = self.generate_ca_certificate()
        if not key_path:
            print("❌ 证书生成失败,无法配置代理证书")
            return False

        android_cert_path = self.prepare_android_certificate(cert_path)
        if not android_cert_path:
            print("❌ Android证书准备失败")
            return False

        success = self.install_cert_to_device(android_cert_path)

        if success:
            print(f"✅ 代理证书配置完成!")
            print(f"📍 证书文件: {cert_path}")
            print(f"📍 Android证书: {android_cert_path}")
            print(f"📋 请确保代理工具(如Fiddler/Charles/Mitmproxy)使用相同CA证书")

        return success

# 使用示例
def setup_https_interception():
    """Run the full certificate setup and print the follow-up checklist.

    Creates a CertificateManager, asks it to set up the proxy certificate
    for port 8888, and prints either the remaining manual steps or a
    failure notice.
    """
    print("🚀 开始设置HTTPS流量拦截...")

    manager = CertificateManager()

    # Generate and install the certificate; bail out early on failure.
    if not manager.setup_proxy_certificate(proxy_port=8888):
        print("\n❌ HTTPS拦截设置失败,请检查错误信息")
        return

    follow_up_lines = (
        "\n🎉 HTTPS拦截设置完成!",
        "\n📋 后续配置步骤:",
        "1. 启动代理工具(Fiddler/Charles/Mitmproxy)",
        "2. 配置移动设备代理指向电脑IP:8888",
        "3. 确保证书已在设备中正确安装并信任",
        "4. 开始监控HTTPS流量",
    )
    for line in follow_up_lines:
        print(line)

# Run the setup flow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    setup_https_interception()