抖音自动点赞与评论爬虫项目

本项目展示如何使用前面学到的技术来构建一个完整的抖音自动化爬虫系统。

项目架构设计

# douyin_automation_system.py
import uiautomator2 as u2
import time
import random
import json
import sqlite3
from datetime import datetime, timedelta
from typing import Optional, Dict, List
import logging
from dataclasses import dataclass
from enum import Enum

# Configure logging: INFO level, each record formatted as "time - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger shared by every class and function in this script.
logger = logging.getLogger(__name__)

class ActionType(Enum):
    """Kinds of action the bot can perform on a video.

    The string values are what gets written to the ``action_type`` column of
    the ``interactions`` table.
    """
    LIKE = "like"
    COMMENT = "comment"
    FOLLOW = "follow"
    SHARE = "share"
    SCROLL = "scroll"

@dataclass
class ActionConfig:
    """Probabilities and canned comment texts that drive the bot's behaviour.

    ``DouyinAutoBot.execute_random_action`` draws one random number per video
    and compares it against the *cumulative* sum of like, comment, follow and
    share probabilities, in that order. Once the running sum reaches 1.0, the
    remaining actions can never be selected.

    Attributes are plain data; ``comment_texts`` falls back to a built-in list
    of short comments when left as ``None``.
    """
    like_probability: float = 0.8
    comment_probability: float = 0.2
    follow_probability: float = 0.05
    share_probability: float = 0.02
    scroll_probability: float = 1.0
    # None means "use the built-in default comments" (filled in __post_init__).
    comment_texts: Optional[List[str]] = None

    def __post_init__(self):
        """Fill in the default comment pool when the caller supplied none."""
        # A fresh list is built per instance so instances never share state.
        if self.comment_texts is None:
            self.comment_texts = [
                "不错👍", "好看!", "支持一下", "厉害了", "666",
                "学到了", "收藏了", "转走", "顶", "马克",
                "优秀!", "棒棒哒", "安排!", "稳", "牛逼"
            ]

class DatabaseManager:
    """SQLite-backed store for collected video metadata and interaction logs.

    Every operation opens a short-lived connection to ``db_path`` and closes
    it again, even when a statement fails, so no connection is held between
    calls.
    """

    # DDL run by init_database(); every statement is idempotent.
    _SCHEMA = (
        # Video metadata table.
        '''
            CREATE TABLE IF NOT EXISTS videos (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                aweme_id TEXT UNIQUE,
                desc TEXT,
                create_time INTEGER,
                author_nickname TEXT,
                author_uid TEXT,
                play_count INTEGER,
                like_count INTEGER,
                comment_count INTEGER,
                share_count INTEGER,
                music_title TEXT,
                video_url TEXT,
                cover_url TEXT,
                collected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''',
        # Interaction log table.
        '''
            CREATE TABLE IF NOT EXISTS interactions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                aweme_id TEXT,
                action_type TEXT,
                success BOOLEAN,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (aweme_id) REFERENCES videos (aweme_id)
            )
        ''',
        # Comment log table.
        '''
            CREATE TABLE IF NOT EXISTS comments (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                aweme_id TEXT,
                comment_text TEXT,
                success BOOLEAN,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (aweme_id) REFERENCES videos (aweme_id)
            )
        ''',
    )

    def __init__(self, db_path: str = "douyin_data.db"):
        """Remember ``db_path`` and make sure the schema exists."""
        self.db_path = db_path
        self.init_database()

    def _execute(self, sql: str, params: tuple = ()) -> None:
        """Run one statement in its own connection, commit, and always close."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute(sql, params)
            conn.commit()
        finally:
            conn.close()

    def init_database(self):
        """Create the ``videos``, ``interactions`` and ``comments`` tables.

        Errors propagate to the caller, but the connection is closed first.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            for statement in self._SCHEMA:
                cursor.execute(statement)
            conn.commit()
        finally:
            conn.close()
        logger.info("数据库初始化完成")

    def save_video_info(self, video_info: Dict):
        """Insert or replace one row in ``videos``.

        ``video_info`` is read with the keys ``aweme_id``, ``desc``,
        ``create_time`` and the nested dicts ``author``, ``stats``, ``music``
        and ``video``. Missing or ``None`` nested dicts fall back to default
        values instead of aborting the save. Failures are logged, not raised.
        """
        try:
            # `or {}` also covers keys that are present but explicitly None.
            author = video_info.get('author') or {}
            stats = video_info.get('stats') or {}
            music = video_info.get('music') or {}
            video = video_info.get('video') or {}

            self._execute('''
                INSERT OR REPLACE INTO videos 
                (aweme_id, desc, create_time, author_nickname, author_uid, 
                 play_count, like_count, comment_count, share_count, music_title, 
                 video_url, cover_url)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                video_info.get('aweme_id'),
                video_info.get('desc', ''),
                video_info.get('create_time'),
                author.get('nickname', ''),
                author.get('uid', ''),
                stats.get('play_count', 0),
                stats.get('like_count', 0),
                stats.get('comment_count', 0),
                stats.get('share_count', 0),
                music.get('title', ''),
                video.get('play_addr', ''),
                video.get('cover', '')
            ))
            logger.info(f"视频信息已保存: {video_info.get('aweme_id')}")
        except Exception as e:
            # Best-effort persistence: a failed save must not stop the bot.
            logger.error(f"保存视频信息失败: {e}")

    def save_interaction(self, aweme_id: str, action_type: str, success: bool):
        """Append one row to ``interactions``; failures are logged, not raised."""
        try:
            self._execute('''
                INSERT INTO interactions (aweme_id, action_type, success)
                VALUES (?, ?, ?)
            ''', (aweme_id, action_type, success))
            logger.info(f"互动记录已保存: {action_type} - {success}")
        except Exception as e:
            logger.error(f"保存互动记录失败: {e}")

    def save_comment(self, aweme_id: str, comment_text: str, success: bool):
        """Append one row to ``comments``; failures are logged, not raised."""
        try:
            self._execute('''
                INSERT INTO comments (aweme_id, comment_text, success)
                VALUES (?, ?, ?)
            ''', (aweme_id, comment_text, success))
            logger.info(f"评论记录已保存: {comment_text} - {success}")
        except Exception as e:
            logger.error(f"保存评论记录失败: {e}")

class DouyinAutoBot:
    """Automation bot that drives the Douyin Android app through uiautomator2.

    The bot swipes through the feed, records (currently simulated) video
    metadata, and performs like/comment actions by tapping screen positions
    computed from the window size. Every action is logged to the database
    under the id of the most recently collected video.
    """

    # Id used in records before any video has been collected.
    UNKNOWN_VIDEO_ID = "current_video_id"

    def __init__(self, device_id: Optional[str] = None, db_path: str = "douyin_data.db"):
        """Create the database manager, reset counters and connect the device.

        Args:
            device_id: Device identifier passed to ``u2.connect``; ``None`` or
                empty lets uiautomator2 pick the device.
            db_path: Path of the SQLite database file.
        """
        self.device_id = device_id
        self.db_manager = DatabaseManager(db_path)
        self.d = None
        self.action_config = ActionConfig()
        self.video_counter = 0
        self.daily_limit = 100  # maximum successful actions per day
        self.daily_count = 0
        # Updated by collect_video_info(); links interactions to video rows.
        self.current_aweme_id = self.UNKNOWN_VIDEO_ID

        self.setup_device()

    def setup_device(self):
        """Connect to the device; failures are logged and leave ``self.d`` unset."""
        try:
            if self.device_id:
                self.d = u2.connect(self.device_id)
            else:
                self.d = u2.connect()

            if self.d:
                logger.info(f"设备连接成功: {self.d.serial}")
                logger.info(f"设备信息: {self.d.info}")
            else:
                logger.error("设备连接失败")

        except Exception as e:
            logger.error(f"设备连接失败: {e}")

    def launch_douyin(self) -> bool:
        """Start the Douyin app and wait 8 seconds; return True on success."""
        try:
            self.d.app_start("com.ss.android.ugc.aweme")
            time.sleep(8)  # give the app time to finish starting
            logger.info("抖音应用已启动")
            return True
        except Exception as e:
            logger.error(f"启动抖音失败: {e}")
            return False

    def scroll_to_next_video(self) -> bool:
        """Swipe up to the next video; return True on success."""
        try:
            w, h = self.d.window_size()
            # Swipe from 4/5 of the screen height up to 1/5, along the centre.
            self.d.swipe(w//2, h*4//5, w//2, h//5, 0.8)
            time.sleep(random.uniform(2, 4))  # randomised pause after the swipe
            self.video_counter += 1
            logger.info(f"滑动到第 {self.video_counter} 个视频")
            return True
        except Exception as e:
            logger.error(f"滑动失败: {e}")
            return False

    def _press_back_quietly(self) -> None:
        """Press the back key; log instead of raising if the device call fails."""
        try:
            self.d.press("back")
        except Exception as e:
            logger.error(f"返回失败: {e}")

    def like_current_video(self) -> bool:
        """Tap the assumed like-button position and record the outcome.

        The tap is not verified on screen; success only means the click call
        did not raise.
        """
        try:
            # Like-button position derived from the screen size.
            w, h = self.d.window_size()
            x, y = w*4//5, h//2 + 100

            self.d.click(x, y)
            time.sleep(0.5)

            logger.info("执行点赞操作")
            self.db_manager.save_interaction(self.current_aweme_id, ActionType.LIKE.value, True)
            return True
        except Exception as e:
            logger.error(f"点赞失败: {e}")
            self.db_manager.save_interaction(self.current_aweme_id, ActionType.LIKE.value, False)
            return False

    def comment_on_video(self, comment_text: Optional[str] = None) -> bool:
        """Post a comment on the current video and record the outcome.

        Args:
            comment_text: Text to post; when ``None`` a random entry of
                ``action_config.comment_texts`` is used.

        Returns:
            True when every UI step ran without raising, False otherwise.
        """
        if comment_text is None:
            comment_text = random.choice(self.action_config.comment_texts)

        try:
            # Open the comment panel.
            w, h = self.d.window_size()
            comment_x, comment_y = w*4//5, h//2 + 200

            self.d.click(comment_x, comment_y)
            time.sleep(2)

            # Focus the input box.
            input_x, input_y = w//2, h*5//6
            self.d.click(input_x, input_y)
            time.sleep(1)

            # Type the comment.
            self.d.send_keys(comment_text)
            time.sleep(1)

            # Tap the send button.
            send_x, send_y = w*9//10, h*5//6
            self.d.click(send_x, send_y)
            time.sleep(2)
        except Exception as e:
            logger.error(f"评论失败: {e}")
            self.db_manager.save_comment(self.current_aweme_id, comment_text, False)
            self.db_manager.save_interaction(self.current_aweme_id, ActionType.COMMENT.value, False)
            # Try to leave the comment panel; this must not raise out of the
            # handler (for example when the device was never connected).
            self._press_back_quietly()
            return False

        logger.info(f"评论成功: {comment_text}")
        self.db_manager.save_comment(self.current_aweme_id, comment_text, True)
        self.db_manager.save_interaction(self.current_aweme_id, ActionType.COMMENT.value, True)

        # Leave the comment panel.
        self._press_back_quietly()
        return True

    def collect_video_info(self) -> Dict:
        """Build (simulated) metadata for the current video and persist it.

        The data is randomly generated placeholder content, not read from the
        screen. The generated ``aweme_id`` becomes ``self.current_aweme_id``
        so later interaction records refer to this video.
        """
        video_info = {
            'aweme_id': f"video_{self.video_counter}_{int(time.time())}",
            'desc': f"视频描述 {self.video_counter}",
            'create_time': int(time.time()),
            'author': {
                'nickname': f"作者{random.randint(1000, 9999)}",
                'uid': f"uid_{random.randint(100000, 999999)}"
            },
            'stats': {
                'play_count': random.randint(1000, 100000),
                'like_count': random.randint(100, 5000),
                'comment_count': random.randint(10, 200),
                'share_count': random.randint(5, 100)
            },
            'music': {
                'title': f"音乐{random.randint(1, 100)}"
            },
            'video': {
                'play_addr': f"http://example.com/video/{self.video_counter}",
                'cover': f"http://example.com/cover/{self.video_counter}.jpg"
            }
        }

        self.current_aweme_id = video_info['aweme_id']
        self.db_manager.save_video_info(video_info)
        return video_info

    def execute_random_action(self) -> bool:
        """Pick and perform one action for the current video.

        One random number in [0, 1) is compared against the cumulative sums of
        the like, comment, follow and share probabilities (in that order);
        anything above the last threshold is a plain "scroll". Follow and
        share are simulated: they are only logged and recorded.

        Returns:
            False when the daily limit is already reached or the chosen action
            failed, True otherwise. Successful actions increment
            ``daily_count``.
        """
        if self.daily_count >= self.daily_limit:
            logger.warning("达到每日操作限制")
            return False

        cfg = self.action_config
        like_cut = cfg.like_probability
        comment_cut = like_cut + cfg.comment_probability
        follow_cut = comment_cut + cfg.follow_probability
        share_cut = follow_cut + cfg.share_probability

        rand_val = random.random()

        if rand_val < like_cut:
            success = self.like_current_video()
            action = "点赞"
        elif rand_val < comment_cut:
            success = self.comment_on_video()
            action = "评论"
        elif rand_val < follow_cut:
            # Follow is simulated only.
            logger.info("执行关注操作(模拟)")
            success = True
            action = "关注"
            self.db_manager.save_interaction(self.current_aweme_id, ActionType.FOLLOW.value, True)
        elif rand_val < share_cut:
            # Share is simulated only.
            logger.info("执行分享操作(模拟)")
            success = True
            action = "分享"
            self.db_manager.save_interaction(self.current_aweme_id, ActionType.SHARE.value, True)
        else:
            logger.info("执行滑动操作")
            success = True
            action = "滑动"

        if success:
            self.daily_count += 1
            logger.info(f"执行{action}操作成功")

        return success

    def run_session(self, duration_minutes: int = 60, max_videos: int = 50):
        """Process videos until a time, video-count or daily limit is hit.

        Args:
            duration_minutes: Wall-clock budget for the session.
            max_videos: Maximum number of videos to process.
        """
        logger.info(f"开始抖音自动化会话,预计时长: {duration_minutes}分钟,最大视频数: {max_videos}")

        start_time = time.time()
        video_count = 0

        while (time.time() - start_time < duration_minutes * 60 and
               video_count < max_videos and
               self.daily_count < self.daily_limit):

            # Move to the next video; on failure wait and retry.
            if not self.scroll_to_next_video():
                logger.error("滑动视频失败,跳过本次循环")
                time.sleep(5)
                continue

            # Record the video first so the action below is linked to it.
            self.collect_video_info()

            self.execute_random_action()

            # Randomised pause between videos.
            wait_time = random.uniform(5, 15)
            logger.info(f"等待 {wait_time:.1f} 秒")
            time.sleep(wait_time)

            video_count += 1

        logger.info(f"会话结束,共处理 {video_count} 个视频")

    def run_continuous(self, session_duration: int = 45, break_duration: int = 15):
        """Alternate sessions and breaks until interrupted.

        When the daily limit is reached the bot sleeps until local midnight
        and resets ``daily_count``. ``KeyboardInterrupt`` and any other
        exception end the loop and are logged.

        Args:
            session_duration: Length of each session in minutes.
            break_duration: Length of each break in minutes.
        """
        logger.info("开始连续运行模式")

        try:
            while True:
                self.run_session(duration_minutes=session_duration, max_videos=30)

                if self.daily_count >= self.daily_limit:
                    logger.info("达到每日操作限制,等待到明天")
                    # Seconds remaining until the next local midnight.
                    tomorrow = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
                    sleep_time = (tomorrow - datetime.now()).total_seconds()
                    time.sleep(sleep_time)
                    self.daily_count = 0  # new day, reset the counter
                    continue

                logger.info(f"休息 {break_duration} 分钟")
                time.sleep(break_duration * 60)

        except KeyboardInterrupt:
            logger.info("用户中断自动化")
        except Exception as e:
            logger.error(f"运行过程中出现错误: {e}")

def main():
    """Entry point: build the bot, configure it and run a single session.

    To run continuously instead, replace the ``run_session`` call with
    ``bot.run_continuous(session_duration=30, break_duration=10)``.
    """
    logger.info("抖音自动化系统启动")

    bot = DouyinAutoBot()

    # Action probabilities used for this run.
    probabilities = {
        "like_probability": 0.8,
        "comment_probability": 0.15,
        "follow_probability": 0.05,
        "share_probability": 0.02,
    }
    bot.action_config = ActionConfig(**probabilities)

    # Only start a session when the app could be launched.
    launched = bot.launch_douyin()
    if launched:
        bot.run_session(duration_minutes=30, max_videos=20)

    logger.info("抖音自动化系统结束")

# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

项目配置与部署

# config.py - 项目配置文件
import os
from dataclasses import dataclass
from typing import List

@dataclass
class AppConfig:
    """Application settings grouped by concern.

    NOTE(review): nothing in the visible source reads this class; the bot
    uses ``ActionConfig`` and its own hard-coded values. Confirm where it is
    meant to be wired in.
    """
    # Device settings
    device_id: str = ""  # leave empty to use the default device
    auto_connect: bool = True
    
    # Action settings
    like_probability: float = 0.8
    comment_probability: float = 0.15
    follow_probability: float = 0.05
    share_probability: float = 0.02
    
    # Timing settings
    session_duration: int = 45  # length of one session (minutes)
    break_duration: int = 15    # length of a break (minutes)
    max_videos_per_session: int = 30
    
    # Limits
    daily_action_limit: int = 100  # maximum actions per day
    
    # Database settings
    database_path: str = "douyin_data.db"
    
    # Logging settings
    log_level: str = "INFO"
    log_file: str = "douyin_bot.log"

# requirements.txt - third-party packages to install with pip.
# sqlite3 is deliberately not listed: it ships with the Python standard
# library and is not installable from PyPI, so listing it makes
# `pip install -r requirements.txt` fail.
requirements_content = """
uiautomator2>=2.16.22
opencv-python>=4.5.0
numpy>=1.19.0
pillow>=8.0.0
typing-extensions
"""

# docker-compose.yml - Docker deployment configuration, kept as a string.
# NOTE(review): no visible code writes this string to disk; confirm whether
# create_project_structure() is expected to emit it.
docker_compose_content = """
version: '3.8'

services:
  douyin-bot:
    build: .
    container_name: douyin-automation
    volumes:
      - ./data:/app/data
      - /dev/bus/usb:/dev/bus/usb  # USB设备访问(如果需要连接手机)
    environment:
      - PYTHONUNBUFFERED=1
    restart: unless-stopped
    privileged: true  # 需要特权模式访问USB设备
"""

# Dockerfile - image definition, kept as a string. The doubled backslashes
# become single line-continuation backslashes in the resulting text.
# NOTE(review): no visible code writes this string to disk.
dockerfile_content = """
FROM python:3.9-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# 安装ADB(如果需要)
RUN apt-get update && \\
    apt-get install -y android-tools-adb && \\
    rm -rf /var/lib/apt/lists/*

COPY . .

CMD ["python", "douyin_automation_system.py"]
"""

def create_project_structure(base_dir: str = "."):
    """Create the project's directories and starter files under ``base_dir``.

    Creates the ``data``, ``logs``, ``screenshots`` and ``templates``
    directories, then writes ``config.py`` and ``requirements.txt``. Existing
    files with those names are overwritten.

    Args:
        base_dir: Directory to scaffold into; it is created if missing. The
            default is the current working directory.
    """
    import os

    directories = [
        "data",
        "logs",
        "screenshots",
        "templates"  # image templates for Airtest
    ]

    for directory in directories:
        # makedirs also creates base_dir itself when it does not exist yet.
        os.makedirs(os.path.join(base_dir, directory), exist_ok=True)
        print(f"📁 创建目录: {directory}")

    # Write the starter configuration module.
    with open(os.path.join(base_dir, "config.py"), "w", encoding="utf-8") as f:
        f.write("""
# 项目配置文件
from dataclasses import dataclass
from typing import List

@dataclass
class BotConfig:
    device_id: str = ""
    like_prob: float = 0.8
    comment_prob: float = 0.15
    follow_prob: float = 0.05
    session_duration: int = 30
    daily_limit: int = 100
""")

    # Write the dependency list from the module-level requirements_content.
    with open(os.path.join(base_dir, "requirements.txt"), "w", encoding="utf-8") as f:
        f.write(requirements_content)

    print("✅ 项目结构创建完成!")

# Scaffold the project only when this file is executed as a script.
if __name__ == "__main__":
    create_project_structure()

项目监控与统计

# analytics.py - 数据分析和统计
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import seaborn as sns
from typing import Dict, List

class DouyinAnalytics:
    """Read-only statistics and charts over the bot's SQLite database."""

    def __init__(self, db_path: str = "douyin_data.db"):
        """Remember the path of the database to analyse."""
        self.db_path = db_path

    def _read_query(self, query: str, params: tuple = ()) -> "pd.DataFrame":
        """Run a parameterised query and always close the connection."""
        conn = sqlite3.connect(self.db_path)
        try:
            return pd.read_sql_query(query, conn, params=params)
        finally:
            conn.close()

    def get_daily_statistics(self, days: int = 7) -> Dict:
        """Summarise interactions recorded in the last ``days`` days.

        Returns:
            A dict with ``total_interactions`` (number of interaction rows),
            ``success_rate`` (share of successful interactions, 0 when there
            are none), ``action_distribution`` (count per action type) and
            ``daily_trend`` (count per date).
        """
        # The date modifier is bound as a parameter instead of being
        # formatted into the SQL text.
        query = """
        SELECT 
            date(timestamp) as date,
            action_type,
            count(*) as count,
            avg(success) as success_rate
        FROM interactions 
        WHERE timestamp >= date('now', ?)
        GROUP BY date(timestamp), action_type
        ORDER BY date DESC
        """

        df = self._read_query(query, (f"-{int(days)} days",))

        # Each row is one (date, action_type) group, so totals come from the
        # per-group counts, and the overall rate weights each group's rate by
        # its size.
        total = int(df['count'].sum()) if not df.empty else 0
        if total:
            success_rate = float((df['success_rate'] * df['count']).sum() / total)
        else:
            success_rate = 0

        stats = {
            'total_interactions': total,
            'success_rate': success_rate,
            'action_distribution': {
                key: int(value)
                for key, value in df.groupby('action_type')['count'].sum().items()
            },
            'daily_trend': {
                key: int(value)
                for key, value in df.groupby('date')['count'].sum().items()
            }
        }

        return stats

    def get_top_videos(self, limit: int = 10) -> List[Dict]:
        """Return up to ``limit`` video rows ordered by likes, then comments."""
        query = """
        SELECT * FROM videos 
        ORDER BY like_count DESC, comment_count DESC
        LIMIT ?
        """

        df = self._read_query(query, (int(limit),))

        return df.to_dict('records')

    def generate_report(self) -> str:
        """Build a text report with overall statistics and the top 5 videos."""
        stats = self.get_daily_statistics()
        top_videos = self.get_top_videos()

        report = f"""
=== 抖音自动化机器人分析报告 ===

📊 总体统计:
- 总互动数: {stats['total_interactions']}
- 平均成功率: {stats['success_rate']:.2%}
- 动作分布: {stats['action_distribution']}

🔥 热门视频TOP5:
"""
        for i, video in enumerate(top_videos[:5], 1):
            # desc may be NULL in the database; treat it as an empty string.
            desc = video.get('desc') or ''
            report += f"{i}. {desc[:30]}... - 点赞:{video.get('like_count', 0)}, 评论:{video.get('comment_count', 0)}\n"

        return report

    def plot_interactions_trend(self):
        """Plot per-day interaction counts for the last 30 days.

        One line is drawn per action type. The figure is saved to
        ``interaction_trend.png`` and then shown; nothing is drawn when there
        is no data.
        """
        query = """
        SELECT 
            date(timestamp) as date,
            action_type,
            count(*) as count
        FROM interactions 
        WHERE timestamp >= date('now', '-30 days')
        GROUP BY date(timestamp), action_type
        ORDER BY date
        """

        df = self._read_query(query)

        if df.empty:
            print("暂无数据可绘制")
            return

        plt.figure(figsize=(12, 6))

        # One line per action type.
        for action_type in df['action_type'].unique():
            action_data = df[df['action_type'] == action_type]
            plt.plot(action_data['date'], action_data['count'], label=action_type, marker='o')

        plt.title('互动操作趋势图 (最近30天)')
        plt.xlabel('日期')
        plt.ylabel('操作次数')
        plt.legend()
        plt.xticks(rotation=45)
        plt.tight_layout()

        plt.savefig('interaction_trend.png', dpi=300, bbox_inches='tight')
        plt.show()

        print("📈 趋势图已保存为 interaction_trend.png")

def run_analytics():
    """Print the text report, then draw the interaction trend chart."""
    analyzer = DouyinAnalytics()

    # Text report first.
    print(analyzer.generate_report())

    # Then the chart.
    analyzer.plot_interactions_trend()

# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    run_analytics()