# utopia-surveillance-tool/run_pipeline.py
"""
run_pipeline.py
───────────────
Master pipeline script that orchestrates the full workflow:
Phase 1: Extract frames from videos (adaptive sampling + YOLO detection)
Phase 2: Split dataset by camera (train/test)
Phase 3: Train yolo26n.pt on the dataset
Designed to run overnight — resumable from any phase.
Usage:
python run_pipeline.py # Run full pipeline
python run_pipeline.py --phase 2 # Start from phase 2 (skip extraction)
python run_pipeline.py --phase 3 # Start from phase 3 (skip extract + split)
python run_pipeline.py --extract-only # Only extract (no split or train)
"""
import os
import sys
import argparse
import logging
import traceback
from datetime import datetime
import pipeline_config as cfg
# ──────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────
os.makedirs(cfg.LOG_DIR, exist_ok=True)

# One timestamped log file per run, so overnight runs never clobber each other.
_run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = os.path.join(cfg.LOG_DIR, f"pipeline_{_run_stamp}.log")

# Mirror every record to both the log file and the console.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.FileHandler(log_file), logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)
def phase1_extract():
    """Phase 1: Extract dataset from videos.

    Returns the statistics object produced by ``extract_all``.
    """
    banner = "=" * 60
    logger.info("\n" + banner)
    logger.info("= PHASE 1: DATASET EXTRACTION")
    logger.info(banner + "\n")
    # Imported lazily so later phases can run without extraction deps loaded.
    from extract_dataset import extract_all
    return extract_all()
def phase2_split():
    """Phase 2: Split dataset by camera.

    Returns the split description produced by ``split_dataset``.
    """
    banner = "=" * 60
    logger.info("\n" + banner)
    logger.info("= PHASE 2: CAMERA-LEVEL TRAIN/TEST SPLIT")
    logger.info(banner + "\n")
    # Imported lazily so other phases can run without this module loaded.
    from split_dataset import split_dataset
    return split_dataset()
def phase3_train():
    """Phase 3: Train model.

    Returns the best-weights result produced by ``train_model``.
    """
    banner = "=" * 60
    logger.info("\n" + banner)
    logger.info("= PHASE 3: MODEL TRAINING")
    logger.info(banner + "\n")
    # Imported lazily so earlier phases can run without training deps loaded.
    from train_model import train_model
    return train_model()
def run_pipeline(start_phase: int = 1, extract_only: bool = False):
    """Run the full pipeline from the specified starting phase.

    Args:
        start_phase: First phase to execute (1=extract, 2=split, 3=train).
            Earlier phases are skipped, which makes overnight runs resumable.
        extract_only: If True, stop after Phase 1 (no split or training).

    Exits with status 1 on user interrupt or on any phase failure; the phase
    modules checkpoint their own progress, so a re-run resumes.
    """
    pipeline_start = datetime.now()
    logger.info("+" + "-" * 58 + "+")
    logger.info("| PERSON DETECTION PIPELINE |")
    logger.info("| " + f"Started: {pipeline_start.strftime('%Y-%m-%d %H:%M:%S')}".ljust(57) + "|")
    logger.info("+" + "-" * 58 + "+")
    logger.info("")
    # FIX: was an f-string with no placeholders (ruff F541).
    logger.info("Configuration:")
    logger.info(f" Video directory: {cfg.VIDEO_DIR}")
    logger.info(f" Dataset output: {cfg.DATASET_DIR}")
    logger.info(f" Detector model: {cfg.DETECTOR_MODEL}")
    logger.info(f" Training model: {cfg.TRAIN_MODEL}")
    logger.info(f" Max dataset size: {cfg.MAX_DATASET_SIZE_GB} GB")
    logger.info(f" Starting phase: {start_phase}")
    logger.info(f" Extract only: {extract_only}")
    logger.info("")
    try:
        # Phase 1: Extract
        if start_phase <= 1:
            p1_start = datetime.now()
            phase1_extract()
            p1_duration = datetime.now() - p1_start
            logger.info(f"\nPhase 1 completed in {p1_duration}")
            if extract_only:
                logger.info("Extract-only mode — stopping after Phase 1.")
                return
        # Phase 2: Split
        if start_phase <= 2:
            p2_start = datetime.now()
            phase2_split()
            p2_duration = datetime.now() - p2_start
            logger.info(f"\nPhase 2 completed in {p2_duration}")
        # Phase 3: Train
        if start_phase <= 3:
            p3_start = datetime.now()
            # FIX: return value (best weights path) was assigned but never
            # used (ruff F841); discard it here like the other phases.
            phase3_train()
            p3_duration = datetime.now() - p3_start
            logger.info(f"\nPhase 3 completed in {p3_duration}")
        # Final summary
        pipeline_end = datetime.now()
        total_duration = pipeline_end - pipeline_start
        logger.info("\n" + "+" + "-" * 58 + "+")
        logger.info("| PIPELINE COMPLETED SUCCESSFULLY |")
        logger.info("| " + f"Duration: {total_duration}".ljust(57) + "|")
        logger.info("+" + "-" * 58 + "+")
    except KeyboardInterrupt:
        logger.warning("\n\nPipeline interrupted by user. Progress has been checkpointed.")
        logger.warning("Re-run to resume from where you left off.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: log the full traceback, then exit nonzero so
        # cron/shell wrappers notice the failure.
        logger.error(f"\n\nPipeline failed with error: {e}")
        logger.error(traceback.format_exc())
        logger.error("Progress has been checkpointed. Fix the error and re-run.")
        sys.exit(1)
def main():
    """Command-line entry point: parse arguments, then run the pipeline."""
    # Epilog text shown verbatim by --help (RawDescriptionHelpFormatter).
    examples = (
        "\n"
        "Examples:\n"
        "python run_pipeline.py Run full pipeline\n"
        "python run_pipeline.py --phase 2 Start from split phase\n"
        "python run_pipeline.py --phase 3 Start from training phase\n"
        "python run_pipeline.py --extract-only Only extract dataset\n"
    )
    parser = argparse.ArgumentParser(
        description="Person detection dataset pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    parser.add_argument(
        "--phase",
        type=int,
        default=1,
        choices=[1, 2, 3],
        help="Starting phase (1=extract, 2=split, 3=train)",
    )
    parser.add_argument(
        "--extract-only",
        action="store_true",
        help="Only run extraction phase",
    )
    opts = parser.parse_args()
    run_pipeline(start_phase=opts.phase, extract_only=opts.extract_only)


if __name__ == "__main__":
    main()