""" run_pipeline.py ─────────────── Master pipeline script that orchestrates the full workflow: Phase 1: Extract frames from videos (adaptive sampling + YOLO detection) Phase 2: Split dataset by camera (train/test) Phase 3: Train yolo26n.pt on the dataset Designed to run overnight — resumable from any phase. Usage: python run_pipeline.py # Run full pipeline python run_pipeline.py --phase 2 # Start from phase 2 (skip extraction) python run_pipeline.py --phase 3 # Start from phase 3 (skip extract + split) python run_pipeline.py --extract-only # Only extract (no split or train) """ import os import sys import argparse import logging import traceback from datetime import datetime import pipeline_config as cfg # ────────────────────────────────────────────── # Logging # ────────────────────────────────────────────── os.makedirs(cfg.LOG_DIR, exist_ok=True) log_file = os.path.join(cfg.LOG_DIR, f"pipeline_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log") logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[ logging.FileHandler(log_file), logging.StreamHandler(sys.stdout), ], ) logger = logging.getLogger(__name__) def phase1_extract(): """Phase 1: Extract dataset from videos.""" logger.info("\n" + "=" * 60) logger.info("= PHASE 1: DATASET EXTRACTION") logger.info("=" * 60 + "\n") from extract_dataset import extract_all stats = extract_all() return stats def phase2_split(): """Phase 2: Split dataset by camera.""" logger.info("\n" + "=" * 60) logger.info("= PHASE 2: CAMERA-LEVEL TRAIN/TEST SPLIT") logger.info("=" * 60 + "\n") from split_dataset import split_dataset split_info = split_dataset() return split_info def phase3_train(): """Phase 3: Train model.""" logger.info("\n" + "=" * 60) logger.info("= PHASE 3: MODEL TRAINING") logger.info("=" * 60 + "\n") from train_model import train_model best_weights = train_model() return best_weights def run_pipeline(start_phase: int = 1, extract_only: bool = False): """Run the full pipeline from the specified starting phase.""" pipeline_start = datetime.now() logger.info("+" + "-" * 58 + "+") logger.info("| PERSON DETECTION PIPELINE |") logger.info("| " + f"Started: {pipeline_start.strftime('%Y-%m-%d %H:%M:%S')}".ljust(57) + "|") logger.info("+" + "-" * 58 + "+") logger.info("") logger.info(f"Configuration:") logger.info(f" Video directory: {cfg.VIDEO_DIR}") logger.info(f" Dataset output: {cfg.DATASET_DIR}") logger.info(f" Detector model: {cfg.DETECTOR_MODEL}") logger.info(f" Training model: {cfg.TRAIN_MODEL}") logger.info(f" Max dataset size: {cfg.MAX_DATASET_SIZE_GB} GB") logger.info(f" Starting phase: {start_phase}") logger.info(f" Extract only: {extract_only}") logger.info("") try: # Phase 1: Extract if start_phase <= 1: p1_start = datetime.now() phase1_extract() p1_duration = datetime.now() - p1_start logger.info(f"\nPhase 1 completed in {p1_duration}") if extract_only: logger.info("Extract-only mode — stopping after Phase 1.") return # Phase 2: Split if start_phase <= 2: p2_start = datetime.now() phase2_split() p2_duration = datetime.now() - p2_start logger.info(f"\nPhase 2 completed in {p2_duration}") # Phase 3: Train if start_phase <= 3: p3_start = datetime.now() best_weights = phase3_train() p3_duration = datetime.now() - p3_start logger.info(f"\nPhase 3 completed in {p3_duration}") # Final summary pipeline_end = datetime.now() total_duration = pipeline_end - pipeline_start logger.info("\n" + "+" + "-" * 58 + "+") logger.info("| PIPELINE COMPLETED SUCCESSFULLY |") logger.info("| " + f"Duration: {total_duration}".ljust(57) + "|") logger.info("+" + "-" * 58 + "+") except KeyboardInterrupt: logger.warning("\n\nPipeline interrupted by user. Progress has been checkpointed.") logger.warning("Re-run to resume from where you left off.") sys.exit(1) except Exception as e: logger.error(f"\n\nPipeline failed with error: {e}") logger.error(traceback.format_exc()) logger.error("Progress has been checkpointed. Fix the error and re-run.") sys.exit(1) def main(): parser = argparse.ArgumentParser( description="Person detection dataset pipeline", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: python run_pipeline.py Run full pipeline python run_pipeline.py --phase 2 Start from split phase python run_pipeline.py --phase 3 Start from training phase python run_pipeline.py --extract-only Only extract dataset """, ) parser.add_argument( "--phase", type=int, default=1, choices=[1, 2, 3], help="Starting phase (1=extract, 2=split, 3=train)", ) parser.add_argument( "--extract-only", action="store_true", help="Only run extraction phase", ) args = parser.parse_args() run_pipeline(start_phase=args.phase, extract_only=args.extract_only) if __name__ == "__main__": main()