174 lines
5.7 KiB
Python
174 lines
5.7 KiB
Python
"""
|
|
run_pipeline.py
|
|
───────────────
|
|
Master pipeline script that orchestrates the full workflow:
|
|
|
|
Phase 1: Extract frames from videos (adaptive sampling + YOLO detection)
|
|
Phase 2: Split dataset by camera (train/test)
|
|
Phase 3: Train yolo26n.pt on the dataset
|
|
|
|
Designed to run overnight — resumable from any phase.
|
|
|
|
Usage:
|
|
python run_pipeline.py # Run full pipeline
|
|
python run_pipeline.py --phase 2 # Start from phase 2 (skip extraction)
|
|
python run_pipeline.py --phase 3 # Start from phase 3 (skip extract + split)
|
|
python run_pipeline.py --extract-only # Only extract (no split or train)
|
|
"""
|
|
|
|
import argparse
import logging
import os
import sys
import time
import traceback
from datetime import datetime, timedelta

import pipeline_config as cfg
|
|
|
|
# ──────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────
# Module-level setup: runs at import time, creates cfg.LOG_DIR if needed and
# gives every run its own timestamped log file next to the console output.
os.makedirs(cfg.LOG_DIR, exist_ok=True)
log_file = os.path.join(
    cfg.LOG_DIR,
    f"pipeline_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log",
)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        # Explicit UTF-8 so non-ASCII text in messages (e.g. the em dash in
        # the extract-only notice, or non-ASCII paths from the config) is
        # written the same way regardless of the platform's default encoding.
        logging.FileHandler(log_file, encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def phase1_extract():
    """Run Phase 1 (dataset extraction).

    Logs a three-line phase banner, then calls ``extract_all()`` from the
    ``extract_dataset`` module and returns its result unchanged.
    """
    rule = "=" * 60
    for banner_line in ("\n" + rule, "= PHASE 1: DATASET EXTRACTION", rule + "\n"):
        logger.info(banner_line)

    # Imported here, after the banner, so the module is only loaded when the
    # phase actually runs.
    from extract_dataset import extract_all

    return extract_all()
|
|
|
|
|
|
def phase2_split():
    """Run Phase 2 (camera-level train/test split).

    Logs a three-line phase banner, then calls ``split_dataset()`` from the
    ``split_dataset`` module and returns its result unchanged.
    """
    rule = "=" * 60
    for banner_line in (
        "\n" + rule,
        "= PHASE 2: CAMERA-LEVEL TRAIN/TEST SPLIT",
        rule + "\n",
    ):
        logger.info(banner_line)

    # Imported here, after the banner, so the module is only loaded when the
    # phase actually runs.
    from split_dataset import split_dataset

    return split_dataset()
|
|
|
|
|
|
def phase3_train():
    """Run Phase 3 (model training).

    Logs a three-line phase banner, then calls ``train_model()`` from the
    ``train_model`` module and returns its result unchanged.
    """
    rule = "=" * 60
    for banner_line in ("\n" + rule, "= PHASE 3: MODEL TRAINING", rule + "\n"):
        logger.info(banner_line)

    # Imported here, after the banner, so the module is only loaded when the
    # phase actually runs.
    from train_model import train_model

    return train_model()
|
|
|
|
|
|
def _log_box(title, detail, *, lead=""):
    """Log a 60-column ASCII box with a centered title and one detail row."""
    border = "+" + "-" * 58 + "+"
    logger.info(lead + border)
    # Center the title inside the 58 inner columns so it lines up with the
    # border and with the detail row below.
    logger.info("|" + title.center(58) + "|")
    logger.info("| " + detail.ljust(57) + "|")
    logger.info(border)


def _run_timed_phase(number, phase_fn):
    """Call ``phase_fn``, log how long it took, and return its result."""
    # Monotonic clock: a wall-clock difference would be off if the system
    # time changes (DST switch, NTP step) while a long phase is running.
    started = time.monotonic()
    result = phase_fn()
    elapsed = timedelta(seconds=time.monotonic() - started)
    logger.info(f"\nPhase {number} completed in {elapsed}")
    return result


def run_pipeline(start_phase: int = 1, extract_only: bool = False):
    """Run the pipeline phases in order, starting at ``start_phase``.

    Args:
        start_phase: First phase to execute (1=extract, 2=split, 3=train).
            Phases with a lower number are skipped.
        extract_only: When true, return after the extraction step without
            running the split or training phases. If combined with
            ``start_phase > 1`` no phase runs at all and a warning is logged.

    Returns:
        None.

    Raises:
        SystemExit: With status 1 when the run is interrupted with
            ``KeyboardInterrupt`` or any phase raises an ``Exception``; the
            failure is logged before exiting.
    """
    pipeline_start = datetime.now()  # wall-clock time, for display only
    clock_start = time.monotonic()   # monotonic time, for the total duration

    _log_box(
        "PERSON DETECTION PIPELINE",
        f"Started: {pipeline_start.strftime('%Y-%m-%d %H:%M:%S')}",
    )
    logger.info("")
    logger.info("Configuration:")
    logger.info(f" Video directory: {cfg.VIDEO_DIR}")
    logger.info(f" Dataset output: {cfg.DATASET_DIR}")
    logger.info(f" Detector model: {cfg.DETECTOR_MODEL}")
    logger.info(f" Training model: {cfg.TRAIN_MODEL}")
    logger.info(f" Max dataset size: {cfg.MAX_DATASET_SIZE_GB} GB")
    logger.info(f" Starting phase: {start_phase}")
    logger.info(f" Extract only: {extract_only}")
    logger.info("")

    try:
        # Phase 1: Extract
        if start_phase <= 1:
            _run_timed_phase(1, phase1_extract)

        if extract_only:
            if start_phase > 1:
                # Contradictory request: extraction was skipped by
                # start_phase, and extract_only forbids the later phases.
                logger.warning(
                    f"Extract-only mode with starting phase {start_phase}: "
                    "extraction was skipped, so no phase was run."
                )
            else:
                logger.info("Extract-only mode — stopping after Phase 1.")
            return

        # Phase 2: Split
        if start_phase <= 2:
            _run_timed_phase(2, phase2_split)

        # Phase 3: Train
        if start_phase <= 3:
            _run_timed_phase(3, phase3_train)

        # Final summary
        total_duration = timedelta(seconds=time.monotonic() - clock_start)
        _log_box(
            "PIPELINE COMPLETED SUCCESSFULLY",
            f"Duration: {total_duration}",
            lead="\n",
        )

    except KeyboardInterrupt:
        logger.warning("\n\nPipeline interrupted by user. Progress has been checkpointed.")
        logger.warning("Re-run to resume from where you left off.")
        sys.exit(1)

    except Exception as e:
        # Top-level boundary: logger.exception records the message together
        # with the active traceback in a single ERROR record.
        logger.exception(f"\n\nPipeline failed with error: {e}")
        logger.error("Progress has been checkpointed. Fix the error and re-run.")
        sys.exit(1)
|
|
|
|
|
|
def main(argv=None):
    """Parse command-line options and start the pipeline.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the original behaviour.

    Raises:
        SystemExit: Raised by argparse for ``--help`` (status 0) or for
            invalid arguments (status 2).
    """
    # (command, description) pairs; padded into two columns below so the
    # description never runs into the command in the help output.
    examples = (
        ("python run_pipeline.py", "Run full pipeline"),
        ("python run_pipeline.py --phase 2", "Start from split phase"),
        ("python run_pipeline.py --phase 3", "Start from training phase"),
        ("python run_pipeline.py --extract-only", "Only extract dataset"),
    )
    epilog = "Examples:\n" + "\n".join(
        f"  {command:<40}{description}" for command, description in examples
    )

    parser = argparse.ArgumentParser(
        description="Person detection dataset pipeline",
        # Raw formatter keeps the line breaks and column alignment of the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument(
        "--phase",
        type=int,
        default=1,
        choices=[1, 2, 3],
        help="Starting phase (1=extract, 2=split, 3=train)",
    )
    parser.add_argument(
        "--extract-only",
        action="store_true",
        help="Only run extraction phase",
    )

    args = parser.parse_args(argv)
    run_pipeline(start_phase=args.phase, extract_only=args.extract_only)
|
|
|
|
|
|
# Script entry point: start the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|