# Source code for fantasyfootball.benchmarking

from itertools import product
import pandas as pd
import pandas_flavor as pf

from fantasyfootball.data import FantasyData
from fantasyfootball.config import root_dir, scoring
from urllib.error import URLError


@pf.register_dataframe_method
def filter_to_prior_week(
    df: pd.DataFrame, season_year: int, week_number: int
) -> pd.DataFrame:
    """Keep only the rows dated on or before the end of a given week.

    The season calendar is read from
    ``root_dir/datasets/season/<season_year>/calendar.gz``; the latest
    ``date`` listed for ``week_number`` is used as the cutoff.

    Args:
        df (pd.DataFrame): Historical data and features. Must contain a
            ``date`` column.
        season_year (int): Year of the season.
        week_number (int): Week number of the most recently completed week.

    Returns:
        pd.DataFrame: Rows of ``df`` whose ``date`` is less than or equal to
            the last calendar date of ``week_number``.

    Raises:
        ValueError: If the calendar has no entries for ``week_number``.
    """
    calendar_path = (
        root_dir / "datasets" / "season" / str(season_year) / "calendar.gz"
    )
    # The calendar is read without date parsing, so ``df["date"]`` has to be
    # comparable with the values exactly as ``read_csv`` returns them.
    calendar_df = pd.read_csv(calendar_path)
    week_dates = calendar_df.loc[calendar_df["week"] == week_number, "date"]
    if week_dates.empty:
        # Fail with an actionable message instead of the opaque
        # "max() arg is an empty sequence" error.
        raise ValueError(
            f"Week {week_number} not found in the {season_year} calendar."
        )
    max_date_week = week_dates.max()
    return df[df["date"] <= max_date_week]


@pf.register_dataframe_method
def score_benchmark_data(
    benchmark_df: pd.DataFrame, scoring_source: str
) -> pd.DataFrame:
    """Add point projection based on predictions from:
    https://fantasydata.com/nfl/fantasy-football-weekly-projections
    for use in benchmarking.

    Each distinct (name, team, position, season_year) combination is scored
    separately with ``FantasyData.score_player`` and the results are stacked.

    Args:
        benchmark_df (pd.DataFrame): Weekly player predictions from
            fantasydata.com. Must contain the columns ``name``, ``team``,
            ``position``, ``season_year`` and ``week``.
        scoring_source (str): Name of the scoring system to apply
            (e.g., 'yahoo'). Must be a key of the ``scoring`` config.

    Returns:
        pd.DataFrame: One row per player-week with the columns ``name``,
            ``team``, ``position``, ``season_year``, ``week`` and
            ``ff_pts_<scoring_source>_fantasydata_pred``. An empty DataFrame
            is returned when ``benchmark_df`` has no rows.

    Raises:
        ValueError: If ``scoring_source`` is not present in the ``scoring``
            config.
    """
    scoring_source_rules = scoring.get(scoring_source)
    if scoring_source_rules is None:
        raise ValueError(f"Unknown scoring source: {scoring_source!r}.")
    # Only score the stat columns that are actually present in the input.
    scoring_columns = set(scoring_source_rules["scoring_columns"].keys()) & set(
        benchmark_df.columns
    )
    id_columns = ["name", "team", "position", "season_year"]
    pred_column = f"ff_pts_{scoring_source}_fantasydata_pred"
    # Select the prediction column by name; picking "the last column" would
    # grab the wrong one if the input already carried a column of that name.
    output_columns = [*id_columns, "week", pred_column]

    # Collect the per-player frames and concatenate once at the end; growing
    # a DataFrame inside the loop copies all accumulated rows every time.
    scored_frames = []
    for row in benchmark_df[id_columns].drop_duplicates().itertuples(index=False):
        player_df = benchmark_df[
            (benchmark_df["name"] == row.name)
            & (benchmark_df["team"] == row.team)
            & (benchmark_df["position"] == row.position)
            & (benchmark_df["season_year"] == row.season_year)
        ]
        player_weekly_points = FantasyData.score_player(
            player_df, scoring_columns, scoring_source_rules
        )
        scored_df = player_df.assign(**{pred_column: player_weekly_points})
        scored_frames.append(scored_df[output_columns])

    if not scored_frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(scored_frames)


def get_benchmarking_data(
    season_year_start: int,
    season_year_end: int,
    base_url: str = "https://raw.githubusercontent.com/thecodeforest/fantasyfootball/main/examples/benchmarking_data/season/",  # noqa: E501
) -> pd.DataFrame:
    """Load the weekly benchmark projection files for a range of seasons.

    For every week 1-17 of every season in the requested range, the file
    ``<base_url>/<year>/wk<week>.csv`` is read, tagged with a
    ``season_year`` column, and stacked into a single DataFrame.

    Args:
        season_year_start (int): First season to load (2018 or later).
        season_year_end (int): Last season to load, inclusive (2021 or
            earlier).
        base_url (str): Location that holds one directory per season year.
            A trailing slash is optional.

    Returns:
        pd.DataFrame: All weekly benchmark files concatenated, with an added
            ``season_year`` column.

    Raises:
        ValueError: If the requested seasons fall outside 2018-2021 or the
            start year is greater than the end year.
        URLError: If a weekly file cannot be retrieved.
    """
    if season_year_start < 2018 or season_year_end > 2021:
        raise ValueError("Season year must be between 2018 and 2021.")
    if season_year_start > season_year_end:
        raise ValueError(
            "season_year_start must not be greater than season_year_end."
        )
    # The default base_url already ends with "/"; strip it so the joined URL
    # does not contain a double slash.
    base = base_url.rstrip("/")
    weekly_frames = []
    for week, year in product(
        range(1, 18), range(season_year_start, season_year_end + 1)
    ):
        benchmark_url = f"{base}/{year}/wk{week}.csv"
        try:
            benchmark_df = pd.read_csv(benchmark_url)
        except URLError as err:
            raise URLError(
                f"Could not find {benchmark_url}. "
                "Check if internet connection is working."
            ) from err
        benchmark_df["season_year"] = year
        weekly_frames.append(benchmark_df)
    # Return only after every week/year file has been read; returning from
    # inside the loop would yield just the first file.
    return pd.concat(weekly_frames)