🎯 Weighted Observations

Farseer natively supports observation weights, allowing you to give more importance to recent or reliable data points. This is perfect for emphasizing recent trends, downweighting outliers, or incorporating data quality information.

Basic Usage

import polars as pl
import numpy as np
from datetime import datetime, timedelta
from farseer import Farseer

# Create data with weights
np.random.seed(42)
n = 100
dates = [datetime(2020, 1, 1) + timedelta(days=i) for i in range(n)]

# Rows are in chronological order, so the most recent observations are the
# LAST ones: the newer half gets twice the weight of the older half.
recent_start = n // 2

df = pl.DataFrame({
    'ds': dates,
    'y': np.random.randn(n).cumsum() + 50,
    'weight': [2.0 if i >= recent_start else 1.0 for i in range(n)]  # Weight recent data more
})

# Fit with weights - Farseer automatically detects 'weight' column
m = Farseer()
m.fit(df)

# Make predictions
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)

print(forecast.select(['ds', 'yhat', 'yhat_lower', 'yhat_upper']).tail())

Downweighting Outliers

import polars as pl
import numpy as np
from datetime import datetime, timedelta
from farseer import Farseer

# Simulate one year of daily observations as a random walk around 100
np.random.seed(42)
n = 365
dates = [datetime(2020, 1, 1) + timedelta(days=offset) for offset in range(n)]
y = np.cumsum(np.random.randn(n)) + 100

# Corrupt a handful of days with large random shocks
outlier_indices = [50, 100, 200, 300]
y[outlier_indices] += 50 * np.random.randn(len(outlier_indices))

# Every observation starts at full weight; the known outliers are then
# reduced to a tenth of that so they barely influence the fit
weights = np.full(n, 1.0)
weights[outlier_indices] = 0.1

df = pl.DataFrame({
    'ds': dates,
    'y': y,
    'weight': weights,
})

# Fit on the weighted frame and forecast 90 periods past the end of the data
m = Farseer()
m.fit(df)
future = m.make_future_dataframe(periods=90)
forecast = m.predict(future)

Use Cases

  • Recency weighting: Give more importance to recent observations in evolving trends
  • Data quality: Downweight suspicious or low-quality measurements
  • Confidence scores: Incorporate measurement uncertainty
  • Business logic: Emphasize important time periods (e.g., peak season)

📈 Custom Regressors

Add additional variables to your forecast model to capture effects beyond trend and seasonality. Regressors can be continuous or binary variables that influence your time series.

Adding Multiple Regressors

import polars as pl
import numpy as np
from datetime import datetime, timedelta
from farseer import Farseer, regressor_coefficients

# Create sample data
np.random.seed(42)
n = 365 * 2
dates = [datetime(2020, 1, 1) + timedelta(days=i) for i in range(n)]
dates_series = pl.Series("ds", dates)

# Base trend and seasonality
trend = np.arange(n) * 0.3 + 100
yearly = 20 * np.sin(2 * np.pi * np.arange(n) / 365.25)

# Create regressors.
# Polars' dt.weekday() is ISO-numbered (Monday=1 ... Sunday=7), so Saturday and
# Sunday are `>= 6`; `>= 5` would also flag every Friday as a weekend day.
# The flag is converted to a NumPy array so the target below is computed with
# plain ndarray arithmetic rather than mixed ndarray/Series operations.
is_weekend = (dates_series.dt.weekday() >= 6).cast(pl.Float64).to_numpy()
temperature = 15 + 10 * np.sin(2 * np.pi * np.arange(n) / 365.25) + np.random.randn(n) * 3
promo = np.zeros(n)
promo[np.random.choice(n, 50, replace=False)] = 1  # 50 distinct promo days

# Combine with regressor effects
y = trend + yearly + (-10 * is_weekend) + (0.5 * temperature) + (15 * promo) + np.random.randn(n) * 3

df = pl.DataFrame({
    'ds': dates,
    'y': y,
    'is_weekend': is_weekend,
    'temperature': temperature,
    'promo': promo
})

# Create model and add regressors
m = Farseer(yearly_seasonality=True)
m.add_regressor('is_weekend', prior_scale=10.0, mode='additive')
m.add_regressor('temperature', prior_scale=10.0, mode='additive')
m.add_regressor('promo', prior_scale=5.0, mode='additive')

# Split train/test chronologically (first 80% of rows for training)
train_size = int(n * 0.8)
train = df[:train_size]
test = df[train_size:]

# Fit and forecast; `test` already carries the regressor columns that
# the model needs for the forecast period
m.fit(train)
forecast = m.predict(test)

# Get regressor coefficients
coeffs = regressor_coefficients(m)
print(coeffs)

Tips for Regressors

  • Standardization: Continuous variables are auto-standardized; binary (0/1) are not
  • Prior scale: Controls regularization; larger = more flexible, smaller = more conservative
  • Mode: Use 'additive' for most cases, 'multiplicative' when effect scales with level
  • Future values: Ensure regressor values are available for forecast period

🔄 Manual Changepoints

When you know specific dates where your time series trend changed (e.g., product launches, policy changes), you can specify them manually for more accurate forecasts.

Specifying Known Changepoints

import polars as pl
import numpy as np
from datetime import datetime, timedelta
from farseer import Farseer

# Create data with known trend changes: three years of daily observations
np.random.seed(42)
n = 365 * 3
dates = [datetime(2020, 1, 1) + timedelta(days=i) for i in range(n)]

# Create trend with changepoints at specific dates.
# The series is piecewise linear with slope changes on 2021-01-01 and
# 2022-01-01; each later segment starts from the level accumulated by the
# earlier ones (each earlier segment is counted as 365 days).
y = []
base = 100
for i, date in enumerate(dates):
    if date < datetime(2021, 1, 1):
        slope = 0.5  # Moderate growth
        y_val = base + slope * i
    elif date < datetime(2022, 1, 1):
        slope = 1.5  # Rapid growth (policy change)
        days = (date - datetime(2021, 1, 1)).days  # days since the first changepoint
        y_val = base + 0.5 * 365 + slope * days
    else:
        slope = 0.3  # Slow growth (market saturation)
        days = (date - datetime(2022, 1, 1)).days  # days since the second changepoint
        y_val = base + 0.5 * 365 + 1.5 * 365 + slope * days

    # Add a yearly sine component and one Gaussian noise draw per day
    yearly_season = 10 * np.sin(2 * np.pi * i / 365.25)
    y.append(y_val + yearly_season + np.random.randn() * 5)

df = pl.DataFrame({'ds': dates, 'y': y})

# Specify changepoints at known dates (the same dates used to build the data)
manual_changepoints = ['2021-01-01', '2022-01-01']

m = Farseer(
    changepoints=manual_changepoints,
    yearly_seasonality=True,
    weekly_seasonality=False
)

# Split and fit: the first 85% of rows (930 days, i.e. into July 2022) are used
# for training, so both changepoints fall inside the training window
train_size = int(n * 0.85)
train = df[:train_size]
test = df[train_size:]

m.fit(train)
forecast = m.predict(test)

# Report the changepoints held by the model and the mean absolute error on the
# held-out rows
print(f"Changepoints used: {m.changepoints}")
print(f"Test MAE: {np.mean(np.abs(test['y'] - forecast['yhat'][:len(test)])):.2f}")

Automatic vs Manual Changepoints

# Automatic: Let Farseer find changepoints
m_auto = Farseer(
    n_changepoints=25,           # Number of potential changepoints
    changepoint_range=0.8,       # Consider first 80% of data
    changepoint_prior_scale=0.05 # Flexibility (higher = more flexible)
)

# Manual: Specify exact dates
m_manual = Farseer(
    changepoints=['2021-01-01', '2021-06-15', '2022-01-01']
)

# Tuned automatic: detection is still automatic (no explicit dates are given),
# but with fewer candidates, a wider range and a looser prior than above
m_hybrid = Farseer(
    n_changepoints=15,            # Fewer potential changepoints
    changepoint_range=0.9,        # Consider first 90% of data
    changepoint_prior_scale=0.1   # More flexible than 0.05
)

🎄 Holiday Effects

Model the impact of holidays and special events on your time series with customizable windows before and after each event.

Creating a Holiday DataFrame

import polars as pl
from datetime import datetime
from farseer import Farseer

# Define holidays: one row per occurrence. The i-th entries of 'holiday', 'ds'
# and 'upper_window' belong together; the scalar 'lower_window' applies to
# every row.
holidays = pl.DataFrame({
    'holiday': ['Christmas', 'Christmas', 'New Year', 'New Year',
                'Black Friday', 'Black Friday', 'Thanksgiving', 'Thanksgiving'],
    'ds': [
        datetime(2020, 12, 25), datetime(2021, 12, 25),
        datetime(2021, 1, 1), datetime(2022, 1, 1),
        datetime(2020, 11, 27), datetime(2021, 11, 26),
        datetime(2020, 11, 26), datetime(2021, 11, 25)
    ],
    'lower_window': 0,  # Days before
    'upper_window': [1, 1, 1, 1, 3, 3, 2, 2]  # Days after
})

# Create model with holidays
m = Farseer(holidays=holidays, yearly_seasonality=True)

# Fit model.
# NOTE: `df` and `future` are not defined in this snippet; they are assumed to
# be a training frame and a future frame like those built in earlier examples.
m.fit(df)
forecast = m.predict(future)

Holiday Windows

# Christmas: affect day itself and day after.
# One row per year for 2020-2023, built directly as datetimes so that 'ds' has
# the same dtype as the Black Friday frame it is concatenated with below.
christmas = pl.DataFrame({
    'holiday': 'Christmas',
    'ds': [datetime(year, 12, 25) for year in range(2020, 2024)],
    'lower_window': 0,
    'upper_window': 1
})

# Black Friday: affect the day itself and the 3 days after
black_friday = pl.DataFrame({
    'holiday': 'Black Friday',
    'ds': [datetime(2020, 11, 27), datetime(2021, 11, 26), datetime(2022, 11, 25)],
    'lower_window': 0,
    'upper_window': 3
})

# Combine holidays (both frames share the same four columns)
all_holidays = pl.concat([christmas, black_friday])

# NOTE: `train_df` is a placeholder for your own training frame.
m = Farseer(holidays=all_holidays)
m.fit(train_df)

📅 Custom Seasonality

Beyond yearly, weekly, and daily seasonality, you can add any custom periodic pattern such as monthly, quarterly, or domain-specific cycles.

Adding Monthly Seasonality

from farseer import Farseer

# Create model with custom monthly seasonality.
# Yearly seasonality stays on; the weekly and daily components are switched off.
m = Farseer(
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False
)

# Add monthly seasonality (period = 30.5 days)
m.add_seasonality(
    name='monthly',
    period=30.5,
    fourier_order=5  # Number of Fourier components
)

# NOTE: `df` and `future` are not defined in this snippet; they are assumed to
# come from an earlier example.
m.fit(df)
forecast = m.predict(future)

Quarterly Business Cycles

from farseer import Farseer

m = Farseer()

# Quarterly cycle (91.25 days), added on top of the model's default settings
m.add_seasonality(
    name='quarterly',
    period=91.25,
    fourier_order=8,
    mode='additive'
)

# NOTE: `df` is not defined in this snippet; supply your own training frame.
m.fit(df)

📅 Conditional Seasonality

Apply seasonal patterns only when certain conditions are met. Perfect for modeling different behavior on weekdays vs weekends, holidays vs normal days, or any binary condition.

Weekday vs Weekend Patterns

import polars as pl
import numpy as np
from datetime import datetime, timedelta
from farseer import Farseer

# Create data with different patterns on weekdays vs weekends
np.random.seed(42)
n = 365 * 2
dates = [datetime(2020, 1, 1) + timedelta(days=i) for i in range(n)]
dates_series = pl.Series("ds", dates)

# Base trend
trend = np.arange(n) * 0.3 + 100

# Different weekly patterns for weekdays vs weekends.
# Polars' dt.weekday() is ISO-numbered (Monday=1 ... Sunday=7): Monday-Friday
# is `< 6` and Saturday/Sunday is `>= 6`. Using 5 as the cut-off would treat
# every Friday as a weekend day.
is_weekday = dates_series.dt.weekday() < 6
weekly_pattern = np.where(
    is_weekday.to_numpy(),                         # plain boolean ndarray mask
    5 * np.sin(2 * np.pi * np.arange(n) / 7),      # Weekday pattern
    -8 * np.sin(2 * np.pi * np.arange(n) / 7)      # Weekend pattern (different amplitude)
)

y = trend + weekly_pattern + np.random.randn(n) * 3

df = pl.DataFrame({
    'ds': dates,
    'y': y,
    'is_weekday': is_weekday
})

# Add weekend condition (the exact complement of the weekday flag)
df = df.with_columns((~pl.col('is_weekday')).alias('is_weekend'))

# Create model with conditional seasonality
m = Farseer(
    yearly_seasonality=False,
    weekly_seasonality=False  # Disable default weekly seasonality
)

# Add conditional weekly seasonality for weekdays
m.add_seasonality(
    name='weekly_on_weekday',
    period=7,
    fourier_order=3,
    condition_name='is_weekday'
)

# Add conditional weekly seasonality for weekends
m.add_seasonality(
    name='weekly_on_weekend',
    period=7,
    fourier_order=3,
    condition_name='is_weekend'
)

# Fit model on the first 80% of rows
train_size = int(n * 0.8)
train = df[:train_size]
test = df[train_size:]

m.fit(train)

# Predict (remember to add condition columns to future!)
# The same ISO weekday cut-off must be used here as in the training data.
future = m.make_future_dataframe(periods=90)
future = future.with_columns([
    (pl.col('ds').dt.weekday() < 6).alias('is_weekday'),
    (pl.col('ds').dt.weekday() >= 6).alias('is_weekend')
])

forecast = m.predict(future)
print(forecast.select(['ds', 'yhat', 'weekly_on_weekday', 'weekly_on_weekend']).tail())

Multiple Conditional Seasonalities

from farseer import Farseer

# NOTE: this snippet uses `pl` (polars) and an existing `df` with a 'ds'
# column; neither is created here, so both are assumed to come from an earlier
# example.
m = Farseer()

# Different monthly patterns during summer vs winter
m.add_seasonality(
    name='monthly_summer',
    period=30.5,
    fourier_order=5,
    condition_name='is_summer'
)

m.add_seasonality(
    name='monthly_winter',
    period=30.5,
    fourier_order=5,
    condition_name='is_winter'
)

# Add conditions to dataframe: summer = June-August, winter = December-February.
# Rows in the remaining months are False for both flags.
df = df.with_columns([
    (pl.col('ds').dt.month().is_in([6, 7, 8])).alias('is_summer'),
    (pl.col('ds').dt.month().is_in([12, 1, 2])).alias('is_winter')
])

m.fit(df)

# Remember to add conditions to future dataframe too!
# The expressions are identical to the ones applied to the training frame.
future = m.make_future_dataframe(periods=365)
future = future.with_columns([
    (pl.col('ds').dt.month().is_in([6, 7, 8])).alias('is_summer'),
    (pl.col('ds').dt.month().is_in([12, 1, 2])).alias('is_winter')
])

forecast = m.predict(future)

Key Points

  • Condition columns: Must be boolean (True/False) in both training and future data
  • Masking: Fourier features are multiplied by 0.0 when condition is False, 1.0 when True
  • Multiple conditions: Can have multiple conditional seasonalities with different conditions
  • Serialization: Condition names are saved with the model

📏 Floor & Cap Parameters

For logistic growth models, you can specify both a cap (maximum value) and floor (minimum value) to bound your forecasts. This is useful for modeling phenomena with natural upper and lower limits.

Logistic Growth with Floor

import polars as pl
import numpy as np
from datetime import datetime, timedelta
from farseer import Farseer

# Three years of daily data following an S-curve bounded below and above
np.random.seed(42)
n = 365 * 3
dates = [datetime(2020, 1, 1) + timedelta(days=offset) for offset in range(n)]

# Curve parameters
floor_val = 2.0   # lower bound
cap_val = 10.0    # upper bound
k = 0.01          # Growth rate
t = np.arange(n)
t0 = n / 2        # midpoint of the curve

# Logistic function: floor + (cap - floor) / (1 + exp(-k*(t - t0)))
denominator = 1 + np.exp(-k * (t - t0))
y = floor_val + (cap_val - floor_val) / denominator
y = y + np.random.randn(n) * 0.3  # Add noise

df = pl.DataFrame({
    'ds': dates,
    'y': y,
    'floor': floor_val,  # Minimum value
    'cap': cap_val,      # Maximum value
})

# Logistic growth model without a yearly component
m = Farseer(growth='logistic', yearly_seasonality=False)

# Train on the first 80% of rows; the remainder is kept aside as `test`
train_size = int(n * 0.8)
train = df[:train_size]
test = df[train_size:]

m.fit(train)

# The future frame needs the same floor and cap columns as the training frame
floor_col = pl.lit(floor_val).alias('floor')
cap_col = pl.lit(cap_val).alias('cap')
future = m.make_future_dataframe(periods=180)
future = future.with_columns([floor_col, cap_col])

forecast = m.predict(future)

# Compare the configured bounds with the range of the predictions
print(f"Floor value: {floor_val}")
print(f"Cap value: {cap_val}")
print(f"Min prediction: {forecast['yhat'].min():.2f}")
print(f"Max prediction: {forecast['yhat'].max():.2f}")

Cap Only (Standard Logistic Growth)

import polars as pl
from farseer import Farseer

# Market share example (0% to 100%)
# NOTE: `dates` and `market_share` are placeholders for your own data; they are
# not defined in this snippet.
df = pl.DataFrame({
    'ds': dates,
    'y': market_share,  # Values between 0 and 100
    'cap': 100.0        # Maximum market share
})

# No 'floor' column is supplied in this example
m = Farseer(growth='logistic')
m.fit(df)

# The future frame gets the same constant cap as the training frame
future = m.make_future_dataframe(periods=365)
future = future.with_columns(pl.lit(100.0).alias('cap'))
forecast = m.predict(future)

Important Notes

  • Validation: Cap must be greater than floor for all data points
  • Scaling: Both y and cap are scaled relative to floor internally
  • Future data: The future dataframe must include cap, and floor as well whenever the model was fit with a floor
  • Time-varying bounds: Floor and cap don't have to be constant; they can vary over time

📊 Regressor Standardization

Farseer intelligently handles regressor standardization, automatically detecting binary vs continuous regressors and standardizing appropriately for optimal model performance.

Auto-Detection Mode (Recommended)

import polars as pl
import numpy as np
from datetime import datetime, timedelta
from farseer import Farseer

# Create data with mixed regressor types
np.random.seed(42)
n = 365 * 2
dates = [datetime(2020, 1, 1) + timedelta(days=i) for i in range(n)]
dates_series = pl.Series("ds", dates)

# Binary regressor (0 or 1).
# Polars' dt.weekday() is ISO-numbered (Monday=1 ... Sunday=7), so Saturday and
# Sunday are `>= 6`; `>= 5` would also mark Fridays. Converting to NumPy keeps
# the target computation below in plain ndarray arithmetic.
is_weekend = (dates_series.dt.weekday() >= 6).cast(pl.Int64).to_numpy()

# Continuous regressor
temperature = 15 + 10 * np.sin(2 * np.pi * np.arange(n) / 365.25) + np.random.randn(n) * 3

# Another binary regressor (1 with probability 0.1)
is_promo = np.random.choice([0, 1], n, p=[0.9, 0.1])

# Create target variable
y = (100 + np.arange(n) * 0.3 +
     (-10 * is_weekend) +
     (0.5 * temperature) +
     (15 * is_promo) +
     np.random.randn(n) * 3)

df = pl.DataFrame({
    'ds': dates,
    'y': y,
    'is_weekend': is_weekend,
    'temperature': temperature,
    'is_promo': is_promo
})

# Create model with auto-standardization
m = Farseer()

# Binary regressors won't be standardized (mean=0, std=1 used)
m.add_regressor('is_weekend', standardize='auto')
m.add_regressor('is_promo', standardize='auto')

# Continuous regressor will be standardized (z-score normalization)
m.add_regressor('temperature', standardize='auto')

m.fit(df)

# Get coefficients
from farseer import regressor_coefficients
coeffs = regressor_coefficients(m)
print("Regressor coefficients:")
print(coeffs)

Standardization Modes

from farseer import Farseer

# NOTE(review): the mode is passed as a string ('auto' / 'true' / 'false') in
# every call below, not as a Python bool — confirm against the add_regressor API.
m = Farseer()

# Auto mode (recommended): detects binary vs continuous
m.add_regressor('feature1', standardize='auto')

# Force standardization (even for binary)
m.add_regressor('feature2', standardize='true')

# Force no standardization
m.add_regressor('feature3', standardize='false')

# NOTE: `df` is a placeholder; it must contain the columns 'feature1',
# 'feature2' and 'feature3' registered above.
m.fit(df)

How Auto-Detection Works

# Auto mode detection logic:
# - Binary regressor (only 0 and 1 values): NOT standardized
#   → Uses mu=0, std=1 (no transformation)
#
# - Continuous regressor: IS standardized
#   → Uses mu=mean(x), std=std(x)
#   → Transforms: (x - mu) / std
#
# - Constant regressor: NEVER standardized
#   → Uses mu=value, std=1

# Example (illustrative only: the `...` entries stand for the remaining values).
# NOTE(review): `df[...] = ...` is pandas-style column assignment; a Polars
# DataFrame would need `df.with_columns(...)` instead — confirm which frame
# type is intended here.
df['binary_flag'] = [0, 1, 0, 1, ...]  # Only 0s and 1s → NOT standardized
df['temperature'] = [15.2, 18.4, ...]  # Continuous → IS standardized
df['constant'] = [5.0, 5.0, ...]       # Constant → NOT standardized

When to Use Each Mode

  • 'auto' (default): Best for most cases, intelligently handles different regressor types
  • 'true': When you want all regressors on same scale (even binary)
  • 'false': When your features are already on appropriate scales or have meaningful units

Impact on Coefficients

# Without standardization:
# - Coefficient magnitude depends on regressor scale
# - Hard to compare importance across regressors
# - Example: temperature coefficient ~0.5, binary flag coefficient ~-10

# With standardization:
# - Coefficients are more comparable
# - Represents effect of 1 standard deviation change
# - Easier to interpret relative importance

🎄 Holiday Prior Scales

Farseer allows independent prior scales for each holiday, giving you fine control over how strongly each event affects your forecast. This is separate from seasonality priors.

Different Priors for Different Holidays

from farseer import Farseer
from datetime import datetime

m = Farseer()

# Major holiday with strong effect
m.add_holidays(
    'christmas',
    dates=[
        datetime(2020, 12, 25),
        datetime(2021, 12, 25),
        datetime(2022, 12, 25)
    ],
    prior_scale=20.0,  # Large prior → strong effect allowed
    lower_window=-1,   # Include day before
    upper_window=1     # Include day after
)

# Medium holiday (no window arguments are passed for this one)
m.add_holidays(
    'thanksgiving',
    dates=[
        datetime(2020, 11, 26),
        datetime(2021, 11, 25),
        datetime(2022, 11, 24)
    ],
    prior_scale=10.0   # Default prior scale
)

# Minor event with weak effect
m.add_holidays(
    'minor_event',
    dates=[datetime(2020, 3, 17), datetime(2021, 3, 17)],
    prior_scale=5.0    # Small prior → weak effect
)

# NOTE: `df` is not defined in this snippet; supply your own training frame.
m.fit(df)

# Holiday effects are independent from seasonality_prior_scale
print(f"Seasonality prior: {m.seasonality_prior_scale}")
print(f"Holiday priors: configured per holiday")

Understanding Prior Scales

# Prior scale interpretation:
# - Larger values (20+): Allow strong holiday effects
# - Medium values (10): Balanced (default)
# - Smaller values (5-): Regularize toward zero (weak effects)

# Example use cases:
# - Black Friday (retail): prior_scale=25.0 → huge impact expected
# - Christmas: prior_scale=20.0 → strong impact
# - Valentine's Day: prior_scale=10.0 → moderate impact
# - Minor awareness day: prior_scale=3.0 → minimal impact

Holiday Priors vs Seasonality Priors

from farseer import Farseer

m = Farseer(
    seasonality_prior_scale=10.0  # Affects ALL seasonalities
)

# This is SEPARATE from holiday priors
# NOTE: `dates=[...]` is a placeholder; replace it with real holiday dates.
m.add_holidays(
    'christmas',
    dates=[...],
    prior_scale=20.0  # Independent from seasonality_prior_scale
)

# Custom seasonality also uses seasonality_prior_scale by default
m.add_seasonality(
    name='monthly',
    period=30.5,
    fourier_order=5,
    prior_scale=15.0  # Can override seasonality_prior_scale
)

# NOTE: `df` is not defined in this snippet; supply your own training frame.
m.fit(df)

🚀 Performance Optimization

Farseer is built for speed with automatic multithreading and Polars DataFrames. Here are tips to maximize performance.

Use Polars for Best Performance

import polars as pl  # Recommended
import pandas as pd
from farseer import Farseer
from datetime import datetime, timedelta

# The same 1000-row daily series, built once per DataFrame library
n_rows = 1000

# Polars (Recommended - 5-10x faster)
dates = [datetime(2020, 1, 1) + timedelta(days=offset) for offset in range(n_rows)]
df_polars = pl.DataFrame({'ds': dates, 'y': list(range(n_rows))})

# Pandas (Still supported)
df_pandas = pd.DataFrame({
    'ds': pd.date_range('2020-01-01', periods=n_rows),
    'y': range(n_rows),
})

# Either frame can be passed to fit(); the Polars one is used here
m = Farseer()
m.fit(df_polars)  # Faster ⚡

Multithreading is Automatic

# Farseer automatically uses all CPU cores
# No configuration needed!

# NOTE: `large_df` is a placeholder for your own (large) training frame.
m = Farseer()
m.fit(large_df)  # Automatically parallelized

# Performance scales with:
# - Number of CPU cores
# - Dataset size
# - Model complexity

Batch Processing

from concurrent.futures import ProcessPoolExecutor
from farseer import Farseer

def fit_forecast(df_segment):
    """Train a fresh model on one series and return its 30-period forecast.

    A new Farseer instance is created on every call, so calls do not share
    any model state.
    """
    model = Farseer()
    model.fit(df_segment)
    horizon = model.make_future_dataframe(periods=30)
    return model.predict(horizon)

# Process multiple time series in parallel.
# The pool is created under a __main__ guard: with the "spawn" start method
# (the default on Windows and macOS) every worker re-imports this module, and
# unguarded top-level code would try to start a pool inside each worker.
if __name__ == "__main__":
    # NOTE: df1..df4 are placeholders for your own per-series training frames.
    segments = [df1, df2, df3, df4]

    with ProcessPoolExecutor() as executor:
        # map() preserves input order: forecasts[i] belongs to segments[i]
        forecasts = list(executor.map(fit_forecast, segments))

    print(f"Processed {len(forecasts)} time series")