工作常用的时间序列分析案例:

使用 Pandas 构造标准时间序列
可视化每日/每周销售额趋势图
分解时间序列的趋势、季节性与残差(STL分解)
使用移动平均法平滑短期波动
使用指数平滑法(Simple Exponential Smoothing)进行预测
使用 Holt-Winters 季节性模型预测节假日效应
使用 ARIMA 模型预测未来30天销售额
使用 SARIMA 模型处理具有季节性的销售数据
构建多维时间序列模型(按品类 + 区域)
预测促销期间的订单增长曲线

好的,我们将展示如何进行这些常见的时间序列分析任务。为了演示这些方法,我们需要使用一些常用的数据分析库,如 pandas、matplotlib 和 statsmodels。首先,让我们创建一个示例 DataFrame 来模拟原始数据,并逐步应用这些时间序列分析任务。

创建示例数据

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a synthetic daily sales dataset covering 2023-01-01 through 2025-06-30.
dates = pd.date_range(start='2023-01-01', end='2025-06-30', freq='D')
n_days = len(dates)

category_codes = [f'C{i}' for i in range(1, 11)]
regions = ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen']

# Seed once so every run reproduces the same data. The draw order below
# (amount, category, region, promotion) must stay fixed, otherwise the
# generated values change.
np.random.seed(42)
daily_increments = np.random.normal(loc=100, scale=20, size=n_days)
# 'amount' is the running total of the normally distributed increments.
sales_data = np.cumsum(daily_increments)

data = {
    'order_date': dates,
    'amount': sales_data,
    'category_code': np.random.choice(category_codes, n_days),
    'region': np.random.choice(regions, n_days),
    'promotion': np.random.choice([True, False], n_days),
}

# Index the frame by order date so time-based resampling works directly.
df = pd.DataFrame(data).set_index('order_date')

print("原始数据:")
print(df.head())

1. 使用 Pandas 构造标准时间序列

# Aggregate the 'amount' column into one total per calendar day.
daily_sales = df.resample('D')['amount'].sum()

# One print call with a newline separator emits the same two-part output.
print("\n按天聚合销售额:", daily_sales.head(), sep="\n")

2. 可视化每日/每周销售额趋势图

# Daily sales trend, drawn through the object-oriented matplotlib API.
fig_daily, ax_daily = plt.subplots(figsize=(14, 7))
ax_daily.plot(daily_sales, label='Daily Sales')
ax_daily.set_title('Daily Sales Trend')
ax_daily.set_xlabel('Date')
ax_daily.set_ylabel('Sales Amount')
ax_daily.legend()
plt.show()

# Roll the daily totals up into weekly totals.
weekly_sales = daily_sales.resample('W').sum()

# Weekly sales trend.
fig_weekly, ax_weekly = plt.subplots(figsize=(14, 7))
ax_weekly.plot(weekly_sales, label='Weekly Sales')
ax_weekly.set_title('Weekly Sales Trend')
ax_weekly.set_xlabel('Week')
ax_weekly.set_ylabel('Sales Amount')
ax_weekly.legend()
plt.show()

3. 分解时间序列的趋势、季节性与残差(STL分解)

from statsmodels.tsa.seasonal import STL

# STL decomposition of the daily series with a 365-observation period.
stl = STL(endog=daily_sales, period=365)
res = stl.fit()

# Plot the fitted decomposition result.
fig = res.plot()
plt.show()

4. 使用移动平均法平滑短期波动

# 30-observation rolling mean; the first 29 values are NaN because the
# window is not yet full.
moving_avg = daily_sales.rolling(30).mean()

# Overlay the smoothed line on the raw daily series.
fig_ma, ax_ma = plt.subplots(figsize=(14, 7))
ax_ma.plot(daily_sales, label='Original')
ax_ma.plot(moving_avg, label='30-Day Moving Average', color='red')
ax_ma.set_title('Daily Sales with 30-Day Moving Average')
ax_ma.set_xlabel('Date')
ax_ma.set_ylabel('Sales Amount')
ax_ma.legend()
plt.show()

5. 使用指数平滑法(Simple Exponential Smoothing)进行预测

from statsmodels.tsa.holtwinters import SimpleExpSmoothing

# Fit simple exponential smoothing with a fixed smoothing level of 0.2
# (optimized=False keeps the supplied value instead of estimating it).
model_es = SimpleExpSmoothing(daily_sales)
model_es_fit = model_es.fit(smoothing_level=0.2, optimized=False)

# Forecast the next 30 steps.
forecast_es = model_es_fit.forecast(30)

# Plot the history together with the forecast.
fig_es, ax_es = plt.subplots(figsize=(14, 7))
ax_es.plot(daily_sales, label='Original')
ax_es.plot(forecast_es, label='Forecast', color='red')
ax_es.set_title('Simple Exponential Smoothing Forecast')
ax_es.set_xlabel('Date')
ax_es.set_ylabel('Sales Amount')
ax_es.legend()
plt.show()

6. 使用 Holt-Winters 季节性模型预测节假日效应

from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Public-holiday dates per year: New Year's Day, Spring Festival, Qingming,
# Labour Day and National Day. The Spring Festival entries for 2023 and 2025
# are corrected to 2023-01-22 and 2025-01-29.
holidays = pd.to_datetime([
    '2023-01-01', '2023-01-22', '2023-04-05', '2023-05-01', '2023-10-01',
    '2024-01-01', '2024-02-10', '2024-04-04', '2024-05-01', '2024-10-01',
    '2025-01-01', '2025-01-29', '2025-04-04', '2025-05-01', '2025-10-01',
])

# Flag holiday rows straight from the DatetimeIndex; no need to reset and
# restore the index. The flag is stored on df but is not passed to the
# Holt-Winters model fitted below.
df['is_holiday'] = df.index.isin(holidays).astype(int)

# Fit an additive-trend, additive-seasonal Holt-Winters model with a
# 365-observation seasonal cycle.
model_hw = ExponentialSmoothing(daily_sales, trend='add', seasonal='add', seasonal_periods=365)
model_hw_fit = model_hw.fit()

# Forecast the next 30 days.
forecast_hw = model_hw_fit.forecast(steps=30)

# Plot the history together with the forecast.
plt.figure(figsize=(14, 7))
plt.plot(daily_sales, label='Original')
plt.plot(forecast_hw, label='Forecast', color='red')
plt.title('Holt-Winters Seasonal Model Forecast')
plt.xlabel('Date')
plt.ylabel('Sales Amount')
plt.legend()
plt.show()

7. 使用 ARIMA 模型预测未来30天销售额

from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model; (5, 1, 0) is only an example order.
arima_order = (5, 1, 0)
model_arima = ARIMA(daily_sales, order=arima_order)
model_arima_fit = model_arima.fit()

# Forecast the next 30 days.
forecast_arima = model_arima_fit.forecast(steps=30)

# Dates for the forecast horizon, starting the day after the last observation.
forecast_start = daily_sales.index[-1] + pd.Timedelta(days=1)
forecast_days = pd.date_range(start=forecast_start, periods=30)

# Plot the history together with the forecast.
fig_arima, ax_arima = plt.subplots(figsize=(14, 7))
ax_arima.plot(daily_sales, label='Original')
ax_arima.plot(forecast_days, forecast_arima, label='Forecast', color='red')
ax_arima.set_title('ARIMA Model Forecast')
ax_arima.set_xlabel('Date')
ax_arima.set_ylabel('Sales Amount')
ax_arima.legend()
plt.show()

8. 使用 SARIMA 模型处理具有季节性的销售数据

from statsmodels.tsa.statespace.sarimax import SARIMAX

# Fit a SARIMA model with weekly seasonality (period 7).
# A seasonal period of 365 with seasonal differencing makes the SARIMAX state
# vector several hundred dimensions wide, which is impractically slow and
# memory-hungry for a daily series of this length. For yearly patterns,
# prefer STL or Fourier-term regressors instead.
model_sarima = SARIMAX(daily_sales, order=(5, 1, 0), seasonal_order=(1, 1, 1, 7))
model_sarima_fit = model_sarima.fit()

# Forecast the next 30 days.
forecast_sarima = model_sarima_fit.forecast(steps=30)

# Plot the history together with the forecast; the x values start the day
# after the last observation.
plt.figure(figsize=(14, 7))
plt.plot(daily_sales, label='Original')
plt.plot(pd.date_range(start=daily_sales.index[-1] + pd.Timedelta(days=1), periods=30), forecast_sarima, label='Forecast', color='red')
plt.title('SARIMA Model Forecast')
plt.xlabel('Date')
plt.ylabel('Sales Amount')
plt.legend()
plt.show()

9. 构建多维时间序列模型(按品类 + 区域)

# Build one daily sales series per (region, category) pair.
# Grouping only by region and category collapses the time axis and leaves a
# region x category table of totals, which cannot be modelled as a time
# series. Grouping by day as well, then moving region/category into the
# columns, gives a date-indexed frame with (region, category_code) MultiIndex
# columns. Days without sales for a pair are filled with 0.
region_category_sales = (
    df.groupby([pd.Grouper(freq='D'), 'region', 'category_code'])['amount']
    .sum()
    .unstack(level=['region', 'category_code'], fill_value=0)
    .sort_index(axis=1)
    .asfreq('D', fill_value=0)
)

# Pick the first (region, category) pair that actually occurs in the data.
selected_region, selected_category = region_category_sales.columns[0]
region_category_series = region_category_sales[(selected_region, selected_category)]

# Fit a SARIMA model with weekly seasonality; a period of 365 is impractically
# slow and memory-hungry in SARIMAX.
model_sarima_multi = SARIMAX(region_category_series, order=(5, 1, 0), seasonal_order=(1, 1, 1, 7))
model_sarima_multi_fit = model_sarima_multi.fit()

# Forecast the next 30 days.
forecast_sarima_multi = model_sarima_multi_fit.forecast(steps=30)

# Plot the history together with the forecast.
plt.figure(figsize=(14, 7))
plt.plot(region_category_series, label='Original')
plt.plot(pd.date_range(start=region_category_series.index[-1] + pd.Timedelta(days=1), periods=30), forecast_sarima_multi, label='Forecast', color='red')
plt.title('Multi-Dimensional SARIMA Model Forecast (Region: {}, Category: {})'.format(selected_region, selected_category))
plt.xlabel('Date')
plt.ylabel('Sales Amount')
plt.legend()
plt.show()

10. 预测促销期间的订单增长曲线

# Total sales amount split by promotion status (kept for inspection; it is
# not used by the model below).
promotion_effect = df.groupby('promotion')['amount'].sum()

# Encode the boolean promotion flag as 0/1 so it can serve as a regressor.
df['promotion_numeric'] = df['promotion'].astype(int)

# Fit SARIMA with the promotion flag as an exogenous variable.
# Weekly seasonality (period 7) is used because a period of 365 is
# impractically slow and memory-hungry in SARIMAX.
model_sarima_promo = SARIMAX(daily_sales, exog=df['promotion_numeric'], order=(5, 1, 0), seasonal_order=(1, 1, 1, 7))
model_sarima_promo_fit = model_sarima_promo.fit()

# Forecast the next 30 days. Out-of-sample forecasting needs future values of
# the exogenous variable; here every future day is assumed to be a promotion.
future_dates = pd.date_range(start=daily_sales.index[-1] + pd.Timedelta(days=1), periods=30)
future_exog = pd.Series([1]*30, index=future_dates)
forecast_sarima_promo = model_sarima_promo_fit.get_forecast(steps=30, exog=future_exog)

# Plot the history together with the forecast mean.
plt.figure(figsize=(14, 7))
plt.plot(daily_sales, label='Original')
plt.plot(forecast_sarima_promo.predicted_mean, label='Promotion Forecast', color='red')
plt.title('SARIMA Model Forecast with Promotion Effect')
plt.xlabel('Date')
plt.ylabel('Sales Amount')
plt.legend()
plt.show()

综合以上步骤,完整的时间序列分析代码如下:

这段代码展示了从原始数据到经过全面时间序列分析的结果的过程。你可以根据实际需求调整每一步的操作。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Build a synthetic daily sales dataset covering 2023-01-01 through 2025-06-30.
dates = pd.date_range(start='2023-01-01', end='2025-06-30', freq='D')
n_days = len(dates)

category_codes = [f'C{i}' for i in range(1, 11)]
regions = ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen']

# Seed once so every run reproduces the same data. The draw order below
# (amount, category, region, promotion) must stay fixed, otherwise the
# generated values change.
np.random.seed(42)
daily_increments = np.random.normal(loc=100, scale=20, size=n_days)
# 'amount' is the running total of the normally distributed increments.
sales_data = np.cumsum(daily_increments)

data = {
    'order_date': dates,
    'amount': sales_data,
    'category_code': np.random.choice(category_codes, n_days),
    'region': np.random.choice(regions, n_days),
    'promotion': np.random.choice([True, False], n_days),
}

# Index the frame by order date so time-based resampling works directly.
df = pd.DataFrame(data).set_index('order_date')

# Aggregate the 'amount' column into one total per calendar day.
daily_sales = df.resample('D')['amount'].sum()

# Daily sales trend, drawn through the object-oriented matplotlib API.
fig_daily, ax_daily = plt.subplots(figsize=(14, 7))
ax_daily.plot(daily_sales, label='Daily Sales')
ax_daily.set_title('Daily Sales Trend')
ax_daily.set_xlabel('Date')
ax_daily.set_ylabel('Sales Amount')
ax_daily.legend()
plt.show()

# Roll the daily totals up into weekly totals.
weekly_sales = daily_sales.resample('W').sum()

# Weekly sales trend.
fig_weekly, ax_weekly = plt.subplots(figsize=(14, 7))
ax_weekly.plot(weekly_sales, label='Weekly Sales')
ax_weekly.set_title('Weekly Sales Trend')
ax_weekly.set_xlabel('Week')
ax_weekly.set_ylabel('Sales Amount')
ax_weekly.legend()
plt.show()

# STL decomposition of the daily series with a 365-observation period.
stl = STL(endog=daily_sales, period=365)
res = stl.fit()

# Plot the fitted decomposition result.
fig = res.plot()
plt.show()

# 30-observation rolling mean; the first 29 values are NaN because the
# window is not yet full.
moving_avg = daily_sales.rolling(30).mean()

# Overlay the smoothed line on the raw daily series.
fig_ma, ax_ma = plt.subplots(figsize=(14, 7))
ax_ma.plot(daily_sales, label='Original')
ax_ma.plot(moving_avg, label='30-Day Moving Average', color='red')
ax_ma.set_title('Daily Sales with 30-Day Moving Average')
ax_ma.set_xlabel('Date')
ax_ma.set_ylabel('Sales Amount')
ax_ma.legend()
plt.show()

# Fit simple exponential smoothing with a fixed smoothing level of 0.2
# (optimized=False keeps the supplied value instead of estimating it).
model_es = SimpleExpSmoothing(daily_sales)
model_es_fit = model_es.fit(smoothing_level=0.2, optimized=False)

# Forecast the next 30 steps.
forecast_es = model_es_fit.forecast(30)

# Plot the history together with the forecast.
fig_es, ax_es = plt.subplots(figsize=(14, 7))
ax_es.plot(daily_sales, label='Original')
ax_es.plot(forecast_es, label='Forecast', color='red')
ax_es.set_title('Simple Exponential Smoothing Forecast')
ax_es.set_xlabel('Date')
ax_es.set_ylabel('Sales Amount')
ax_es.legend()
plt.show()

# Public-holiday dates per year: New Year's Day, Spring Festival, Qingming,
# Labour Day and National Day. The Spring Festival entries for 2023 and 2025
# are corrected to 2023-01-22 and 2025-01-29.
holidays = pd.to_datetime([
    '2023-01-01', '2023-01-22', '2023-04-05', '2023-05-01', '2023-10-01',
    '2024-01-01', '2024-02-10', '2024-04-04', '2024-05-01', '2024-10-01',
    '2025-01-01', '2025-01-29', '2025-04-04', '2025-05-01', '2025-10-01',
])

# Flag holiday rows straight from the DatetimeIndex; no need to reset and
# restore the index. The flag is stored on df but is not passed to the
# Holt-Winters model fitted below.
df['is_holiday'] = df.index.isin(holidays).astype(int)

# Fit an additive-trend, additive-seasonal Holt-Winters model with a
# 365-observation seasonal cycle.
model_hw = ExponentialSmoothing(daily_sales, trend='add', seasonal='add', seasonal_periods=365)
model_hw_fit = model_hw.fit()

# Forecast the next 30 days.
forecast_hw = model_hw_fit.forecast(steps=30)

# Plot the history together with the forecast.
plt.figure(figsize=(14, 7))
plt.plot(daily_sales, label='Original')
plt.plot(forecast_hw, label='Forecast', color='red')
plt.title('Holt-Winters Seasonal Model Forecast')
plt.xlabel('Date')
plt.ylabel('Sales Amount')
plt.legend()
plt.show()

# Fit an ARIMA model; (5, 1, 0) is only an example order.
arima_order = (5, 1, 0)
model_arima = ARIMA(daily_sales, order=arima_order)
model_arima_fit = model_arima.fit()

# Forecast the next 30 days.
forecast_arima = model_arima_fit.forecast(steps=30)

# Dates for the forecast horizon, starting the day after the last observation.
forecast_start = daily_sales.index[-1] + pd.Timedelta(days=1)
forecast_days = pd.date_range(start=forecast_start, periods=30)

# Plot the history together with the forecast.
fig_arima, ax_arima = plt.subplots(figsize=(14, 7))
ax_arima.plot(daily_sales, label='Original')
ax_arima.plot(forecast_days, forecast_arima, label='Forecast', color='red')
ax_arima.set_title('ARIMA Model Forecast')
ax_arima.set_xlabel('Date')
ax_arima.set_ylabel('Sales Amount')
ax_arima.legend()
plt.show()

# Fit a SARIMA model with weekly seasonality (period 7).
# A seasonal period of 365 with seasonal differencing makes the SARIMAX state
# vector several hundred dimensions wide, which is impractically slow and
# memory-hungry for a daily series of this length. For yearly patterns,
# prefer STL or Fourier-term regressors instead.
model_sarima = SARIMAX(daily_sales, order=(5, 1, 0), seasonal_order=(1, 1, 1, 7))
model_sarima_fit = model_sarima.fit()

# Forecast the next 30 days.
forecast_sarima = model_sarima_fit.forecast(steps=30)

# Plot the history together with the forecast; the x values start the day
# after the last observation.
plt.figure(figsize=(14, 7))
plt.plot(daily_sales, label='Original')
plt.plot(pd.date_range(start=daily_sales.index[-1] + pd.Timedelta(days=1), periods=30), forecast_sarima, label='Forecast', color='red')
plt.title('SARIMA Model Forecast')
plt.xlabel('Date')
plt.ylabel('Sales Amount')
plt.legend()
plt.show()

# Build one daily sales series per (region, category) pair.
# Grouping only by region and category collapses the time axis and leaves a
# region x category table of totals, which cannot be modelled as a time
# series. Grouping by day as well, then moving region/category into the
# columns, gives a date-indexed frame with (region, category_code) MultiIndex
# columns. Days without sales for a pair are filled with 0.
region_category_sales = (
    df.groupby([pd.Grouper(freq='D'), 'region', 'category_code'])['amount']
    .sum()
    .unstack(level=['region', 'category_code'], fill_value=0)
    .sort_index(axis=1)
    .asfreq('D', fill_value=0)
)

# Pick the first (region, category) pair that actually occurs in the data.
selected_region, selected_category = region_category_sales.columns[0]
region_category_series = region_category_sales[(selected_region, selected_category)]

# Fit a SARIMA model with weekly seasonality; a period of 365 is impractically
# slow and memory-hungry in SARIMAX.
model_sarima_multi = SARIMAX(region_category_series, order=(5, 1, 0), seasonal_order=(1, 1, 1, 7))
model_sarima_multi_fit = model_sarima_multi.fit()

# Forecast the next 30 days.
forecast_sarima_multi = model_sarima_multi_fit.forecast(steps=30)

# Plot the history together with the forecast.
plt.figure(figsize=(14, 7))
plt.plot(region_category_series, label='Original')
plt.plot(pd.date_range(start=region_category_series.index[-1] + pd.Timedelta(days=1), periods=30), forecast_sarima_multi, label='Forecast', color='red')
plt.title('Multi-Dimensional SARIMA Model Forecast (Region: {}, Category: {})'.format(selected_region, selected_category))
plt.xlabel('Date')
plt.ylabel('Sales Amount')
plt.legend()
plt.show()

# Encode the boolean promotion flag as 0/1 so it can serve as a regressor.
df['promotion_numeric'] = df['promotion'].astype(int)

# Fit SARIMA with the promotion flag as an exogenous variable.
# Weekly seasonality (period 7) is used because a period of 365 is
# impractically slow and memory-hungry in SARIMAX.
model_sarima_promo = SARIMAX(daily_sales, exog=df['promotion_numeric'], order=(5, 1, 0), seasonal_order=(1, 1, 1, 7))
model_sarima_promo_fit = model_sarima_promo.fit()

# Forecast the next 30 days. Out-of-sample forecasting needs future values of
# the exogenous variable; here every future day is assumed to be a promotion.
future_dates = pd.date_range(start=daily_sales.index[-1] + pd.Timedelta(days=1), periods=30)
future_exog = pd.Series([1]*30, index=future_dates)
forecast_sarima_promo = model_sarima_promo_fit.get_forecast(steps=30, exog=future_exog)

# Plot the history together with the forecast mean.
plt.figure(figsize=(14, 7))
plt.plot(daily_sales, label='Original')
plt.plot(forecast_sarima_promo.predicted_mean, label='Promotion Forecast', color='red')
plt.title('SARIMA Model Forecast with Promotion Effect')
plt.xlabel('Date')
plt.ylabel('Sales Amount')
plt.legend()
plt.show()