31 · NumPy 实战:向量化计算与矩阵运算
🔗 知识图谱导航:阅读本文前,建议先掌握《16 · 数据库底座:SQLite 核心操作》中的数据处理思路——本文把数据从数据库里取出来,用 NumPy 做高性能数值计算。NumPy 是 pandas/scikit-learn/TensorFlow 的底层,理解它是数据科学的基础。
运行环境:需要先安装 NumPy,在终端执行:
pip install numpy
极客解析:NumPy 的核心价值是"向量化"——用 C 语言实现的批量操作替代 Python 循环,通常比 Python 循环更快。
np.mean(returns, axis=0)一行代码计算所有股票的平均收益率,等价于一个 Python 循环,但快得多。
向量化 vs Python 循环
import numpy as np
# Simulated daily returns: 252 rows (days) x 5 columns (stocks).
returns = np.random.randn(252, 5) * 0.015
# Python-level loop: one .mean() call per column (slow).
means_loop = [returns[:, i].mean() for i in range(returns.shape[1])]
# NumPy vectorized form: a single call computes every column mean.
means = np.mean(returns, axis=0) # axis=0 averages each column
# Both approaches must agree up to floating-point tolerance.
print(f"结果一致: {np.allclose(means_loop, means)}")
ndarray 核心概念
shape 数组形状,如 (252, 5) 表示 252 行 5 列
dtype 数据类型,如 float64
axis=0 沿行方向操作(对每列计算)
axis=1 沿列方向操作(对每行计算)
广播 不同形状的数组自动对齐,如 (252,5) + (5,) → (252,5)
步步为营:核心逻辑自适应拆解
这一篇按照真实脚本执行链路拆成 6 个小台阶:先生成矩阵,再做移动平均、统计摘要、相关性矩阵、ASCII 分布图,最后用 CLI 把这些能力串起来。每个演示都带 Mock 数据和 print(),复制运行就能看到结果。
Step 1:用 generate_returns 造出“股票收益率 Excel 表”
痛点与机制:
generate_returns 是本文的数据入口:它一次性生成“天数 × 股票数”的二维矩阵。你可以把矩阵想成一张 Excel 表,行是交易日,列是股票;NumPy 的厉害之处在于,它能对整张表一次性做计算,而不是一格一格慢慢算。np.random.seed(42) 像给实验贴上封条,保证每次运行拿到同一份模拟数据。
核心源码(逐字来自文末完整源码):
def generate_returns(n_days: int = 252, n_stocks: int = 5) -> tuple[np.ndarray, list[str]]:
    """Generate a reproducible matrix of simulated daily stock returns.

    Each column is standard-normal noise scaled by a fixed per-ticker
    volatility plus a daily drift of 0.0003. The global NumPy random state
    is re-seeded with 42 on every call, so equal arguments give equal data.

    Args:
        n_days: Number of rows (trading days) to simulate.
        n_stocks: Number of columns (tickers); at most 5 are available.

    Returns:
        ``(returns, tickers)`` where ``returns`` has shape
        ``(n_days, n_stocks)`` and ``tickers`` holds the column labels.

    Raises:
        ValueError: If ``n_days`` is negative or ``n_stocks`` is not in 0..5.
    """
    available_tickers = ["AAPL", "GOOG", "MSFT", "AMZN", "TSLA"]
    available_vols = np.array([0.015, 0.018, 0.014, 0.020, 0.030])
    if n_days < 0:
        raise ValueError(f"n_days must be non-negative, got {n_days}")
    # Without this check, n_stocks > 5 fails later with an opaque broadcast error.
    if not 0 <= n_stocks <= len(available_tickers):
        raise ValueError(
            f"n_stocks must be between 0 and {len(available_tickers)}, got {n_stocks}"
        )
    # Re-seeding on every call keeps the simulated data identical across runs.
    np.random.seed(42)
    tickers = available_tickers[:n_stocks]
    vols = available_vols[:n_stocks]
    returns = np.random.randn(n_days, n_stocks) * vols + 0.0003
    return returns, tickers
可运行演示(补齐 Mock 数据与 print 反馈):
import numpy as np
def generate_returns(n_days: int = 252, n_stocks: int = 5) -> tuple[np.ndarray, list[str]]:
    """Generate a reproducible matrix of simulated daily stock returns.

    Each column is standard-normal noise scaled by a fixed per-ticker
    volatility plus a daily drift of 0.0003. The global NumPy random state
    is re-seeded with 42 on every call, so equal arguments give equal data.

    Args:
        n_days: Number of rows (trading days) to simulate.
        n_stocks: Number of columns (tickers); at most 5 are available.

    Returns:
        ``(returns, tickers)`` where ``returns`` has shape
        ``(n_days, n_stocks)`` and ``tickers`` holds the column labels.

    Raises:
        ValueError: If ``n_days`` is negative or ``n_stocks`` is not in 0..5.
    """
    available_tickers = ["AAPL", "GOOG", "MSFT", "AMZN", "TSLA"]
    available_vols = np.array([0.015, 0.018, 0.014, 0.020, 0.030])
    if n_days < 0:
        raise ValueError(f"n_days must be non-negative, got {n_days}")
    # Without this check, n_stocks > 5 fails later with an opaque broadcast error.
    if not 0 <= n_stocks <= len(available_tickers):
        raise ValueError(
            f"n_stocks must be between 0 and {len(available_tickers)}, got {n_stocks}"
        )
    # Re-seeding on every call keeps the simulated data identical across runs.
    np.random.seed(42)
    tickers = available_tickers[:n_stocks]
    vols = available_vols[:n_stocks]
    returns = np.random.randn(n_days, n_stocks) * vols + 0.0003
    return returns, tickers
# Demo: build a small 6-day x 3-stock matrix, then show its labels, shape and first two rows.
returns, tickers = generate_returns(n_days=6, n_stocks=3)
print("📦 生成了一个收益率矩阵")
print("股票列表:", tickers)
print("矩阵形状:", returns.shape, "= 6天 × 3只股票")
print("前2天收益率:")
print(returns[:2].round(4))
Step 2:用 cumsum 做移动平均,避开手写循环
痛点与机制:
移动平均就是“最近几天价格的平均值”。如果每天都重新数一遍窗口,会像每次算账都从第一页翻到最后一页;np.cumsum 则像提前做好累计账本,用两个累计值一减,就能得到窗口总和。这样写既短,也更接近 NumPy 的向量化思维。
核心源码(逐字来自文末完整源码):
def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
    """Compute a trailing simple moving average from prefix sums.

    Args:
        arr: One-dimensional sequence of values.
        window: Number of trailing samples per average; must be at least 1.

    Returns:
        A float array as long as ``arr``. The first ``window - 1`` entries
        are NaN because the window is not yet full; when ``window`` exceeds
        ``len(arr)`` every entry is NaN.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    # A negative window would otherwise slice from the wrong end and return
    # plausible-looking but meaningless numbers.
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    result = np.full(len(arr), np.nan)
    # Prepending 0 makes cumsum[k] the sum of the first k values, so each
    # window total is one subtraction of two prefix sums.
    cumsum = np.cumsum(np.insert(arr, 0, 0))
    result[window - 1:] = (cumsum[window:] - cumsum[:-window]) / window
    return result
可运行演示(补齐 Mock 数据与 print 反馈):
import numpy as np
def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
    """Compute a trailing simple moving average from prefix sums.

    Args:
        arr: One-dimensional sequence of values.
        window: Number of trailing samples per average; must be at least 1.

    Returns:
        A float array as long as ``arr``. The first ``window - 1`` entries
        are NaN because the window is not yet full; when ``window`` exceeds
        ``len(arr)`` every entry is NaN.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    # A negative window would otherwise slice from the wrong end and return
    # plausible-looking but meaningless numbers.
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    result = np.full(len(arr), np.nan)
    # Prepending 0 makes cumsum[k] the sum of the first k values, so each
    # window total is one subtraction of two prefix sums.
    cumsum = np.cumsum(np.insert(arr, 0, 0))
    result[window - 1:] = (cumsum[window:] - cumsum[:-window]) / window
    return result
# Demo: 3-day moving average over six mock prices; the first two slots stay nan.
prices = np.array([100, 102, 101, 105, 107, 106], dtype=float)
ma3 = moving_average(prices, window=3)
print("📈 原始价格:", prices.tolist())
print("3日均线:", np.round(ma3, 2).tolist())
print("解释: 前2个位置是 nan,因为还凑不满3天窗口。")
Step 3:用 mode_stats 生成股票风险体检表
痛点与机制:
mode_stats 把一堆收益率压缩成可读的风险摘要:均值看方向,标准差看波动,夏普比率看“冒一份风险换来多少收益”。这就像给每只股票做体检报告,新手不用盯着 252 行原始数据,也能快速判断它的性格。
核心源码(逐字来自文末完整源码):
def mode_stats(returns: np.ndarray, tickers: list[str]) -> None:
    """Print a per-ticker statistics table and the first ticker's 20-day MA.

    For each column the table shows mean, standard deviation, minimum,
    maximum and ``mean / std * sqrt(252)`` as the Sharpe figure. The last
    five values of the first column's 20-day moving average follow.

    Args:
        returns: Return matrix indexed as ``returns[:, i]`` per ticker.
        tickers: Column labels, in column order.
    """
    print(f"\n[{nexdo_time()}] 📊 收益率统计摘要")
    header = f" {'股票':<6} {'均值':>8} {'标准差':>8} {'最小值':>8} {'最大值':>8} {'夏普':>7}"
    print(header)
    print(" " + "─" * 54)
    for col, ticker in enumerate(tickers):
        series = returns[:, col]
        mean = series.mean()
        std = series.std()
        # Annualize with 252 periods, the same factor the table has always used.
        sharpe = mean / std * np.sqrt(252)
        print(f" {ticker:<6} {mean:>+8.4f} {std:>8.4f} {series.min():>+8.4f} {series.max():>+8.4f} {sharpe:>7.2f}")
    ma20 = moving_average(returns[:, 0], 20)
    tail = ma20[-5:].round(4).tolist()
    print(f"\n {tickers[0]} 20日MA(最后5日): {tail}")
可运行演示(补齐 Mock 数据与 print 反馈):
import time
import numpy as np
def nexdo_time() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    return time.strftime(timestamp_format)
def generate_returns(n_days: int = 252, n_stocks: int = 5) -> tuple[np.ndarray, list[str]]:
    """Generate a reproducible matrix of simulated daily stock returns.

    Each column is standard-normal noise scaled by a fixed per-ticker
    volatility plus a daily drift of 0.0003. The global NumPy random state
    is re-seeded with 42 on every call, so equal arguments give equal data.

    Args:
        n_days: Number of rows (trading days) to simulate.
        n_stocks: Number of columns (tickers); at most 5 are available.

    Returns:
        ``(returns, tickers)`` where ``returns`` has shape
        ``(n_days, n_stocks)`` and ``tickers`` holds the column labels.

    Raises:
        ValueError: If ``n_days`` is negative or ``n_stocks`` is not in 0..5.
    """
    available_tickers = ["AAPL", "GOOG", "MSFT", "AMZN", "TSLA"]
    available_vols = np.array([0.015, 0.018, 0.014, 0.020, 0.030])
    if n_days < 0:
        raise ValueError(f"n_days must be non-negative, got {n_days}")
    # Without this check, n_stocks > 5 fails later with an opaque broadcast error.
    if not 0 <= n_stocks <= len(available_tickers):
        raise ValueError(
            f"n_stocks must be between 0 and {len(available_tickers)}, got {n_stocks}"
        )
    # Re-seeding on every call keeps the simulated data identical across runs.
    np.random.seed(42)
    tickers = available_tickers[:n_stocks]
    vols = available_vols[:n_stocks]
    returns = np.random.randn(n_days, n_stocks) * vols + 0.0003
    return returns, tickers
def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
    """Compute a trailing simple moving average from prefix sums.

    Args:
        arr: One-dimensional sequence of values.
        window: Number of trailing samples per average; must be at least 1.

    Returns:
        A float array as long as ``arr``. The first ``window - 1`` entries
        are NaN because the window is not yet full; when ``window`` exceeds
        ``len(arr)`` every entry is NaN.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    # A negative window would otherwise slice from the wrong end and return
    # plausible-looking but meaningless numbers.
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    result = np.full(len(arr), np.nan)
    # Prepending 0 makes cumsum[k] the sum of the first k values, so each
    # window total is one subtraction of two prefix sums.
    cumsum = np.cumsum(np.insert(arr, 0, 0))
    result[window - 1:] = (cumsum[window:] - cumsum[:-window]) / window
    return result
def mode_stats(returns: np.ndarray, tickers: list[str]) -> None:
    """Print a per-ticker statistics table and the first ticker's 20-day MA.

    For each column the table shows mean, standard deviation, minimum,
    maximum and ``mean / std * sqrt(252)`` as the Sharpe figure. The last
    five values of the first column's 20-day moving average follow.

    Args:
        returns: Return matrix indexed as ``returns[:, i]`` per ticker.
        tickers: Column labels, in column order.
    """
    print(f"\n[{nexdo_time()}] 📊 收益率统计摘要")
    header = f" {'股票':<6} {'均值':>8} {'标准差':>8} {'最小值':>8} {'最大值':>8} {'夏普':>7}"
    print(header)
    print(" " + "─" * 54)
    for col, ticker in enumerate(tickers):
        series = returns[:, col]
        mean = series.mean()
        std = series.std()
        # Annualize with 252 periods, the same factor the table has always used.
        sharpe = mean / std * np.sqrt(252)
        print(f" {ticker:<6} {mean:>+8.4f} {std:>8.4f} {series.min():>+8.4f} {series.max():>+8.4f} {sharpe:>7.2f}")
    ma20 = moving_average(returns[:, 0], 20)
    tail = ma20[-5:].round(4).tolist()
    print(f"\n {tickers[0]} 20日MA(最后5日): {tail}")
# Demo: print the statistics summary for 40 simulated days of 3 tickers.
returns, tickers = generate_returns(n_days=40, n_stocks=3)
mode_stats(returns, tickers)
Step 4:用 mode_corr 看股票是否同涨同跌
痛点与机制:
相关性矩阵回答的是“这些股票是不是经常一起动”。returns.T 很关键:np.corrcoef 希望每一行代表一个变量,所以要把原来的“行=日期、列=股票”转成“行=股票、列=日期”。可以把它理解成把 Excel 表横竖转过来,让统计函数按正确方向读表。
核心源码(逐字来自文末完整源码):
def mode_corr(returns: np.ndarray, tickers: list[str]) -> None:
    """Print the correlation matrix of the return columns as a labelled table.

    Args:
        returns: Return matrix with one column per ticker.
        tickers: Column labels, used for both the header row and row labels.
    """
    print(f"\n[{nexdo_time()}] 🔗 相关性矩阵")
    # corrcoef treats each row as one variable, so tickers must be on rows.
    corr = np.corrcoef(returns.T)
    n = len(tickers)
    header_cells = [f"{label:>8}" for label in tickers]
    print(" " + " " * 6 + "".join(header_cells))
    print(" " + "─" * (6 + 8 * n))
    for row_idx, label in enumerate(tickers):
        cells = [f"{corr[row_idx,col_idx]:>8.3f}" for col_idx in range(n)]
        print(f" {label:<6}{''.join(cells)}")
可运行演示(补齐 Mock 数据与 print 反馈):
import time
import numpy as np
def nexdo_time() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    return time.strftime(timestamp_format)
def generate_returns(n_days: int = 252, n_stocks: int = 5) -> tuple[np.ndarray, list[str]]:
    """Generate a reproducible matrix of simulated daily stock returns.

    Each column is standard-normal noise scaled by a fixed per-ticker
    volatility plus a daily drift of 0.0003. The global NumPy random state
    is re-seeded with 42 on every call, so equal arguments give equal data.

    Args:
        n_days: Number of rows (trading days) to simulate.
        n_stocks: Number of columns (tickers); at most 5 are available.

    Returns:
        ``(returns, tickers)`` where ``returns`` has shape
        ``(n_days, n_stocks)`` and ``tickers`` holds the column labels.

    Raises:
        ValueError: If ``n_days`` is negative or ``n_stocks`` is not in 0..5.
    """
    available_tickers = ["AAPL", "GOOG", "MSFT", "AMZN", "TSLA"]
    available_vols = np.array([0.015, 0.018, 0.014, 0.020, 0.030])
    if n_days < 0:
        raise ValueError(f"n_days must be non-negative, got {n_days}")
    # Without this check, n_stocks > 5 fails later with an opaque broadcast error.
    if not 0 <= n_stocks <= len(available_tickers):
        raise ValueError(
            f"n_stocks must be between 0 and {len(available_tickers)}, got {n_stocks}"
        )
    # Re-seeding on every call keeps the simulated data identical across runs.
    np.random.seed(42)
    tickers = available_tickers[:n_stocks]
    vols = available_vols[:n_stocks]
    returns = np.random.randn(n_days, n_stocks) * vols + 0.0003
    return returns, tickers
def mode_corr(returns: np.ndarray, tickers: list[str]) -> None:
    """Print the correlation matrix of the return columns as a labelled table.

    Args:
        returns: Return matrix with one column per ticker.
        tickers: Column labels, used for both the header row and row labels.
    """
    print(f"\n[{nexdo_time()}] 🔗 相关性矩阵")
    # corrcoef treats each row as one variable, so tickers must be on rows.
    corr = np.corrcoef(returns.T)
    n = len(tickers)
    header_cells = [f"{label:>8}" for label in tickers]
    print(" " + " " * 6 + "".join(header_cells))
    print(" " + "─" * (6 + 8 * n))
    for row_idx, label in enumerate(tickers):
        cells = [f"{corr[row_idx,col_idx]:>8.3f}" for col_idx in range(n)]
        print(f" {label:<6}{''.join(cells)}")
# Demo: print the correlation table for 60 simulated days of 3 tickers.
returns, tickers = generate_returns(n_days=60, n_stocks=3)
mode_corr(returns, tickers)
print("提示: 对角线都是 1.000,因为每只股票和自己完全相关。")
Step 5:用 ascii_hist 在终端画收益率分布
痛点与机制:
直方图把连续收益率切成一个个小区间,再统计每个区间出现多少次。它像把考试分数按 0-60、60-70、70-80 分桶,柱子越长说明这个区间越常见。这里用 ASCII 而不是图片库,是为了让脚本在任何终端都能给出可见反馈。
核心源码(逐字来自文末完整源码):
def ascii_hist(data: np.ndarray, title: str, bins: int = 10, width: int = 36) -> None:
    """Print a horizontal ASCII histogram of ``data`` to stdout.

    Bars are scaled so the fullest bin spans ``width`` characters. When every
    bin is empty (for example ``data`` has no elements) all bars are drawn
    empty instead of failing on a 0/0 division.

    Args:
        data: Values to bin.
        title: Heading printed above the chart.
        bins: Bin specification forwarded to ``np.histogram``.
        width: Character length of the longest bar.
    """
    counts, edges = np.histogram(data, bins=bins)
    max_count = counts.max()
    rule = " " + "─" * (width + 22)
    print(f"\n {title}")
    print(rule)
    for cnt, left, right in zip(counts, edges[:-1], edges[1:]):
        # max_count == 0 means no samples at all; avoid int(nan) from 0/0.
        bar_len = int(cnt / max_count * width) if max_count else 0
        bar = "█" * bar_len
        print(f" [{left:+.3f},{right:+.3f}) │{bar:<{width}}│ {cnt:4d}")
    print(rule)
可运行演示(补齐 Mock 数据与 print 反馈):
import numpy as np
def ascii_hist(data: np.ndarray, title: str, bins: int = 10, width: int = 36) -> None:
    """Print a horizontal ASCII histogram of ``data`` to stdout.

    Bars are scaled so the fullest bin spans ``width`` characters. When every
    bin is empty (for example ``data`` has no elements) all bars are drawn
    empty instead of failing on a 0/0 division.

    Args:
        data: Values to bin.
        title: Heading printed above the chart.
        bins: Bin specification forwarded to ``np.histogram``.
        width: Character length of the longest bar.
    """
    counts, edges = np.histogram(data, bins=bins)
    max_count = counts.max()
    rule = " " + "─" * (width + 22)
    print(f"\n {title}")
    print(rule)
    for cnt, left, right in zip(counts, edges[:-1], edges[1:]):
        # max_count == 0 means no samples at all; avoid int(nan) from 0/0.
        bar_len = int(cnt / max_count * width) if max_count else 0
        bar = "█" * bar_len
        print(f" [{left:+.3f},{right:+.3f}) │{bar:<{width}}│ {cnt:4d}")
    print(rule)
# Demo: draw an 8-bin, 24-character-wide histogram of 80 seeded normal samples.
np.random.seed(7)
mock_returns = np.random.normal(loc=0.001, scale=0.015, size=80)
ascii_hist(mock_returns, "AAPL 模拟日收益率分布", bins=8, width=24)
print("解释: 柱子越长,说明这个收益率区间出现次数越多。")
Step 6:用 main 做 stats/corr/hist/all 脚本遥控器
痛点与机制:
main 是脚本遥控器:用户不用改代码,只要传 --mode stats/corr/hist/all 就能切换功能。它像电视遥控器的频道键,把同一份数据送到不同分析模块,既避免重复生成数据,也让脚本更像真正可交付的小工具。
核心源码(逐字来自文末完整源码):
def main() -> None:
    """Parse ``--mode`` and run the selected analyses on one shared data set.

    The data is generated once and handed to each requested report in the
    fixed order stats, corr, hist; ``all`` (the default) runs all three.
    """
    parser = argparse.ArgumentParser(description="NumPy 股票收益率分析")
    parser.add_argument(
        "--mode",
        choices=["stats", "corr", "hist", "all"],
        default="all",
        help="运行模式",
    )
    args = parser.parse_args()

    returns, tickers = generate_returns()
    n_days, n_stocks = returns.shape
    print(f"[{nexdo_time()}] 数据生成:{n_days} 天 × {n_stocks} 只股票")

    selected = args.mode
    run_all = selected == "all"
    if run_all or selected == "stats":
        mode_stats(returns, tickers)
    if run_all or selected == "corr":
        mode_corr(returns, tickers)
    if run_all or selected == "hist":
        mode_hist(returns, tickers)


if __name__ == "__main__":
    main()
可运行演示(补齐 Mock 数据与 print 反馈):
import argparse
import sys
import time
import numpy as np
def nexdo_time() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    return time.strftime(timestamp_format)
def generate_returns(n_days: int = 252, n_stocks: int = 5) -> tuple[np.ndarray, list[str]]:
    """Generate a reproducible matrix of simulated daily stock returns.

    Each column is standard-normal noise scaled by a fixed per-ticker
    volatility plus a daily drift of 0.0003. The global NumPy random state
    is re-seeded with 42 on every call, so equal arguments give equal data.

    Args:
        n_days: Number of rows (trading days) to simulate.
        n_stocks: Number of columns (tickers); at most 5 are available.

    Returns:
        ``(returns, tickers)`` where ``returns`` has shape
        ``(n_days, n_stocks)`` and ``tickers`` holds the column labels.

    Raises:
        ValueError: If ``n_days`` is negative or ``n_stocks`` is not in 0..5.
    """
    available_tickers = ["AAPL", "GOOG", "MSFT", "AMZN", "TSLA"]
    available_vols = np.array([0.015, 0.018, 0.014, 0.020, 0.030])
    if n_days < 0:
        raise ValueError(f"n_days must be non-negative, got {n_days}")
    # Without this check, n_stocks > 5 fails later with an opaque broadcast error.
    if not 0 <= n_stocks <= len(available_tickers):
        raise ValueError(
            f"n_stocks must be between 0 and {len(available_tickers)}, got {n_stocks}"
        )
    # Re-seeding on every call keeps the simulated data identical across runs.
    np.random.seed(42)
    tickers = available_tickers[:n_stocks]
    vols = available_vols[:n_stocks]
    returns = np.random.randn(n_days, n_stocks) * vols + 0.0003
    return returns, tickers
def mode_stats(returns: np.ndarray, tickers: list[str]) -> None:
    """Demo stub: announce the stats mode and echo the tickers."""
    label = "运行 stats: 输出收益率均值和波动率"
    print(label, tickers)


def mode_corr(returns: np.ndarray, tickers: list[str]) -> None:
    """Demo stub: report the shape of the correlation matrix."""
    corr_shape = np.corrcoef(returns.T).shape
    print("运行 corr: 输出相关性矩阵 shape", corr_shape)


def mode_hist(returns: np.ndarray, tickers: list[str]) -> None:
    """Demo stub: announce the hist mode for the first ticker."""
    first_ticker = tickers[0]
    print("运行 hist: 输出 ASCII 分布图入口", first_ticker)
def main() -> None:
    """Parse ``--mode`` and run the selected analyses on one shared data set.

    The data is generated once and handed to each requested report in the
    fixed order stats, corr, hist; ``all`` (the default) runs all three.
    """
    parser = argparse.ArgumentParser(description="NumPy 股票收益率分析")
    parser.add_argument(
        "--mode",
        choices=["stats", "corr", "hist", "all"],
        default="all",
        help="运行模式",
    )
    args = parser.parse_args()

    returns, tickers = generate_returns()
    n_days, n_stocks = returns.shape
    print(f"[{nexdo_time()}] 数据生成:{n_days} 天 × {n_stocks} 只股票")

    selected = args.mode
    run_all = selected == "all"
    if run_all or selected == "stats":
        mode_stats(returns, tickers)
    if run_all or selected == "corr":
        mode_corr(returns, tickers)
    if run_all or selected == "hist":
        mode_hist(returns, tickers)
# Demo driver: fake the command line for each mode in turn and invoke main().
for mode in ["stats", "corr", "hist", "all"]:
    banner = f"\n>>> python3 31-numpy-quant.py --mode {mode}"
    print(banner)
    # argparse reads sys.argv[1:], so element 0 is only a placeholder program name.
    sys.argv = ["prog", "--mode", mode]
    main()
极客实战:完整源码与运行
现在,把上面的积木拼起来,将以下完整代码放进你的编辑器,运行它。先看整体闭环,再回头逐段改参数,你会更容易建立工程直觉。
#!/usr/bin/env python3
"""
31-numpy-quant.py — NumPy 股票收益率分析
用法:
python 31-numpy-quant.py --mode stats
python 31-numpy-quant.py --mode corr
python 31-numpy-quant.py --mode hist
"""
import argparse
import time
import numpy as np
def nexdo_time() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    return time.strftime(timestamp_format)
def generate_returns(n_days: int = 252, n_stocks: int = 5) -> tuple[np.ndarray, list[str]]:
    """Generate a reproducible matrix of simulated daily stock returns.

    Each column is standard-normal noise scaled by a fixed per-ticker
    volatility plus a daily drift of 0.0003. The global NumPy random state
    is re-seeded with 42 on every call, so equal arguments give equal data.

    Args:
        n_days: Number of rows (trading days) to simulate.
        n_stocks: Number of columns (tickers); at most 5 are available.

    Returns:
        ``(returns, tickers)`` where ``returns`` has shape
        ``(n_days, n_stocks)`` and ``tickers`` holds the column labels.

    Raises:
        ValueError: If ``n_days`` is negative or ``n_stocks`` is not in 0..5.
    """
    available_tickers = ["AAPL", "GOOG", "MSFT", "AMZN", "TSLA"]
    available_vols = np.array([0.015, 0.018, 0.014, 0.020, 0.030])
    if n_days < 0:
        raise ValueError(f"n_days must be non-negative, got {n_days}")
    # Without this check, n_stocks > 5 fails later with an opaque broadcast error.
    if not 0 <= n_stocks <= len(available_tickers):
        raise ValueError(
            f"n_stocks must be between 0 and {len(available_tickers)}, got {n_stocks}"
        )
    # Re-seeding on every call keeps the simulated data identical across runs.
    np.random.seed(42)
    tickers = available_tickers[:n_stocks]
    vols = available_vols[:n_stocks]
    returns = np.random.randn(n_days, n_stocks) * vols + 0.0003
    return returns, tickers
def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
    """Compute a trailing simple moving average from prefix sums.

    Args:
        arr: One-dimensional sequence of values.
        window: Number of trailing samples per average; must be at least 1.

    Returns:
        A float array as long as ``arr``. The first ``window - 1`` entries
        are NaN because the window is not yet full; when ``window`` exceeds
        ``len(arr)`` every entry is NaN.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    # A negative window would otherwise slice from the wrong end and return
    # plausible-looking but meaningless numbers.
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    result = np.full(len(arr), np.nan)
    # Prepending 0 makes cumsum[k] the sum of the first k values, so each
    # window total is one subtraction of two prefix sums.
    cumsum = np.cumsum(np.insert(arr, 0, 0))
    result[window - 1:] = (cumsum[window:] - cumsum[:-window]) / window
    return result
def ascii_hist(data: np.ndarray, title: str, bins: int = 10, width: int = 36) -> None:
    """Print a horizontal ASCII histogram of ``data`` to stdout.

    Bars are scaled so the fullest bin spans ``width`` characters. When every
    bin is empty (for example ``data`` has no elements) all bars are drawn
    empty instead of failing on a 0/0 division.

    Args:
        data: Values to bin.
        title: Heading printed above the chart.
        bins: Bin specification forwarded to ``np.histogram``.
        width: Character length of the longest bar.
    """
    counts, edges = np.histogram(data, bins=bins)
    max_count = counts.max()
    rule = " " + "─" * (width + 22)
    print(f"\n {title}")
    print(rule)
    for cnt, left, right in zip(counts, edges[:-1], edges[1:]):
        # max_count == 0 means no samples at all; avoid int(nan) from 0/0.
        bar_len = int(cnt / max_count * width) if max_count else 0
        bar = "█" * bar_len
        print(f" [{left:+.3f},{right:+.3f}) │{bar:<{width}}│ {cnt:4d}")
    print(rule)
def mode_stats(returns: np.ndarray, tickers: list[str]) -> None:
    """Print a per-ticker statistics table and the first ticker's 20-day MA.

    For each column the table shows mean, standard deviation, minimum,
    maximum and ``mean / std * sqrt(252)`` as the Sharpe figure. The last
    five values of the first column's 20-day moving average follow.

    Args:
        returns: Return matrix indexed as ``returns[:, i]`` per ticker.
        tickers: Column labels, in column order.
    """
    print(f"\n[{nexdo_time()}] 📊 收益率统计摘要")
    header = f" {'股票':<6} {'均值':>8} {'标准差':>8} {'最小值':>8} {'最大值':>8} {'夏普':>7}"
    print(header)
    print(" " + "─" * 54)
    for col, ticker in enumerate(tickers):
        series = returns[:, col]
        mean = series.mean()
        std = series.std()
        # Annualize with 252 periods, the same factor the table has always used.
        sharpe = mean / std * np.sqrt(252)
        print(f" {ticker:<6} {mean:>+8.4f} {std:>8.4f} {series.min():>+8.4f} {series.max():>+8.4f} {sharpe:>7.2f}")
    ma20 = moving_average(returns[:, 0], 20)
    tail = ma20[-5:].round(4).tolist()
    print(f"\n {tickers[0]} 20日MA(最后5日): {tail}")
def mode_corr(returns: np.ndarray, tickers: list[str]) -> None:
    """Print the correlation matrix of the return columns as a labelled table.

    Args:
        returns: Return matrix with one column per ticker.
        tickers: Column labels, used for both the header row and row labels.
    """
    print(f"\n[{nexdo_time()}] 🔗 相关性矩阵")
    # corrcoef treats each row as one variable, so tickers must be on rows.
    corr = np.corrcoef(returns.T)
    n = len(tickers)
    header_cells = [f"{label:>8}" for label in tickers]
    print(" " + " " * 6 + "".join(header_cells))
    print(" " + "─" * (6 + 8 * n))
    for row_idx, label in enumerate(tickers):
        cells = [f"{corr[row_idx,col_idx]:>8.3f}" for col_idx in range(n)]
        print(f" {label:<6}{''.join(cells)}")
def mode_hist(returns: np.ndarray, tickers: list[str]) -> None:
    """Print one ASCII histogram per ticker column.

    Args:
        returns: Return matrix with one column per ticker.
        tickers: Column labels, in column order.
    """
    print(f"\n[{nexdo_time()}] 📈 收益率分布(ASCII 柱状图)")
    for col, ticker in enumerate(tickers):
        column = returns[:, col]
        chart_title = f"{ticker} 日收益率分布"
        ascii_hist(column, chart_title)
def main() -> None:
    """Parse ``--mode`` and run the selected analyses on one shared data set.

    The data is generated once and handed to each requested report in the
    fixed order stats, corr, hist; ``all`` (the default) runs all three.
    """
    parser = argparse.ArgumentParser(description="NumPy 股票收益率分析")
    parser.add_argument(
        "--mode",
        choices=["stats", "corr", "hist", "all"],
        default="all",
        help="运行模式",
    )
    args = parser.parse_args()

    returns, tickers = generate_returns()
    n_days, n_stocks = returns.shape
    print(f"[{nexdo_time()}] 数据生成:{n_days} 天 × {n_stocks} 只股票")

    selected = args.mode
    run_all = selected == "all"
    if run_all or selected == "stats":
        mode_stats(returns, tickers)
    if run_all or selected == "corr":
        mode_corr(returns, tickers)
    if run_all or selected == "hist":
        mode_hist(returns, tickers)


if __name__ == "__main__":
    main()
$ python3 31-numpy-quant.py --mode stats
[2026-04-18 05:17:06] 📊 收益率统计摘要
股票 均值 标准差 最小值 最大值 夏普
──────────────────────────────────────────────────────
AAPL +0.0003 0.0150 -0.0441 +0.0441 0.32
GOOG +0.0003 0.0180 -0.0529 +0.0529 0.28
MSFT +0.0003 0.0140 -0.0411 +0.0411 0.34
$ python3 31-numpy-quant.py --mode hist
[2026-04-18 05:17:06] 📈 收益率分布(ASCII 柱状图)
AAPL 日收益率分布:
-0.045 |
-0.036 | ██
-0.027 | ████████
-0.018 | ████████████████
-0.009 | ████████████████████████
0.000 | ████████████████████████████
0.009 | ████████████████████████
0.018 | ████████████████
0.027 | ████████
0.036 | ██
小结
| 概念 | 一句话记忆 |
|---|---|
| `np.random.randn(m, n)` | 生成 m×n 的标准正态分布矩阵 |
| `np.random.seed(42)` | 固定随机种子,保证可复现 |
| `axis=0` | 沿行方向操作(对每列计算),结果中"行"这一维被压缩掉 |
| `axis=1` | 沿列方向操作(对每行计算),结果中"列"这一维被压缩掉 |
| `np.cumsum` | 累积和,用差分实现 O(n) 移动平均 |
| `np.corrcoef(X.T)` | 计算相关性矩阵,需要转置让每行是一个变量 |
| `np.percentile(a, 5)` | 第 5 百分位数,对应金融里 95% 置信度的 VaR |
| 广播 | `(252,5) * (5,)` 自动对齐,不需要手动 reshape |
⏱ NexDo Time(5 分钟)
挑战:用 NumPy 实现等权重投资组合的年化收益率和波动率。
具体步骤:
- 用 `generate_returns()` 生成 5 只股票的收益率矩阵
- 等权重 `weights = np.ones(5) / 5`
- 组合日收益率 = `returns @ weights`(矩阵乘法,`@` 运算符)
- 年化收益率 = `portfolio_returns.mean() * 252`
- 年化波动率 = `portfolio_returns.std() * np.sqrt(252)`
- 打印结果,和单只股票的统计对比
Don’t wait for next time, do it in the next moment.