一、引入相关库

%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
font=FontProperties(fname=r'c:/windows/fonts/msyh.ttf',size=10)

二、一元回归范例

def runplt():
    plt.figure()
    plt.title(u'披萨价格与直径数据',fontproperties=font)
    plt.xlabel(u'直径(英寸)',fontproperties=font)
    plt.ylabel(u'价格(美元)',fontproperties=font)
    plt.axis([0,25,0,25])
    plt.grid(True)#是否显示网格
    return plt
plt=runplt()
x=[[6],[8],[10],[14],[18]]
y=[[7],[9],[13],[17.5],[18]]
plt.plot(x,y,'k.')
plt.show()

 

Python_sklearn机器学习库学习笔记(一)_一元回归

 

三、利用sklearn建立一元回归

from sklearn.linear_model import LinearRegression
#创建并拟合模型
model=LinearRegression()
model.fit(x,y)
print('预测一张12英寸的披萨价格:')

## 波士顿房屋价格,SGDRegressor

import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor#随机梯度
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import StandardScaler#列归一化,标准正态分布形式
from sklearn.cross_validation import train_test_split#分割训练集和测试集,默认值是25%

data=load_boston()
X_train,X_test,y_train,y_test=train_test_split(data.data,data.target)
#归一化
X_scaler=StandardScaler()
y_scaler=StandardScaler()
X_train=X_scaler.fit_transform(X_train)#训练并转换
y_train=y_scaler.fit_transform(y_train)
#对测试样本归一转换
X_test=X_scaler.transform(X_test)
y_test=y_scaler.transform(y_test)
#训练并测试样本
regression=SGDRegressor(loss='squared_loss')
scores=cross_val_score(regression,X_train,y_train,cv=5)#cv=5训练五次
print scores
print 'Cross validation r-squared score:',np.mean(scores)
regression.fit_transform(X_train,y_train)
print 'Test set r-squared score:',regression.score(X_test,y_test)

输出结果:

[ 0.65592082  0.71571537  0.79468123  0.69650452  0.67266115]
Cross validation r-squared score: 0.707096620395
Test set r-squared score: 0.677424272546