# coding: utf-8

# Notebook/environment setup: enable the interactive matplotlib backend and
# force TensorFlow onto the CPU before any session is created.
get_ipython().run_line_magic('matplotlib', 'notebook')

import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.contrib.learn as skflow  # NOTE(review): unused in this script
from sklearn.utils import shuffle
import numpy as np
import pandas as pd
import os 
# Hide every GPU from TensorFlow so the session below runs CPU-only.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
print(tf.__version__)
print(tf.test.is_gpu_available())  # expected False given the env var above


# ** 数据集简介 **

# 本数据集包含与波士顿房价相关的多个因素:<br>
# ** CRIM **:城镇人均犯罪率<br>
# ** ZN **:住宅用地超过25000 sq.ft. 的比例<br>
# ** INDUS ** : 城镇非零售商用土地的比例<br>
# ** CHAS **:Charles河虚拟变量(如果边界是河流,则为1;否则,为0)<br>
# ** NOX **:一氧化氮浓度<br>
# ** RM **:住宅平均房间数<br>
# ** AGE **:1940年之前建成的自用房屋比例<br>
# ** DIS **:到波士顿5个中心区域的加权距离<br>
# ** RAD **:辐射性公路的靠近指数<br>
# ** TAX **:每1万美元的全值财产税率<br>
# ** PTRATIO **:城镇师生比例<br>
# ** LSTAT **:人口中地位低下者的比例<br>
# ** MEDV **:自住房的平均房价,单位:千美元<br>

# ** 数据集以CSV格式存储,可通过Pandas库读取并进行格式转换 **

# ** Pandas库 **可以帮助我们快速读取常规大小的数据文件<br>
# 能够读取CSV文件, 文本文件、MS Excel、SQL数据库以及用于科学用途的HDF5格式文件<br>
# 自动转换为Numpy的多维阵列

# ** 通过Pandas导入数据 **

# In[2]:


# Load the Boston housing data set and show its summary statistics.
df = pd.read_csv("data/boston.csv", header=0)
print(df.describe())


df = np.array(df)

# Min-max scale the 12 feature columns to [0, 1] in one vectorized step;
# the target column (index 12, MEDV) is left in its original units.
col_min = df[:, :12].min(axis=0)
col_max = df[:, :12].max(axis=0)
df[:, :12] = (df[:, :12] - col_min) / (col_max - col_min)

x_data = df[:, :12]  # normalized feature matrix
y_data = df[:, 12]   # target: median home value (thousands of USD)




# Placeholders for a mini-batch: 12 normalized features per sample and one
# target value (MEDV). (The original comment said 3 factors; the model in
# fact consumes all 12 feature columns.)
x = tf.placeholder(tf.float32, [None,12], name = "x") # 12 input features
y = tf.placeholder(tf.float32, [None,1], name = "y")


with tf.name_scope("Model"):
    # Single-layer linear regression: pred = x @ w + b.
    w = tf.Variable(tf.random_normal([12,1], stddev=0.01), name="w0")
    b = tf.Variable(1., name="b0")
    def model(x, w, b):
        """Linear model: matrix-multiply the features by the weights, add the bias."""
        return tf.matmul(x, w) + b

    pred= model(x, w, b)


train_epochs = 500 # number of passes over the training set
learning_rate = 0.01 # Adam step size

with tf.name_scope("LossFunction"):
    loss_function = tf.reduce_mean(tf.pow(y-pred, 2)) # mean squared error (MSE)

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss_function)

sess = tf.Session()
init = tf.global_variables_initializer()

# Dump the graph definition as text for inspection.
# NOTE(review): written under log2/ while the summary writer below uses
# log/ — confirm the two directories are intentionally different.
tf.train.write_graph(sess.graph, 'log2/boston','graph.pbtxt')

# Scalar summary so the loss curve can be viewed in TensorBoard.
loss_op = tf.summary.scalar("loss", loss_function)
merged = tf.summary.merge_all()

sess.run(init)

writer = tf.summary.FileWriter('log/boston', sess.graph) 

# Train with per-sample (stochastic) gradient updates.
loss_list = []  # average loss per epoch, for the plot at the end
for epoch in range(train_epochs):
    loss_sum = 0.0
    for xs, ys in zip(x_data, y_data):
        feed = {x: xs.reshape(1, 12), y: ys.reshape(1, 1)}
        # One graph execution per sample: apply the gradient step and fetch
        # the loss plus its summary in the same run. (The original ran the
        # graph a second time per sample just to evaluate loss_op.)
        _, loss, summary_str = sess.run([optimizer, loss_function, loss_op],
                                        feed_dict=feed)
        loss_sum += loss
    # One summary point per epoch (the original wrote one per sample, all
    # tagged with the same global step).
    writer.add_summary(summary_str, epoch)
    # Reshuffle the samples between epochs so update order varies.
    x_data, y_data = shuffle(x_data, y_data)
    b0temp = b.eval(session=sess)
    w0temp = w.eval(session=sess)
    loss_average = loss_sum / len(y_data)
    loss_list.append(loss_average)
    print("epoch=", epoch + 1, "loss=", loss_average, "b=", b0temp, "w=", w0temp)

writer.close()  # flush any pending summaries to disk

# Report the full learned linear equation with the correct feature names.
# (The original printed only w[0..2] and mislabeled w[1]/w[2] as DIS/LSTAT;
# they are ZN and INDUS per the column order of the data set.)
feature_names = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                 "DIS", "RAD", "TAX", "PTRATIO", "LSTAT"]
terms = " + ".join("%s*%s" % (w0temp[i][0], name)
                   for i, name in enumerate(feature_names))
print("y =", terms, "+", b0temp)

plt.plot(loss_list)  # loss curve over the training epochs