Data Analysis
Posted by wykxldz
Preface
This post walks through code implementations for four common data-analysis models:
Linear Regression
Logistic Regression
Support Vector Machine
Convolutional Neural Network
Linear Regression
import tensorflow as tf
import numpy as np

def read_data():
    """Read the training data (left as a placeholder in the original post)."""
    pass

# model: y = Xw + b, one 3-feature sample per step
X = tf.placeholder(tf.float32, shape=[1, 3], name="X")
Y = tf.placeholder(tf.float32, name="Y")
w = tf.Variable(tf.random_normal(shape=[3, 1], dtype=tf.float32), name="weight")
b = tf.Variable(.0, dtype=tf.float32, name="bias")
y_predict = tf.matmul(X, w) + b
loss = tf.square(Y - y_predict, name="loss")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=.001).minimize(loss)

epochs = 10
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(epochs):
        total_loss = 0
        for j in range(len(data)):
            # read one sample; `data`, read_x_data and read_y_data are
            # placeholders, like read_data above -- add any preprocessing
            # needed to get clean x and y values
            x_ = read_x_data()
            y_ = read_y_data()
            _, loss_ = sess.run([optimizer, loss], feed_dict={X: x_, Y: y_})
            total_loss += loss_
    w_, b_ = sess.run([w, b])
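Since the data-loading functions are stubs, the loop above can't run as-is. Here is a minimal self-contained variant on synthetic data (the target weights and sample counts are my own assumptions, not from the original) that can be run end to end:

true_w = np.array([[3.0], [2.0], [-1.0]], dtype=np.float32)  # assumed ground truth
epochs = 50
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(epochs):
        total_loss = 0
        for _ in range(100):
            x_ = np.random.rand(1, 3).astype(np.float32)
            y_ = x_.dot(true_w) + 1.0   # label: 3*x1 + 2*x2 - x3 + 1
            _, loss_ = sess.run([optimizer, loss], feed_dict={X: x_, Y: y_})
            total_loss += loss_
    w_, b_ = sess.run([w, b])
    print(w_, b_)   # should move toward [[3], [2], [-1]] and 1.0 with enough epochs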
Logistic Regression
import pandas  # used below to load the CSV
import tensorflow as tf
import numpy as np

csv_path = "./data/haberman.csv"
data = pandas.read_csv(csv_path).to_numpy()

# logistic model: sigmoid(Xw + b) on 3 features
X = tf.placeholder(tf.float32, shape=[1, 3], name="X")
Y = tf.placeholder(tf.float32, name="Y")
w = tf.Variable(tf.random_normal(shape=[3, 1], dtype=tf.float32), name="weight")
b = tf.Variable(.0, dtype=tf.float32, name="bias")
logits = tf.matmul(X, w) + b
entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y, name="sigmoid")
loss = tf.reduce_mean(entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=.01).minimize(loss)
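For reference, tf.nn.sigmoid_cross_entropy_with_logits computes the binary cross-entropy in a numerically stable form. For a logit $x$ and label $z \in \{0, 1\}$ it evaluates

$$\max(x, 0) - xz + \log\left(1 + e^{-|x|}\right),$$

which is algebraically equal to $-z\log\sigma(x) - (1-z)\log(1-\sigma(x))$, so the Adam step below minimizes the average log loss.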
# Train
epochs = 10
train_size = int(len(data) * .8)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for n in range(epochs):
        total_loss = 0
        for i in range(train_size):
            x_data = np.array(data[i][:3]).reshape((1, 3))
            y_data = float(data[i][3] - 1)  # map labels {1, 2} to {0, 1}
            _, loss_ = sess.run([optimizer, loss], feed_dict={X: x_data, Y: y_data})
            total_loss += loss_
        print("total loss : {0}".format(total_loss / train_size))
    w_, b_ = sess.run([w, b])
    print([w_, b_])

    # Test -- run in the same session: opening a new session and re-running
    # the initializer (as the original did) would throw away the trained weights
    print("start test!")
    accuracy = 0
    predict = tf.round(tf.nn.sigmoid(logits))  # threshold the probability at 0.5
    for i in range(train_size, len(data)):
        x_data = np.array(data[i][:3]).reshape((1, 3))
        y_data = float(data[i][3] - 1)
        accuracy += float(sess.run(predict, feed_dict={X: x_data})[0][0] == y_data)
    print("Accuracy : {0}%".format(accuracy / (len(data) - train_size) * 100))
    print("Done!")
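One caveat: the split above takes the first 80% of rows in file order as training data. If the CSV happens to be sorted, it is safer to shuffle before splitting. A minimal sketch (assuming NumPy 1.17+ for default_rng; the seed is arbitrary):

rng = np.random.default_rng(0)   # hypothetical fixed seed, for reproducibility
indices = rng.permutation(len(data))
split = int(len(data) * .8)
train_rows = data[indices[:split]]
test_rows = data[indices[split:]]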
SVM
import numpy as np
from sklearn import svm

path_file = "./Image_Files/fer2013.csv"

def data_get(usage):
    """Return (features, labels) for a split; `usage` is "Training" or "Test"."""
    pass

train_dataX, train_dataY = data_get("Training")
clf = svm.SVC()
clf.fit(train_dataX, train_dataY)
# note: scoring on the training set measures fit, not generalization
print(clf.score(train_dataX, train_dataY))
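data_get() is left as a stub above. If the file follows the standard Kaggle fer2013 layout (emotion, pixels, Usage columns, where pixels is a space-separated string of 48x48 grayscale values; an assumption, since the original never shows the loader), a sketch could look like this. Note that in the Kaggle file the test rows are labeled "PublicTest" / "PrivateTest" rather than "Test":

import pandas as pd

def data_get(usage):
    """Return (features, labels) for rows whose Usage column matches `usage`."""
    df = pd.read_csv(path_file)
    df = df[df["Usage"] == usage]
    # each pixels field is "p0 p1 ... p2303"; parse it into a flat float vector
    X = np.stack([np.array(p.split(), dtype=np.float32) for p in df["pixels"]])
    y = df["emotion"].to_numpy()
    return X, y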
Convolutional Neural Network
import tensorflow as tf
import numpy as np

X = tf.placeholder(tf.float32, [28, 28], name="X_placeholder")
Y = tf.placeholder(tf.float32, [10], name="Y_placeholder")
keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # dropout keep probability

# convolution layer: 5x5 kernels, 1 input channel, 16 feature maps
images = tf.reshape(X, shape=[-1, 28, 28, 1])
kernel = tf.get_variable('kernel', [5, 5, 1, 16], initializer=tf.truncated_normal_initializer())
biases = tf.get_variable('bias', [16], initializer=tf.random_normal_initializer())
conv = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding="SAME")
conv = tf.nn.relu(conv + biases, name="conv")
# max pooling: a 2x2 window halves each spatial dimension (28x28 -> 14x14)
pool = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
pool = tf.reshape(pool, [-1, 14 * 14 * 16])
# fully connected layer
w1 = tf.get_variable('weights', [14 * 14 * 16, 1024], initializer=tf.truncated_normal_initializer())
b1 = tf.get_variable('biases', [1024], initializer=tf.random_normal_initializer())
f = tf.nn.relu(tf.matmul(pool, w1) + b1, name="relu")
# dropout: feed keep_prob=.75 while training, 1.0 at test time
f = tf.nn.dropout(f, keep_prob, name='relu_dropout')
# output layer
w = tf.get_variable('weights_', [1024, 10], initializer=tf.truncated_normal_initializer())
b = tf.get_variable('bias_', [10], initializer=tf.random_normal_initializer())
logits = tf.matmul(f, w) + b
labels = tf.reshape(Y, [1, 10])  # match the [1, 10] shape of logits
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels, name="entropy")
loss = tf.reduce_mean(entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=.001).minimize(loss)
# load the data (the loader functions are left as placeholders; see the sketch below)
train_data_images = load_train_images()
train_data_label = load_train_labels()
test_data_images = load_test_images()
test_data_label = load_test_labels()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Train
    total_loss = 0
    for i in range(len(train_data_images)):
        x_data = train_data_images[i]
        y_data = train_data_label[i]
        _, loss_ = sess.run([optimizer, loss],
                            feed_dict={X: x_data, Y: y_data, keep_prob: .75})
        total_loss += loss_
    print("total loss : {0}".format(total_loss))

    # Test -- build the prediction ops once, outside the loop, and evaluate
    # on the test set (the original mistakenly looped over the training data)
    softmax_ = tf.nn.softmax(logits)
    correct_op = tf.cast(tf.equal(tf.argmax(softmax_, 1), tf.argmax(labels, 1)), tf.float32)
    correct = 0
    for i in range(len(test_data_images)):
        x_data = test_data_images[i]
        y_data = test_data_label[i]
        correct += sess.run(correct_op,
                            feed_dict={X: x_data, Y: y_data, keep_prob: 1.0})[0]
    print("accuracy : {0}".format(correct / len(test_data_images)))