Data Analysis

Posted by wykxldz


Preface

This post walks through code implementations for data analysis, covering:

Linear Regression
Logistic Regression
Support Vector Machine
Convolutional Neural Network

Linear Regression

import tensorflow as tf
import numpy as np

def read_data():
    """Read the data."""
    pass

X = tf.placeholder(tf.float32, shape=[1, 3], name="X")
Y = tf.placeholder(tf.float32, name="Y")

w = tf.Variable(tf.random_normal(shape=[3, 1], dtype=tf.float32), name="weight")
b = tf.Variable(.0, dtype=tf.float32, name="bias")

y_predict = tf.matmul(X, w) + b
loss = tf.square(Y - y_predict, name='loss')
optimizer = tf.train.GradientDescentOptimizer(learning_rate=.001).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    epochs = 10  # number of training passes
    for i in range(epochs):
        total_loss = 0
        for j in range(len(data)):
            # fetch one sample; see the loader sketch below
            x_ = read_x_data(j)
            y_ = read_y_data(j)
            _, loss_ = sess.run([optimizer, loss], feed_dict={X: x_, Y: y_})
            total_loss += loss_

    w_, b_ = sess.run([w, b])
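
read_data, read_x_data and read_y_data are left as stubs above. A minimal sketch of what they might look like, assuming the samples sit in a whitespace-separated text file with three feature columns followed by one target column (the path and layout are hypothetical):

def read_data(path="./data/linear.txt"):  # hypothetical path and format
    """Load the whole dataset as a float array of shape (n, 4)."""
    return np.loadtxt(path, dtype=np.float32)

data = read_data()

def read_x_data(j):
    """Features of sample j, shaped (1, 3) to match the X placeholder."""
    return data[j][:3].reshape((1, 3))

def read_y_data(j):
    """Target value of sample j."""
    return float(data[j][3])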

Logistic Regression

import pandas
import tensorflow as tf
import numpy as np

csv_path = "./data/haberman.csv"
data = pandas.read_csv(csv_path).to_numpy()  # Haberman survival data: 3 feature columns plus a 1/2 survival label

X = tf.placeholder(tf.float32, shape=[1, 3], name="X")
Y = tf.placeholder(tf.float32, name="Y")

w = tf.Variable(tf.random_normal(shape=[3, 1], dtype=tf.float32), name="weight")
b = tf.Variable(.0, dtype=tf.float32, name="bias")

logits = tf.matmul(X, w) + b
entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y, name="sigmoid")
loss = tf.reduce_mean(entropy)

optimizer = tf.train.AdamOptimizer(learning_rate=.01).minimize(loss)

# Train and test in one session so the trained weights are reused
n_train = int(len(data) * .8)
prediction = tf.round(tf.nn.sigmoid(logits))  # threshold the sigmoid output at 0.5

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    epochs = 10
    for n in range(epochs):
        total_loss = 0
        for i in range(n_train):
            x_data = np.array(data[i][:3]).reshape((1, 3))
            y_data = float(data[i][3] - 1)  # map the {1, 2} labels to {0, 1}
            _, loss_ = sess.run([optimizer, loss], feed_dict={X: x_data, Y: y_data})
            total_loss += loss_

        print("total loss : {0}".format(total_loss / n_train))
    w_, b_ = sess.run([w, b])
    print([w_, b_])

    # Test
    print("start test!")
    accuracy = 0
    for i in range(n_train, len(data)):
        x_data = np.array(data[i][:3]).reshape((1, 3))
        y_data = float(data[i][3] - 1)
        accuracy += float(sess.run(prediction, feed_dict={X: x_data})[0][0] == y_data)

    print("Accuracy : {0}%".format(accuracy / (len(data) - n_train) * 100))

print("Done!")

SVM

import numpy as np
from sklearn import svm

path_file = "./Image_Files/fer2013.csv"

def data_get(usage):
    """Load features and labels; `usage` is "Training" or "Test"."""
    pass

train_dataX, train_dataY = data_get("Training")

clf = svm.SVC()
clf.fit(train_dataX, train_dataY)
print(clf.score(train_dataX, train_dataY))  # accuracy on the training split itself
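
data_get is left empty above. One possible implementation, assuming the standard fer2013.csv layout (columns emotion, pixels and Usage, where pixels holds the 48x48 grayscale values as one space-separated string and Usage marks the split):

import pandas as pd

def data_get(usage):
    """Return (features, labels) for the rows whose Usage column matches `usage`."""
    df = pd.read_csv(path_file)
    df = df[df["Usage"] == usage]
    features = np.array([np.array(p.split(), dtype=np.float32) for p in df["pixels"]])
    labels = df["emotion"].to_numpy()
    return features, labels

Note that fitting an SVC on full 2304-dimensional pixel vectors is slow; in practice the data is usually subsampled or reduced (e.g. with PCA) first.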

Convolutional Neural Network

import tensorflow as tf
import numpy as np
from PIL import Image

X = tf.placeholder(tf.float32, [28, 28], name="X_placeholder")
Y = tf.placeholder(tf.float32, [1, 10], name="Y_placeholder")  # one-hot label; shape matches the (1, 10) logits

# cnn
images = tf.reshape(X, shape=[-1, 28, 28, 1])
kernel = tf.get_variable('kernel', [5, 5, 1, 16], initializer=tf.truncated_normal_initializer())

biases = tf.get_variable('bias', [16], initializer=tf.random_normal_initializer())

conv = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding="SAME")
conv = tf.nn.relu(conv + biases, name="conv")

# pool
pool = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
pool = tf.reshape(pool, [-1,14*14*16])

# linear processing
w1 = tf.get_variable('weights', [14*14*16, 1024], initializer=tf.truncated_normal_initializer())
b1 = tf.get_variable('biases', [1024], initializer=tf.random_normal_initializer())

f = tf.nn.relu(tf.matmul(pool, w1) + b1, name="relu")
# dropout
f = tf.nn.dropout(f, .75, name='relu_dropout')

# linear processing
w = tf.get_variable('weights_', [1024, 10], initializer=tf.truncated_normal_initializer())
b = tf.get_variable('bias_', [10], initializer=tf.random_normal_initializer())

# logits
logits = tf.matmul(f, w) + b

entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name="entropy")
loss = tf.reduce_mean(entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=.001).minimize(loss)

# get the data; these loaders are stubs for a user-supplied MNIST-style reader,
# assumed to yield (28, 28) images and (1, 10) one-hot labels
train_data_images = load_train_images()
train_data_label = load_train_labels()
test_data_images = load_test_images()
test_data_label = load_test_labels()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Train
    total_loss = 0
    for i in range(len(train_data_images)):
        x_data = train_data_images[i]
        y_data = train_data_label[i]
        _, loss_ = sess.run([optimizer, loss], feed_dict={X: x_data, Y: y_data})
        total_loss += loss_
    print("total loss : {0}".format(total_loss))

    # Test: build the evaluation ops once, outside the loop
    softmax_ = tf.nn.softmax(logits)
    correct_op = tf.cast(tf.equal(tf.argmax(softmax_, 1), tf.argmax(Y, 1)), tf.float32)
    correct = 0
    for i in range(len(test_data_images)):
        x_data = test_data_images[i]
        y_data = test_data_label[i]
        correct += sess.run(correct_op, feed_dict={X: x_data, Y: y_data})[0]
    print("test accuracy : {0}".format(correct / len(test_data_images)))






