一文搞懂tensorflow2.0(1)（一文搞懂麦克斯韦方程）-技术教程-四时宝库

基本数据类型

tf.int32：tf.constant(1)

tf.float32：tf.constant(1.)

tf.int64：tf.constant(1, dtype=tf.int64)

tf.float64：tf.constant(1. , dtype=tf.double)

tf.bool：tf.constant([True, False])

tf.string：tf.constant('hello world')

类型转换

tf.cast(x, dtype, name=None)

eg:

# Cast a float tensor to int32 and print the dtype before and after.
a = tf.constant(1.)  # a Python float literal yields a float32 tensor by default
print(a.dtype)
b = tf.cast(a, dtype=tf.int32)  # same value, converted to int32
print(b.dtype)

待优化张量

tf.Variable()

eg:

# Wrap an ordinary tensor in a tf.Variable so it can be optimised.
a = tf.constant([-1, 2, 0])
b = tf.Variable(a)
b.name, b.trainable  # attributes carried by the variable

创建张量

tf.convert_to_tensor(data)

tf.constant()

tf.zeros() 创建全为0张量

tf.ones() 创建全为1张量

tf.zeros_like(a) 创建与a形状相同的全为0张量

tf.ones_like(a) 创建与a形状相同的全为1张量

tf.fill(dims, value) 创建自定义数值张量

tf.random.normal(shape,mean=0.0,stddev=1.0,dtype=tf.float32)

创建形状为shape，均值为mean，标准差为stddev的正态分布张量

tf.random.uniform(shape,minval=0,maxval=None,dtype=tf.float32)

创建采样自[minval,maxval)区间的均匀分布张量

创建序列

tf.range(start, limit=None, delta=1)

创建[start,limit),步长为delta的序列

索引与切片

基本：a[idx][idx][idx]

numpy风格：a[idx,idx,idx]

切片(与numpy基本一致)

a[start:end:positive_step]

a[end:start:negative_step]

维度变换

tf.reshape(tensor, shape) 改变数据形状

tf.expand_dims(input, axis) 增加维度

tf.squeeze(input, axis=None) 减少维度

tf.transpose(a, perm=None) 交换维度

数学运算

tf.pow 乘方

tf.square 平方

tf.sqrt 平方根

tf.exp 指数运算

tf.math.log 对数运算

tf.matmul 矩阵相乘

合并与分割

tf.concat(values, axis) 在已有的axis维度上拼接，不产生新维度

tf.stack(values, axis=0) 在axis指定的位置(默认为0)创建一个新维度并堆叠

tf.split(value, num_or_size_splits, axis=0) 张量的分割

tf.unstack(value, axis=0) 在value的axis维度(默认为0)上分割成多个张量，数量等于该维度的长度

数据统计

tf.norm(tensor,ord=1) 计算L1范数

tf.norm(tensor,ord=2) 计算L2范数

tf.reduce_max 统计最大值

tf.reduce_min 统计最小值

tf.reduce_mean 统计均值

tf.reduce_sum 求和

tf.argmax 求最大值的索引

tf.argmin 求最小值的索引

tf.equal 张量比较

eg：

# Mean squared error between simulated predictions and one-hot labels
out = tf.random.normal([4, 10])  # simulated network output
y = tf.one_hot(tf.constant([1, 2, 2, 0]), depth=10)  # simulated true labels, one-hot encoded
# Per-sample error first, then the mean over the sample dimension
loss = tf.reduce_mean(tf.keras.losses.mse(y, out))
loss  # scalar error

# Turn simulated network output into probabilities along axis 1 with softmax
out = tf.nn.softmax(tf.random.normal([2, 10]), axis=1)
pred = tf.argmax(out, axis=1)  # index of the largest probability in each row
pred  # predicted class per sample

填充与复制

tf.pad(tensor,paddings) 填充

tf.tile(input, multiples) 复制

eg：

x = tf.random.normal([4, 28, 28, 3])
# Pad 2 cells before and after along axes 1 and 2 (image height and width): result is [4, 32, 32, 3]
tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]])
# Repeat the content 3 times along axes 1 and 2 (image height and width): result is [4, 84, 84, 3]
tf.tile(x, [1, 3, 3, 1])

数据限幅

tf.maximum(x,a) 数据的下限幅x∈[a, +∞)

tf.minimum(x,a) 数据的上限幅x∈(-∞，a]

其他高级操作

tf.gather 可以根据索引号收集数据

tf.gather_nd 根据多维坐标收集数据

tf.boolean_mask 根据掩码方式采集数据

tf.where(cond,a,b) 根据cond从a,b中挑选元素

tf.scatter_nd(indices, updates, shape) 在形状为shape的全0张量上，按indices指定的位置写入updates(刷新部分数据)

tf.meshgrid 生成二维网格的采样点坐标

MNIST实战

import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import datasets, layers, optimizers

# 预处理
def preprocess(x, y):
    """Convert a batch of images and labels into network-ready tensors.

    Args:
        x: image data; cast to float32, divided by 255 and flattened so that
            each row holds 28*28 values (assumes 28x28 images -- confirm
            against the caller).
        y: class labels; cast to int32 and one-hot encoded with depth 10.

    Returns:
        Tuple ``(x, y)`` of the flattened float32 images and one-hot labels.
    """
    # Scale pixel values by 1/255, then flatten every image into one row.
    flat_images = tf.reshape(tf.cast(x, dtype=tf.float32) / 255., [-1, 28*28])
    # Labels become integer tensors before one-hot encoding.
    one_hot_labels = tf.one_hot(tf.cast(y, dtype=tf.int32), depth=10)
    return flat_images, one_hot_labels


# Load MNIST and report the raw array shapes.
(x, y), (x_test, y_test) = datasets.mnist.load_data()
print('x:', x.shape, 'y:', y.shape, 'x test:', x_test.shape, 'y test:', y_test.shape)

# Training pipeline: shuffle, group into batches of 128, then preprocess each batch.
train_db = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .shuffle(60000)
    .batch(128)
    .map(preprocess)
)

# Test pipeline: same steps with a shuffle buffer of 10000.
test_db = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .shuffle(10000)
    .batch(128)
    .map(preprocess)
)

# Pull one training batch to check the preprocessed shapes.
x, y = next(iter(train_db))
print('train sample:', x.shape, y.shape)

def main(epochs=30, lr=0.001):
    """Train a 784 -> 512 -> 256 -> 10 fully connected network with manual SGD.

    Iterates over the module-level ``train_db`` and minimises the mean squared
    error between the one-hot labels and the raw (un-activated) network
    output. Every 100 training steps the current loss is recorded and printed;
    every 500 training steps the accuracy on the module-level ``test_db`` is
    recorded and printed.

    Args:
        epochs: Number of passes over ``train_db``. Defaults to 30.
        lr: Learning rate used in the update ``w <- w - lr * grad``.
            Defaults to 0.001.

    Returns:
        Tuple ``(loss_list, acc_list)``: the recorded losses (Python floats)
        and the recorded test accuracies.
    """
    # Only tf.Variable objects are tracked by the gradient tape.
    # 784 => 512
    w1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1))
    b1 = tf.Variable(tf.zeros([512]))
    # 512 => 256
    w2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1))
    b2 = tf.Variable(tf.zeros([256]))
    # 256 => 10
    w3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1))
    b3 = tf.Variable(tf.zeros([10]))
    params = [w1, b1, w2, b2, w3, b3]

    def forward(inputs):
        """Return the raw output of the three-layer network for ``inputs``."""
        h1 = tf.nn.relu(inputs @ w1 + b1)
        h2 = tf.nn.relu(h1 @ w2 + b2)
        return h2 @ w3 + b3

    def evaluate():
        """Return the fraction of ``test_db`` samples classified correctly."""
        total, total_correct = 0., 0
        # Distinct names here so the training loop's step/x/y are not clobbered.
        for test_x, test_y in test_db:
            # [b, 10] => [b]
            pred = tf.argmax(forward(test_x), axis=1)
            # one-hot labels back to class indices
            labels = tf.argmax(test_y, axis=1)
            correct = tf.equal(pred, labels)
            # bool tensor => int tensor => numpy
            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
            total += test_x.shape[0]
        return total_correct / total

    loss_list = []
    acc_list = []

    for _ in range(epochs):
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                out = forward(x)
                # [b, 10] - [b, 10]
                loss = tf.square(y - out)
                # [b, 10] => [b]
                loss = tf.reduce_mean(loss, axis=1)
                # [b] => scalar
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, params)
            # assign_sub updates in place, so each parameter stays a tf.Variable.
            for p, g in zip(params, grads):
                p.assign_sub(lr * g)

            if step % 100 == 0:
                loss_list.append(float(loss))
                print(step, 'loss:', float(loss))

            if step % 500 == 0:
                acc = evaluate()
                acc_list.append(acc)
                # `step` is the training step here, not a test-batch index.
                print(step, 'Evaluate Acc:', acc)
    return loss_list, acc_list


# Run training; `loss` and `acc` are the lists returned by main().
loss,acc=main()

# Plot the recorded values
import matplotlib.pyplot as plt

# x axis is the 1-based index of each recorded loss value.
epochs1=range(1, len(loss)+1)
# Colour is given only via the keyword: combining it with a 'b' format string
# defines the colour twice and makes matplotlib warn about the redundancy.
plt.plot(epochs1, loss, label='loss', color='coral')
plt.ylim(0,1)
plt.title('loss')
plt.legend()

# Accuracy goes on a separate figure.
plt.figure()
epochs2=range(1, len(acc)+1)
plt.plot(epochs2, acc, label='acc', color='coral')
plt.ylim(0,1)
plt.title('acc')
plt.legend()

plt.show()

四时宝库

程序员的知识宝库

一文搞懂tensorflow2.0(1)（一文搞懂麦克斯韦方程）