import tensorflow as tf
import os
import argparse
import sys

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# 1.0 Generating the TFRecords file
from tensorflow.contrib.learn.python.learn.datasets import mnist

FLAGS = None


# The encoding helper functions are as follows:
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def convert_to(data_set, name):
    """Converts a dataset to tfrecords."""
    images = data_set.images
    labels = data_set.labels
    num_examples = data_set.num_examples

    if images.shape[0] != num_examples:
        raise ValueError('Images size %d does not match label size %d.' %
                         (images.shape[0], num_examples))
    rows = images.shape[1]   # 28
    cols = images.shape[2]   # 28
    depth = images.shape[3]  # 1; the images are grayscale, so a single channel

    filename = os.path.join(FLAGS.directory, name + '.tfrecords')
    print('Writing', filename)
    writer = tf.python_io.TFRecordWriter(filename)
    for index in range(num_examples):
        image_raw = images[index].tostring()
        # Write to the protocol buffer: height, width, depth and label are
        # encoded as int64, image_raw is encoded as raw bytes.
        example = tf.train.Example(features=tf.train.Features(feature={
            'height': _int64_feature(rows),
            'width': _int64_feature(cols),
            'depth': _int64_feature(depth),
            'label': _int64_feature(int(labels[index])),
            'image_raw': _bytes_feature(image_raw)}))
        writer.write(example.SerializeToString())  # serialize to a string
    writer.close()


def main(unused_argv):
    # Get the data.
    data_sets = mnist.read_data_sets(FLAGS.directory,
                                     dtype=tf.uint8,
                                     reshape=False,
                                     validation_size=FLAGS.validation_size)

    # Convert to Examples and write the result to TFRecords.
    convert_to(data_sets.train, 'train')
    convert_to(data_sets.validation, 'validation')
    convert_to(data_sets.test, 'test')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--directory',
        type=str,
        default='MNIST_data/',
        help='Directory to download data files and write the converted result')
    parser.add_argument(
        '--validation_size',
        type=int,
        default=5000,
        help='Number of examples to separate from the training data for the '
             'validation set.')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
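To sanity-check the output, the snippet below (not part of the original script) iterates over the generated file with tf.python_io.tf_record_iterator and decodes the first Example back into a NumPy array. It assumes the default output path MNIST_data/train.tfrecords produced by --directory above; adjust the path if you changed that flag.

import numpy as np
import tensorflow as tf

record_path = 'MNIST_data/train.tfrecords'  # assumed default from --directory
for serialized in tf.python_io.tf_record_iterator(record_path):
    example = tf.train.Example()
    example.ParseFromString(serialized)
    feature = example.features.feature
    height = feature['height'].int64_list.value[0]
    width = feature['width'].int64_list.value[0]
    depth = feature['depth'].int64_list.value[0]
    label = feature['label'].int64_list.value[0]
    image = np.frombuffer(feature['image_raw'].bytes_list.value[0],
                          dtype=np.uint8).reshape(height, width, depth)
    print('label:', label, 'image shape:', image.shape)
    break  # only inspect the first example

The second listing below reads these .tfrecords files back through a queue-based input pipeline and trains the tutorial's two-hidden-layer network on them.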
import tensorflow as tf
import os
# from tensorflow.contrib.learn.python.learn.datasets import mnist
# Note: the mnist module above is not the same as the mnist in the examples
# package; this file uses the one imported further below.

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import argparse
import os.path
import sys
import time

from tensorflow.examples.tutorials.mnist import mnist

# Basic model parameters as external flags.
FLAGS = None

# This part of the code was added by FontTian; it comes from the source code
# of tensorflow.examples.tutorials.mnist.
# The MNIST images are always 28x28 pixels.
# IMAGE_SIZE = 28
# IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE

# Constants used for dealing with the files, matches convert_to_records.
TRAIN_FILE = 'train.tfrecords'
VALIDATION_FILE = 'validation.tfrecords'


def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        # The key names in `features` must be written out explicitly.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        })

    # Convert from a scalar string tensor (whose single string has
    # length mnist.IMAGE_PIXELS, a 0-D tensor) to a uint8 tensor with
    # shape [mnist.IMAGE_PIXELS] (a 1-D tensor).
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    # print(tf.shape(image))  # Tensor("input/Shape:0", shape=(1,), dtype=int32)
    image.set_shape([mnist.IMAGE_PIXELS])
    # print(tf.shape(image))  # Tensor("input/Shape_1:0", shape=(1,), dtype=int32)

    # OPTIONAL: Could reshape into a 28x28 image and apply distortions
    # here. Since we are not applying any distortions in this
    # example, and the next step expects the image to be flattened
    # into a vector, we don't bother.

    # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    # print(tf.shape(image))  # Tensor("input/Shape_2:0", shape=(1,), dtype=int32)

    # Convert label from a scalar uint8 tensor to an int32 scalar.
    label = tf.cast(features['label'], tf.int32)
    # print(tf.shape(label))  # Tensor("input/Shape_3:0", shape=(0,), dtype=int32)

    return image, label


# Use tf.train.shuffle_batch to shuffle the examples produced above and
# collect them into mini-batch tensors.
def inputs(train, batch_size, num_epochs):
    """Reads input data num_epochs times.

    Args:
      train: Selects between the training (True) and validation (False) data.
      batch_size: Number of examples per returned batch.
      num_epochs: Number of times to read the input data, or 0/None to
        train forever.

    Returns:
      A tuple (images, labels), where:
      * images is a float tensor with shape [batch_size, mnist.IMAGE_PIXELS]
        in the range [-0.5, 0.5].
      * labels is an int32 tensor with shape [batch_size] with the true label,
        a number in the range [0, mnist.NUM_CLASSES).

      Note that a tf.train.QueueRunner is added to the graph, which
      must be run using e.g. tf.train.start_queue_runners().
    """
    if not num_epochs:
        num_epochs = None
    filename = os.path.join(FLAGS.train_dir,
                            TRAIN_FILE if train else VALIDATION_FILE)

    with tf.name_scope('input'):
        # tf.train.string_input_producer returns a QueueRunner that holds a
        # FIFOQueue of filenames. If the dataset is large, it can be split
        # across several files and the whole filename list passed in here.
        # Even when reading in multiple threads, share the filename queue.
        filename_queue = tf.train.string_input_producer(
            [filename], num_epochs=num_epochs)

        image, label = read_and_decode(filename_queue)

        # Shuffle the examples and collect them into batch_size batches.
        # (Internally uses a RandomShuffleQueue.)
        # We run this in two threads to avoid being a bottleneck.
        images, sparse_labels = tf.train.shuffle_batch(
            [image, label], batch_size=batch_size, num_threads=2,
            capacity=1000 + 3 * batch_size,
            # Ensures a minimum amount of shuffling of examples: keep at least
            # this many elements in the queue so every dequeue is well mixed.
            min_after_dequeue=1000)

        return images, sparse_labels


def run_training():
    """Train MNIST for a number of steps."""
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Input images and labels.
        images, labels = inputs(train=True, batch_size=FLAGS.batch_size,
                                num_epochs=FLAGS.num_epochs)

        # Build a graph that computes predictions from the inference model.
        logits = mnist.inference(images,
                                 FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add the loss calculation to the graph.
        loss = mnist.loss(logits, labels)

        # Add the training op to the graph.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # The op for initializing the variables.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        # Create a session for running operations in the Graph.
        sess = tf.Session()

        # Initialize the variables. Note: string_input_producer internally
        # creates an epoch counter (a local variable).
        sess.run(init_op)

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            step = 0
            while not coord.should_stop():
                start_time = time.time()

                # Run one step of the model. The return values are
                # the activations from the `train_op` (which is
                # discarded) and the `loss` op. To inspect the values
                # of your ops or variables, you may include them in
                # the list passed to sess.run() and the value tensors
                # will be returned in the tuple from the call.
                _, loss_value = sess.run([train_op, loss])

                duration = time.time() - start_time

                # Print an overview fairly often.
                if step % 100 == 0:
                    print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
                                                               duration))
                step += 1
        except tf.errors.OutOfRangeError:
            print('Done training for %d epochs, %d steps.'
                  % (FLAGS.num_epochs, step))
        finally:
            # When done, ask the other threads to stop.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)
        sess.close()


def main(_):
    run_training()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.01,
        help='Initial learning rate.')
    parser.add_argument(
        '--num_epochs',
        type=int,
        default=2,
        help='Number of epochs to run trainer.')
    parser.add_argument(
        '--hidden1',
        type=int,
        default=128,
        help='Number of units in hidden layer 1.')
    parser.add_argument(
        '--hidden2',
        type=int,
        default=32,
        help='Number of units in hidden layer 2.')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=100,
        help='Batch size.')
    parser.add_argument(
        '--train_dir',
        type=str,
        default='/tmp/data',
        help='Directory with the training data.')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
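For reference, here is a minimal stand-alone sketch (not in the original script) that pulls a single batch from inputs() and prints its shapes, to show how the queue-based pipeline is consumed outside of run_training(). It assumes FLAGS has been parsed as above and that train.tfrecords already exists under FLAGS.train_dir; note that this script's default is /tmp/data, while the conversion script writes to MNIST_data/ by default, so pass --train_dir accordingly.

def inspect_one_batch():
    """Sketch: fetch one shuffled batch and print its shapes."""
    with tf.Graph().as_default():
        images, labels = inputs(train=True, batch_size=FLAGS.batch_size,
                                num_epochs=1)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            image_batch, label_batch = sess.run([images, labels])
            # Expected: (batch_size, mnist.IMAGE_PIXELS) and (batch_size,)
            print('images:', image_batch.shape, 'labels:', label_batch.shape)
            coord.request_stop()
            coord.join(threads)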