зеркало из https://github.com/microsoft/caffe.git
Merge pull request #1008 from mohomran/mnist_with_lmdb
convert MNIST demo to lmdb, fixes
This commit is contained in:
Коммит
8bf1e60cdc
|
@ -2,27 +2,37 @@
|
||||||
// This script converts the MNIST dataset to the leveldb format used
|
// This script converts the MNIST dataset to the leveldb format used
|
||||||
// by caffe to perform classification.
|
// by caffe to perform classification.
|
||||||
// Usage:
|
// Usage:
|
||||||
// convert_mnist_data input_image_file input_label_file output_db_file
|
// convert_mnist_data [FLAGS] input_image_file input_label_file
|
||||||
|
// output_db_file
|
||||||
// The MNIST dataset could be downloaded at
|
// The MNIST dataset could be downloaded at
|
||||||
// http://yann.lecun.com/exdb/mnist/
|
// http://yann.lecun.com/exdb/mnist/
|
||||||
|
|
||||||
|
#include <gflags/gflags.h>
|
||||||
|
#include <glog/logging.h>
|
||||||
|
#include <google/protobuf/text_format.h>
|
||||||
|
#include <leveldb/db.h>
|
||||||
|
#include <leveldb/write_batch.h>
|
||||||
|
#include <lmdb.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#include <fstream> // NOLINT(readability/streams)
|
#include <fstream> // NOLINT(readability/streams)
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "glog/logging.h"
|
|
||||||
#include "google/protobuf/text_format.h"
|
|
||||||
#include "leveldb/db.h"
|
|
||||||
#include "stdint.h"
|
|
||||||
|
|
||||||
#include "caffe/proto/caffe.pb.h"
|
#include "caffe/proto/caffe.pb.h"
|
||||||
|
|
||||||
|
using namespace caffe; // NOLINT(build/namespaces)
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
DEFINE_string(backend, "lmdb", "The backend for storing the result");
|
||||||
|
|
||||||
uint32_t swap_endian(uint32_t val) {
|
uint32_t swap_endian(uint32_t val) {
|
||||||
val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
|
val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
|
||||||
return (val << 16) | (val >> 16);
|
return (val << 16) | (val >> 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
void convert_dataset(const char* image_filename, const char* label_filename,
|
void convert_dataset(const char* image_filename, const char* label_filename,
|
||||||
const char* db_filename) {
|
const char* db_path, const string& db_backend) {
|
||||||
// Open files
|
// Open files
|
||||||
std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
|
std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
|
||||||
std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
|
std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
|
||||||
|
@ -51,55 +61,139 @@ void convert_dataset(const char* image_filename, const char* label_filename,
|
||||||
image_file.read(reinterpret_cast<char*>(&cols), 4);
|
image_file.read(reinterpret_cast<char*>(&cols), 4);
|
||||||
cols = swap_endian(cols);
|
cols = swap_endian(cols);
|
||||||
|
|
||||||
// Open leveldb
|
// lmdb
|
||||||
|
MDB_env *mdb_env;
|
||||||
|
MDB_dbi mdb_dbi;
|
||||||
|
MDB_val mdb_key, mdb_data;
|
||||||
|
MDB_txn *mdb_txn;
|
||||||
|
// leveldb
|
||||||
leveldb::DB* db;
|
leveldb::DB* db;
|
||||||
leveldb::Options options;
|
leveldb::Options options;
|
||||||
options.create_if_missing = true;
|
|
||||||
options.error_if_exists = true;
|
options.error_if_exists = true;
|
||||||
leveldb::Status status = leveldb::DB::Open(
|
options.create_if_missing = true;
|
||||||
options, db_filename, &db);
|
options.write_buffer_size = 268435456;
|
||||||
CHECK(status.ok()) << "Failed to open leveldb " << db_filename
|
leveldb::WriteBatch* batch = NULL;
|
||||||
<< ". Is it already existing?";
|
|
||||||
|
|
||||||
|
// Open db
|
||||||
|
if (db_backend == "leveldb") { // leveldb
|
||||||
|
LOG(INFO) << "Opening leveldb " << db_path;
|
||||||
|
leveldb::Status status = leveldb::DB::Open(
|
||||||
|
options, db_path, &db);
|
||||||
|
CHECK(status.ok()) << "Failed to open leveldb " << db_path
|
||||||
|
<< ". Is it already existing?";
|
||||||
|
batch = new leveldb::WriteBatch();
|
||||||
|
} else if (db_backend == "lmdb") { // lmdb
|
||||||
|
LOG(INFO) << "Opening lmdb " << db_path;
|
||||||
|
CHECK_EQ(mkdir(db_path, 0744), 0)
|
||||||
|
<< "mkdir " << db_path << "failed";
|
||||||
|
CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed";
|
||||||
|
CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS) // 1TB
|
||||||
|
<< "mdb_env_set_mapsize failed";
|
||||||
|
CHECK_EQ(mdb_env_open(mdb_env, db_path, 0, 0664), MDB_SUCCESS)
|
||||||
|
<< "mdb_env_open failed";
|
||||||
|
CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS)
|
||||||
|
<< "mdb_txn_begin failed";
|
||||||
|
CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS)
|
||||||
|
<< "mdb_open failed. Does the lmdb already exist? ";
|
||||||
|
} else {
|
||||||
|
LOG(FATAL) << "Unknown db backend " << db_backend;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Storing to db
|
||||||
char label;
|
char label;
|
||||||
char* pixels = new char[rows * cols];
|
char* pixels = new char[rows * cols];
|
||||||
|
int count = 0;
|
||||||
const int kMaxKeyLength = 10;
|
const int kMaxKeyLength = 10;
|
||||||
char key[kMaxKeyLength];
|
char key_cstr[kMaxKeyLength];
|
||||||
std::string value;
|
string value;
|
||||||
|
|
||||||
caffe::Datum datum;
|
Datum datum;
|
||||||
datum.set_channels(1);
|
datum.set_channels(1);
|
||||||
datum.set_height(rows);
|
datum.set_height(rows);
|
||||||
datum.set_width(cols);
|
datum.set_width(cols);
|
||||||
LOG(INFO) << "A total of " << num_items << " items.";
|
LOG(INFO) << "A total of " << num_items << " items.";
|
||||||
LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
|
LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
|
||||||
for (int itemid = 0; itemid < num_items; ++itemid) {
|
for (int item_id = 0; item_id < num_items; ++item_id) {
|
||||||
image_file.read(pixels, rows * cols);
|
image_file.read(pixels, rows * cols);
|
||||||
label_file.read(&label, 1);
|
label_file.read(&label, 1);
|
||||||
datum.set_data(pixels, rows*cols);
|
datum.set_data(pixels, rows*cols);
|
||||||
datum.set_label(label);
|
datum.set_label(label);
|
||||||
|
snprintf(key_cstr, kMaxKeyLength, "%08d", item_id);
|
||||||
datum.SerializeToString(&value);
|
datum.SerializeToString(&value);
|
||||||
snprintf(key, kMaxKeyLength, "%08d", itemid);
|
string keystr(key_cstr);
|
||||||
db->Put(leveldb::WriteOptions(), std::string(key), value);
|
|
||||||
}
|
|
||||||
|
|
||||||
delete db;
|
// Put in db
|
||||||
|
if (db_backend == "leveldb") { // leveldb
|
||||||
|
batch->Put(keystr, value);
|
||||||
|
} else if (db_backend == "lmdb") { // lmdb
|
||||||
|
mdb_data.mv_size = value.size();
|
||||||
|
mdb_data.mv_data = reinterpret_cast<void*>(&value[0]);
|
||||||
|
mdb_key.mv_size = keystr.size();
|
||||||
|
mdb_key.mv_data = reinterpret_cast<void*>(&keystr[0]);
|
||||||
|
CHECK_EQ(mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0), MDB_SUCCESS)
|
||||||
|
<< "mdb_put failed";
|
||||||
|
} else {
|
||||||
|
LOG(FATAL) << "Unknown db backend " << db_backend;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (++count % 1000 == 0) {
|
||||||
|
// Commit txn
|
||||||
|
if (db_backend == "leveldb") { // leveldb
|
||||||
|
db->Write(leveldb::WriteOptions(), batch);
|
||||||
|
delete batch;
|
||||||
|
batch = new leveldb::WriteBatch();
|
||||||
|
} else if (db_backend == "lmdb") { // lmdb
|
||||||
|
CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS)
|
||||||
|
<< "mdb_txn_commit failed";
|
||||||
|
CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS)
|
||||||
|
<< "mdb_txn_begin failed";
|
||||||
|
} else {
|
||||||
|
LOG(FATAL) << "Unknown db backend " << db_backend;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// write the last batch
|
||||||
|
if (count % 1000 != 0) {
|
||||||
|
if (db_backend == "leveldb") { // leveldb
|
||||||
|
db->Write(leveldb::WriteOptions(), batch);
|
||||||
|
delete batch;
|
||||||
|
delete db;
|
||||||
|
} else if (db_backend == "lmdb") { // lmdb
|
||||||
|
CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS) << "mdb_txn_commit failed";
|
||||||
|
mdb_close(mdb_env, mdb_dbi);
|
||||||
|
mdb_env_close(mdb_env);
|
||||||
|
} else {
|
||||||
|
LOG(FATAL) << "Unknown db backend " << db_backend;
|
||||||
|
}
|
||||||
|
LOG(ERROR) << "Processed " << count << " files.";
|
||||||
|
}
|
||||||
delete pixels;
|
delete pixels;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
#ifndef GFLAGS_GFLAGS_H_
|
||||||
|
namespace gflags = google;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
gflags::SetUsageMessage("This script converts the MNIST dataset to\n"
|
||||||
|
"the leveldb/lmdb format used by Caffe to perform classification.\n"
|
||||||
|
"Usage:\n"
|
||||||
|
" convert_mnist_data [FLAGS] input_image_file input_label_file "
|
||||||
|
"output_db_file\n"
|
||||||
|
"The MNIST dataset could be downloaded at\n"
|
||||||
|
" http://yann.lecun.com/exdb/mnist/\n"
|
||||||
|
"You should gunzip them after downloading,"
|
||||||
|
"or directly use data/mnist/get_mnist.sh\n");
|
||||||
|
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
||||||
|
|
||||||
|
const string& db_backend = FLAGS_backend;
|
||||||
|
|
||||||
if (argc != 4) {
|
if (argc != 4) {
|
||||||
printf("This script converts the MNIST dataset to the leveldb format used\n"
|
gflags::ShowUsageWithFlagsRestrict(argv[0],
|
||||||
"by caffe to perform classification.\n"
|
"examples/mnist/convert_mnist_data");
|
||||||
"Usage:\n"
|
|
||||||
" convert_mnist_data input_image_file input_label_file "
|
|
||||||
"output_db_file\n"
|
|
||||||
"The MNIST dataset could be downloaded at\n"
|
|
||||||
" http://yann.lecun.com/exdb/mnist/\n"
|
|
||||||
"You should gunzip them after downloading.\n");
|
|
||||||
} else {
|
} else {
|
||||||
google::InitGoogleLogging(argv[0]);
|
google::InitGoogleLogging(argv[0]);
|
||||||
convert_dataset(argv[1], argv[2], argv[3]);
|
convert_dataset(argv[1], argv[2], argv[3], db_backend);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,18 +1,21 @@
|
||||||
#!/usr/bin/env sh
|
#!/usr/bin/env sh
|
||||||
# This script converts the mnist data into leveldb format.
|
# This script converts the mnist data into leveldb/lmdb format,
|
||||||
|
# depending on the value assigned to $BACKEND.
|
||||||
|
|
||||||
EXAMPLE=examples/mnist
|
EXAMPLE=examples/mnist
|
||||||
DATA=data/mnist
|
DATA=data/mnist
|
||||||
BUILD=build/examples/mnist
|
BUILD=build/examples/mnist
|
||||||
|
|
||||||
echo "Creating leveldb..."
|
BACKEND="lmdb"
|
||||||
|
|
||||||
rm -rf mnist_train_leveldb
|
echo "Creating ${BACKEND}..."
|
||||||
rm -rf mnist_test_leveldb
|
|
||||||
|
rm -rf $EXAMPLE/mnist_train_${BACKEND}
|
||||||
|
rm -rf $EXAMPLE/mnist_test_${BACKEND}
|
||||||
|
|
||||||
$BUILD/convert_mnist_data.bin $DATA/train-images-idx3-ubyte \
|
$BUILD/convert_mnist_data.bin $DATA/train-images-idx3-ubyte \
|
||||||
$DATA/train-labels-idx1-ubyte $EXAMPLE/mnist_train_leveldb
|
$DATA/train-labels-idx1-ubyte $EXAMPLE/mnist_train_${BACKEND} --backend=${BACKEND}
|
||||||
$BUILD/convert_mnist_data.bin $DATA/t10k-images-idx3-ubyte \
|
$BUILD/convert_mnist_data.bin $DATA/t10k-images-idx3-ubyte \
|
||||||
$DATA/t10k-labels-idx1-ubyte $EXAMPLE/mnist_test_leveldb
|
$DATA/t10k-labels-idx1-ubyte $EXAMPLE/mnist_test_${BACKEND} --backend=${BACKEND}
|
||||||
|
|
||||||
echo "Done."
|
echo "Done."
|
||||||
|
|
|
@ -50,7 +50,8 @@ net_param {
|
||||||
top: "data"
|
top: "data"
|
||||||
top: "label"
|
top: "label"
|
||||||
data_param {
|
data_param {
|
||||||
source: "examples/mnist/mnist_train_leveldb"
|
source: "examples/mnist/mnist_train_lmdb"
|
||||||
|
backend: LMDB
|
||||||
batch_size: 64
|
batch_size: 64
|
||||||
transform_param {
|
transform_param {
|
||||||
scale: 0.00390625
|
scale: 0.00390625
|
||||||
|
@ -64,7 +65,8 @@ net_param {
|
||||||
top: "data"
|
top: "data"
|
||||||
top: "label"
|
top: "label"
|
||||||
data_param {
|
data_param {
|
||||||
source: "examples/mnist/mnist_test_leveldb"
|
source: "examples/mnist/mnist_test_lmdb"
|
||||||
|
backend: LMDB
|
||||||
batch_size: 100
|
batch_size: 100
|
||||||
transform_param {
|
transform_param {
|
||||||
scale: 0.00390625
|
scale: 0.00390625
|
||||||
|
@ -81,7 +83,8 @@ net_param {
|
||||||
top: "data"
|
top: "data"
|
||||||
top: "label"
|
top: "label"
|
||||||
data_param {
|
data_param {
|
||||||
source: "examples/mnist/mnist_train_leveldb"
|
source: "examples/mnist/mnist_train_lmdb"
|
||||||
|
backend: LMDB
|
||||||
batch_size: 100
|
batch_size: 100
|
||||||
transform_param {
|
transform_param {
|
||||||
scale: 0.00390625
|
scale: 0.00390625
|
||||||
|
|
|
@ -5,7 +5,8 @@ layers {
|
||||||
top: "data"
|
top: "data"
|
||||||
top: "label"
|
top: "label"
|
||||||
data_param {
|
data_param {
|
||||||
source: "examples/mnist/mnist_train_leveldb"
|
source: "examples/mnist/mnist_train_lmdb"
|
||||||
|
backend: LMDB
|
||||||
batch_size: 64
|
batch_size: 64
|
||||||
transform_param {
|
transform_param {
|
||||||
scale: 0.00390625
|
scale: 0.00390625
|
||||||
|
@ -19,7 +20,8 @@ layers {
|
||||||
top: "data"
|
top: "data"
|
||||||
top: "label"
|
top: "label"
|
||||||
data_param {
|
data_param {
|
||||||
source: "examples/mnist/mnist_test_leveldb"
|
source: "examples/mnist/mnist_test_lmdb"
|
||||||
|
backend: LMDB
|
||||||
batch_size: 100
|
batch_size: 100
|
||||||
transform_param {
|
transform_param {
|
||||||
scale: 0.00390625
|
scale: 0.00390625
|
||||||
|
|
|
@ -102,7 +102,8 @@ int main(int argc, char** argv) {
|
||||||
LOG(INFO) << "Opening leveldb " << db_path;
|
LOG(INFO) << "Opening leveldb " << db_path;
|
||||||
leveldb::Status status = leveldb::DB::Open(
|
leveldb::Status status = leveldb::DB::Open(
|
||||||
options, db_path, &db);
|
options, db_path, &db);
|
||||||
CHECK(status.ok()) << "Failed to open leveldb " << db_path;
|
CHECK(status.ok()) << "Failed to open leveldb " << db_path
|
||||||
|
<< ". Is it already existing?";
|
||||||
batch = new leveldb::WriteBatch();
|
batch = new leveldb::WriteBatch();
|
||||||
} else if (db_backend == "lmdb") { // lmdb
|
} else if (db_backend == "lmdb") { // lmdb
|
||||||
LOG(INFO) << "Opening lmdb " << db_path;
|
LOG(INFO) << "Opening lmdb " << db_path;
|
||||||
|
@ -116,7 +117,7 @@ int main(int argc, char** argv) {
|
||||||
CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS)
|
CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS)
|
||||||
<< "mdb_txn_begin failed";
|
<< "mdb_txn_begin failed";
|
||||||
CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS)
|
CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS)
|
||||||
<< "mdb_open failed";
|
<< "mdb_open failed. Does the lmdb already exist? ";
|
||||||
} else {
|
} else {
|
||||||
LOG(FATAL) << "Unknown db backend " << db_backend;
|
LOG(FATAL) << "Unknown db backend " << db_backend;
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче