From 583b84ef65871737a1fec8d69d7acc6ccfa860c7 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Fri, 11 Jul 2014 13:27:38 -0700 Subject: [PATCH 1/5] [docs] cosmetic --- docs/_layouts/default.html | 29 ++++++---------- docs/index.md | 2 ++ docs/javascripts/scale.fix.js | 20 ----------- docs/stylesheets/styles.css | 65 +++++------------------------------ 4 files changed, 21 insertions(+), 95 deletions(-) delete mode 100644 docs/javascripts/scale.fix.js diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html index f5d90004..756e8eba 100644 --- a/docs/_layouts/default.html +++ b/docs/_layouts/default.html @@ -7,10 +7,10 @@ Caffe {% if page contains 'title' %}| {{ page.title }}{% endif %} - - - - + + + + - +

Caffe

+

+ Deep learning framework developed by Yangqing Jia / BVLC +

-

Maintained by
BVLC

-

Created by
Yangqing Jia

-
{{ content }}
- - diff --git a/docs/index.md b/docs/index.md index 5d5b35ff..64ffe8b2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,6 +7,8 @@ Caffe is a deep learning framework developed with cleanliness, readability, and It was created by [Yangqing Jia](http://daggerfs.com), and is in active development by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu)) and by community contributors. Caffe is released under the [BSD 2-Clause license](https://github.com/BVLC/caffe/blob/master/LICENSE). +Check out our web image classification [demo](http://demo.caffe.berkeleyvision.org)! + ## Why **Clean architecture** enables rapid deployment. diff --git a/docs/javascripts/scale.fix.js b/docs/javascripts/scale.fix.js deleted file mode 100644 index 08716c00..00000000 --- a/docs/javascripts/scale.fix.js +++ /dev/null @@ -1,20 +0,0 @@ -fixScale = function(doc) { - - var addEvent = 'addEventListener', - type = 'gesturestart', - qsa = 'querySelectorAll', - scales = [1, 1], - meta = qsa in doc ? 
doc[qsa]('meta[name=viewport]') : []; - - function fix() { - meta.content = 'width=device-width,minimum-scale=' + scales[0] + ',maximum-scale=' + scales[1]; - doc.removeEventListener(type, fix, true); - } - - if ((meta = meta[meta.length - 1]) && addEvent in doc) { - fix(); - scales = [.25, 1.6]; - doc[addEvent](type, fix, true); - } - -}; \ No newline at end of file diff --git a/docs/stylesheets/styles.css b/docs/stylesheets/styles.css index 5862c6c2..2dbedb8a 100644 --- a/docs/stylesheets/styles.css +++ b/docs/stylesheets/styles.css @@ -42,7 +42,7 @@ h3 { } h4, h5, h6 { - font-family: Times, serif; + font-family: 'PT Serif', serif; font-weight: 700; } @@ -68,12 +68,11 @@ strong { } ul { - list-style: inside; padding-left: 25px; } ol { - list-style: decimal inside; + list-style: decimal; padding-left: 20px; } @@ -129,7 +128,6 @@ p img { } /* Code blocks */ - code, pre { font-family: monospace; color:#000; @@ -149,7 +147,6 @@ pre { /* Tables */ - table { width:100%; } @@ -161,7 +158,7 @@ table { } th { - font-family: 'Arvo', Helvetica, Arial, sans-serif; + font-family: 'Open Sans', sans-serif; font-size: 18px; font-weight: normal; padding: 10px; @@ -184,21 +181,11 @@ td { /* Header */ header { - background-color: #171717; - color: #FDFDFB; width:170px; float:left; position:fixed; - border: 1px solid #000; - -webkit-border-top-right-radius: 4px; - -webkit-border-bottom-right-radius: 4px; - -moz-border-radius-topright: 4px; - -moz-border-radius-bottomright: 4px; - border-top-right-radius: 4px; - border-bottom-right-radius: 4px; padding: 12px 25px 22px 50px; margin: 24px 25px 0 0; - -webkit-font-smoothing: antialiased; } p.header { @@ -206,23 +193,12 @@ p.header { } h1.header { - /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/ font-size: 30px; font-weight: 300; line-height: 1.3em; - border-bottom: none; margin-top: 0; } - -h1.header, a.header, a.name, header a{ - color: #fff; -} - -a.header { - text-decoration: underline; -} - a.name { white-space: 
nowrap; } @@ -239,38 +215,19 @@ header li { margin-bottom: 12px; line-height: 1em; padding: 6px 6px 6px 7px; - - background: #AF0011; - background: -moz-linear-gradient(top, #AF0011 0%, #820011 100%); - background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#f8f8f8), color-stop(100%,#dddddd)); - background: -webkit-linear-gradient(top, #AF0011 0%,#820011 100%); - background: -o-linear-gradient(top, #AF0011 0%,#820011 100%); - background: -ms-linear-gradient(top, #AF0011 0%,#820011 100%); - background: linear-gradient(top, #AF0011 0%,#820011 100%); - + background: #c30000; border-radius:4px; - border:1px solid #0D0D0D; - - -webkit-box-shadow: inset 0px 1px 1px 0 rgba(233,2,38, 1); - box-shadow: inset 0px 1px 1px 0 rgba(233,2,38, 1); - + border:1px solid #555; } header li:hover { - background: #C3001D; - background: -moz-linear-gradient(top, #C3001D 0%, #950119 100%); - background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#f8f8f8), color-stop(100%,#dddddd)); - background: -webkit-linear-gradient(top, #C3001D 0%,#950119 100%); - background: -o-linear-gradient(top, #C3001D 0%,#950119 100%); - background: -ms-linear-gradient(top, #C3001D 0%,#950119 100%); - background: linear-gradient(top, #C3001D 0%,#950119 100%); + background: #dd0000; } a.buttons { - -webkit-font-smoothing: antialiased; - background: url(../images/arrow-down.png) no-repeat; + color: #fff; + text-decoration: none; font-weight: normal; - text-shadow: rgba(0, 0, 0, 0.4) 0 -1px 0; padding: 2px 2px 2px 22px; height: 30px; } @@ -280,12 +237,6 @@ a.github { background-size: 15%; } -a.buttons:hover { - color: #fff; - text-decoration: none; -} - - /* Section - for main page content */ section { From 0de282ce21e4495dd1d0f514b01dddb3d761e10f Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Fri, 11 Jul 2014 13:28:55 -0700 Subject: [PATCH 2/5] =?UTF-8?q?[docs]=20moved=20example=20md=E2=80=99s=20t?= 
=?UTF-8?q?o=20examples/**/md=E2=80=99s=20and=20added=20script=20to=20gath?= =?UTF-8?q?er=20them=20for=20publication?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + docs/README.md | 6 +- docs/index.md | 47 ++-- docs/mnist.md | 91 ------ docs/mnist_prototxt.md | 153 ---------- docs/mnist_solver_prototxt.md | 37 --- docs/cifar10.md => examples/cifar10/readme.md | 5 +- examples/detection.ipynb | 6 +- .../feature_extraction/readme.md | 7 +- examples/filter_visualization.ipynb | 6 +- .../imagenet/readme.md | 5 +- examples/imagenet_classification.ipynb | 6 +- examples/mnist/readme.md | 266 ++++++++++++++++++ scripts/build_docs.sh | 12 +- scripts/copy_notebook.py | 32 +++ scripts/deploy_docs.sh | 4 +- scripts/gather_examples.sh | 28 ++ 17 files changed, 394 insertions(+), 318 deletions(-) delete mode 100644 docs/mnist.md delete mode 100644 docs/mnist_prototxt.md delete mode 100644 docs/mnist_solver_prototxt.md rename docs/cifar10.md => examples/cifar10/readme.md (97%) rename docs/feature_extraction.md => examples/feature_extraction/readme.md (93%) rename docs/imagenet_training.md => examples/imagenet/readme.md (98%) create mode 100644 examples/mnist/readme.md create mode 100755 scripts/copy_notebook.py create mode 100755 scripts/gather_examples.sh diff --git a/.gitignore b/.gitignore index fee9a6ba..f1f2b400 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,7 @@ examples/* # Generated documentation docs/_site +docs/gathered _site # Sublime Text settings diff --git a/docs/README.md b/docs/README.md index 81e1566b..8f1781e3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,3 +1,5 @@ -To generate stuff you can paste in an .md page from an IPython notebook, run +# Caffe Documentation - ipython nbconvert --to markdown +To generate the documentation, run `$CAFFE_ROOT/scripts/build_docs.sh`. 
+ +To push your changes to the documentation to the gh-pages branch of your or the BVLC repo, run `$CAFFE_ROOT/scripts/deploy_docs.sh `. diff --git a/docs/index.md b/docs/index.md index 64ffe8b2..6ffab7a0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,14 +9,14 @@ Caffe is released under the [BSD 2-Clause license](https://github.com/BVLC/caffe Check out our web image classification [demo](http://demo.caffe.berkeleyvision.org)! -## Why +## Why use Caffe? **Clean architecture** enables rapid deployment. Networks are specified in simple config files, with no hard-coded parameters in the code. -Switching between CPU and GPU code is as simple as setting a flag -- so models can be trained on a GPU machine, and then used on commodity clusters. +Switching between CPU and GPU is as simple as setting a flag -- so models can be trained on a GPU machine, and then used on commodity clusters. **Readable & modifiable implementation** fosters active development. -In Caffe's first six months, it has been forked by over 300 developers on Github, and many have contributed significant changes. +In Caffe's first six months, it has been forked by over 300 developers on Github, and many have pushed significant changes. **Speed** makes Caffe perfect for industry use. Caffe can process over **40M images per day** with a single NVIDIA K40 or Titan GPU\*. @@ -31,27 +31,34 @@ There is an active discussion and support community on [Github](https://github.c Consult performance [details](/performance_hardware.html).

-## How +## Documentation -* [Introductory slides](http://dl.caffe.berkeleyvision.org/caffe-presentation.pdf): slides about the Caffe architecture, *updated 03/14*. -* [ACM MM paper](http://ucb-icsi-vision-group.github.io/caffe-paper/caffe.pdf): a 4-page report for the ACM Multimedia Open Source competition. -* [Installation instructions](/installation.html): tested on Ubuntu, Red Hat, OS X. -* [Pre-trained models](/getting_pretrained_models.html): BVLC provides ready-to-use models for non-commercial use. -* [Development](/development.html): Guidelines for development and contributing to Caffe. +- [Introductory slides](http://dl.caffe.berkeleyvision.org/caffe-presentation.pdf)
+Slides about the Caffe architecture, *updated 03/14*. +- [ACM MM paper](http://ucb-icsi-vision-group.github.io/caffe-paper/caffe.pdf)
+A 4-page report for the ACM Multimedia Open Source competition. +- [Installation instructions](/installation.html)
+Tested on Ubuntu, Red Hat, OS X. +- [Pre-trained models](/getting_pretrained_models.html)
+BVLC provides ready-to-use models for non-commercial use. +- [Development](/development.html)
+Guidelines for development and contributing to Caffe. -### Tutorials and Examples +### Examples -* [Image Classification \[notebook\]][imagenet_classification]: classify images with the pretrained ImageNet model by the Python interface. -* [Detection \[notebook\]][detection]: run a pretrained model as a detector in Python. -* [Visualizing Features and Filters \[notebook\]][visualizing_filters]: extracting features and visualizing trained filters with an example image, viewed layer-by-layer. -* [LeNet / MNIST Demo](/mnist.html): end-to-end training and testing of LeNet on MNIST. -* [CIFAR-10 Demo](/cifar10.html): training and testing on the CIFAR-10 data. -* [Training ImageNet](/imagenet_training.html): recipe for end-to-end training of an ImageNet classifier. -* [Feature extraction with C++](/feature_extraction.html): feature extraction using pre-trained model. +{% for page in site.pages %} +{% if page.category == 'example' %} +-
{{page.title}}
{{page.description}}
+{% endif %} +{% endfor %} -[imagenet_classification]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/imagenet_classification.ipynb -[detection]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/detection.ipynb -[visualizing_filters]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/filter_visualization.ipynb +### Notebook examples + +{% for page in site.pages %} +{% if page.category == 'notebook' %} +-
{{page.title}}
{{page.description}}
+{% endif %} +{% endfor %} ## Citing Caffe diff --git a/docs/mnist.md b/docs/mnist.md deleted file mode 100644 index 9a9b46a4..00000000 --- a/docs/mnist.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -layout: default -title: Caffe ---- - -Training MNIST with Caffe -================ - -We will assume that you have caffe successfully compiled. If not, please refer to the [Installation page](installation.html). In this tutorial, we will assume that your caffe installation is located at `CAFFE_ROOT`. - -Prepare Datasets ----------------- - -You will first need to download and convert the data format from the MNIST website. To do this, simply run the following commands: - - cd $CAFFE_ROOT/data/mnist - ./get_mnist.sh - cd $CAFFE_ROOT/examples/mnist - ./create_mnist.sh - -If it complains that `wget` or `gunzip` are not installed, you need to install them respectively. After running the script there should be two datasets, `mnist-train-leveldb`, and `mnist-test-leveldb`. - -LeNet: the MNIST Classification Model -------------------------------------- -Before we actually run the training program, let's explain what will happen. We will use the [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf) network, which is known to work well on digit classification tasks. We will use a slightly different version from the original LeNet implementation, replacing the sigmoid activations with Rectified Linear Unit (ReLU) activations for the neurons. - -The design of LeNet contains the essence of CNNs that are still used in larger models such as the ones in ImageNet. In general, it consists of a convolutional layer followed by a pooling layer, another convolution layer followed by a pooling layer, and then two fully connected layers similar to the conventional multilayer perceptrons. We have defined the layers in `CAFFE_ROOT/data/lenet.prototxt`. 
- -If you would like to read about step-by-step instruction on how the protobuf definitions are written, see [MNIST: Define the Network](mnist_prototxt.html) and [MNIST: Define the Solver](mnist_solver_prototxt.html)?. - -Training and Testing the Model ------------------------------- - -Training the model is simple after you have written the network definition protobuf and solver protobuf files. Simply run `train_mnist.sh`, or the following command directly: - - cd $CAFFE_ROOT/examples/mnist - ./train_lenet.sh - -`train_lenet.sh` is a simple script, but here are a few explanations: `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `train_net.bin`, with the solver protobuf text file as its argument. - -When you run the code, you will see a lot of messages flying by like this: - - I1203 net.cpp:66] Creating Layer conv1 - I1203 net.cpp:76] conv1 <- data - I1203 net.cpp:101] conv1 -> conv1 - I1203 net.cpp:116] Top shape: 20 24 24 - I1203 net.cpp:127] conv1 needs backward computation. - -These messages tell you the details about each layer, its connections and its output shape, which may be helpful in debugging. After the initialization, the training will start: - - I1203 net.cpp:142] Network initialization done. - I1203 solver.cpp:36] Solver scaffolding done. - I1203 solver.cpp:44] Solving LeNet - -Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 1000 iterations. You will see messages like this: - - I1203 solver.cpp:204] Iteration 100, lr = 0.00992565 - I1203 solver.cpp:66] Iteration 100, loss = 0.26044 - ... - I1203 solver.cpp:84] Testing net - I1203 solver.cpp:111] Test score #0: 0.9785 - I1203 solver.cpp:111] Test score #1: 0.0606671 - -For each training iteration, `lr` is the learning rate of that iteration, and `loss` is the training function. 
For the output of the testing phase, score 0 is the accuracy, and score 1 is the testing loss function. - -And after a few minutes, you are done! - - I1203 solver.cpp:84] Testing net - I1203 solver.cpp:111] Test score #0: 0.9897 - I1203 solver.cpp:111] Test score #1: 0.0324599 - I1203 solver.cpp:126] Snapshotting to lenet_iter_10000 - I1203 solver.cpp:133] Snapshotting solver state to lenet_iter_10000.solverstate - I1203 solver.cpp:78] Optimization Done. - -The final model, stored as a binary protobuf file, is stored at - - lenet_iter_10000 - -which you can deploy as a trained model in your application, if you are training on a real-world application dataset. - -Um... How about GPU training? ------------------------------ - -You just did! All the training was carried out on the GPU. In fact, if you would like to do training on CPU, you can simply change one line in `lenet_solver.prototxt`: - - # solver mode: CPU or GPU - solver_mode: CPU - -and you will be using CPU for training. Isn't that easy? - -MNIST is a small dataset, so training with GPU does not really introduce too much benefit due to communication overheads. On larger datasets with more complex models, such as ImageNet, the computation speed difference will be more significant. diff --git a/docs/mnist_prototxt.md b/docs/mnist_prototxt.md deleted file mode 100644 index aaff2b00..00000000 --- a/docs/mnist_prototxt.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -layout: default -title: Caffe ---- - -Define the MNIST Network -========================= - -This page explains the prototxt file `lenet_train.prototxt` used in the MNIST demo. We assume that you are familiar with [Google Protobuf](https://developers.google.com/protocol-buffers/docs/overview), and assume that you have read the protobuf definitions used by Caffe, which can be found at [src/caffe/proto/caffe.proto](https://github.com/Yangqing/caffe/blob/master/src/caffe/proto/caffe.proto). 
- -Specifically, we will write a `caffe::NetParameter` (or in python, `caffe.proto.caffe_pb2.NetParameter`) protubuf. We will start by giving the network a name: - - name: "LeNet" - -Writing the Data Layer ----------------------- -Currently, we will read the MNIST data from the leveldb we created earlier in the demo. This is defined by a data layer: - - layers { - name: "mnist" - type: DATA - data_param { - source: "mnist-train-leveldb" - batch_size: 64 - scale: 0.00390625 - } - top: "data" - top: "label" - } - -Specifically, this layer has name `mnist`, type `data`, and it reads the data from the given leveldb source. We will use a batch size of 64, and scale the incoming pixels so that they are in the range \[0,1\). Why 0.00390625? It is 1 divided by 256. And finally, this layer produces two blobs, one is the `data` blob, and one is the `label` blob. - -Writing the Convolution Layer --------------------------------------------- -Let's define the first convolution layer: - - layers { - name: "conv1" - type: CONVOLUTION - blobs_lr: 1. - blobs_lr: 2. - convolution_param { - num_output: 20 - kernelsize: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - bottom: "data" - top: "conv1" - } - -This layer takes the `data` blob (it is provided by the data layer), and produces the `conv1` layer. It produces outputs of 20 channels, with the convolutional kernel size 5 and carried out with stride 1. - -The fillers allow us to randomly initialize the value of the weights and bias. For the weight filler, we will use the `xavier` algorithm that automatically determines the scale of initialization based on the number of input and output neurons. For the bias filler, we will simply initialize it as constant, with the default filling value 0. - -`blobs_lr` are the learning rate adjustments for the layer's learnable parameters. 
In this case, we will set the weight learning rate to be the same as the learning rate given by the solver during runtime, and the bias learning rate to be twice as large as that - this usually leads to better convergence rates. - -Writing the Pooling Layer -------------------------- -Phew. Pooling layers are actually much easier to define: - - layers { - name: "pool1" - type: POOLING - pooling_param { - kernel_size: 2 - stride: 2 - pool: MAX - } - bottom: "conv1" - top: "pool1" - } - -This says we will perform max pooling with a pool kernel size 2 and a stride of 2 (so no overlapping between neighboring pooling regions). - -Similarly, you can write up the second convolution and pooling layers. Check `data/lenet.prototxt` for details. - -Writing the Fully Connected Layer ----------------------------------- -Writing a fully connected layer is also simple: - - layers { - name: "ip1" - type: INNER_PRODUCT - blobs_lr: 1. - blobs_lr: 2. - inner_product_param { - num_output: 500 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - bottom: "pool2" - top: "ip1" - } - -This defines a fully connected layer (for some legacy reason, Caffe calls it an `innerproduct` layer) with 500 outputs. All other lines look familiar, right? - -Writing the ReLU Layer ----------------------- -A ReLU Layer is also simple: - - layers { - name: "relu1" - type: RELU - bottom: "ip1" - top: "ip1" - } - -Since ReLU is an element-wise operation, we can do *in-place* operations to save some memory. This is achieved by simply giving the same name to the bottom and top blobs. Of course, do NOT use duplicated blob names for other layer types! - -After the ReLU layer, we will write another innerproduct layer: - - layers { - name: "ip2" - type: INNER_PRODUCT - blobs_lr: 1. - blobs_lr: 2. 
- inner_product_param { - num_output: 10 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - bottom: "ip1" - top: "ip2" - } - -Writing the Loss Layer -------------------------- -Finally, we will write the loss! - - layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" - } - -The `softmax_loss` layer implements both the softmax and the multinomial logistic loss (that saves time and improves numerical stability). It takes two blobs, the first one being the prediction and the second one being the `label` provided by the data layer (remember it?). It does not produce any outputs - all it does is to compute the loss function value, report it when backpropagation starts, and initiates the gradient with respect to `ip2`. This is where all magic starts. - -Now that we have demonstrated how to write the MNIST layer definition prototxt, maybe check out [how we write a solver prototxt](mnist_solver_prototxt.html)? diff --git a/docs/mnist_solver_prototxt.md b/docs/mnist_solver_prototxt.md deleted file mode 100644 index aa3578f1..00000000 --- a/docs/mnist_solver_prototxt.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -layout: default -title: Caffe ---- - -Define the MNIST Solver -======================= - -The page is under construction. For now, check out the comments in the solver prototxt file, which explains each line in the prototxt: - - # The training protocol buffer definition - train_net: "lenet_train.prototxt" - # The testing protocol buffer definition - test_net: "lenet_test.prototxt" - # test_iter specifies how many forward passes the test should carry out. - # In the case of MNIST, we have test batch size 100 and 100 test iterations, - # covering the full 10,000 testing images. - test_iter: 100 - # Carry out testing every 500 training iterations. - test_interval: 500 - # The base learning rate, momentum and the weight decay of the network. 
- base_lr: 0.01 - momentum: 0.9 - weight_decay: 0.0005 - # The learning rate policy - lr_policy: "inv" - gamma: 0.0001 - power: 0.75 - # Display every 100 iterations - display: 100 - # The maximum number of iterations - max_iter: 10000 - # snapshot intermediate results - snapshot: 5000 - snapshot_prefix: "lenet" - # solver mode: 0 for CPU and 1 for GPU - solver_mode: 1 diff --git a/docs/cifar10.md b/examples/cifar10/readme.md similarity index 97% rename from docs/cifar10.md rename to examples/cifar10/readme.md index dd85667d..9d5bd7b2 100644 --- a/docs/cifar10.md +++ b/examples/cifar10/readme.md @@ -1,6 +1,9 @@ --- +title: CIFAR-10 tutorial +category: example +description: Train and test Caffe on CIFAR-10 data. +include_in_docs: true layout: default -title: Caffe --- Alex's CIFAR-10 tutorial, Caffe style diff --git a/examples/detection.ipynb b/examples/detection.ipynb index feb3e36f..5ec986f6 100644 --- a/examples/detection.ipynb +++ b/examples/detection.ipynb @@ -1,6 +1,8 @@ { "metadata": { - "name": "" + "name": "ImageNet detection", + "description": "Run a pretrained model as a detector in Python.", + "include_in_docs": true }, "nbformat": 3, "nbformat_minor": 0, @@ -652,4 +654,4 @@ "metadata": {} } ] -} \ No newline at end of file +} diff --git a/docs/feature_extraction.md b/examples/feature_extraction/readme.md similarity index 93% rename from docs/feature_extraction.md rename to examples/feature_extraction/readme.md index fa23e9c8..c336e718 100644 --- a/docs/feature_extraction.md +++ b/examples/feature_extraction/readme.md @@ -1,6 +1,9 @@ --- +title: Feature extraction with Caffe C++ code. +description: Extract AlexNet features using the Caffe binary. +category: example +include_in_docs: true layout: default -title: Caffe --- Extracting Features @@ -57,7 +60,7 @@ The last parameter above is the number of data mini-batches. The features are stored to LevelDB `examples/_temp/features`, ready for access by some other code. 
-If you meet with the error "Check failed: status.ok() Failed to open leveldb examples/_temp/features", it is because the directory examples/_temp/features has been created the last time you run the command. Remove it and run again. +If you meet with the error "Check failed: status.ok() Failed to open leveldb examples/_temp/features", it is because the directory examples/_temp/features has been created the last time you run the command. Remove it and run again. rm -rf examples/_temp/features/ diff --git a/examples/filter_visualization.ipynb b/examples/filter_visualization.ipynb index 6f494821..22d7fc21 100644 --- a/examples/filter_visualization.ipynb +++ b/examples/filter_visualization.ipynb @@ -1,6 +1,8 @@ { "metadata": { - "name": "" + "name": "Filter visualization", + "description": "Extracting features and visualizing trained filters with an example image, viewed layer-by-layer.", + "include_in_docs": true }, "nbformat": 3, "nbformat_minor": 0, @@ -611,4 +613,4 @@ "metadata": {} } ] -} \ No newline at end of file +} diff --git a/docs/imagenet_training.md b/examples/imagenet/readme.md similarity index 98% rename from docs/imagenet_training.md rename to examples/imagenet/readme.md index 9e0076cf..55348727 100644 --- a/docs/imagenet_training.md +++ b/examples/imagenet/readme.md @@ -1,6 +1,9 @@ --- +title: ImageNet tutorial +description: Train and test "AlexNet" on ImageNet challenge data. 
+category: example +include_in_docs: true layout: default -title: Caffe --- Yangqing's Recipe on Brewing ImageNet diff --git a/examples/imagenet_classification.ipynb b/examples/imagenet_classification.ipynb index 0e0e06bb..7ac140d9 100644 --- a/examples/imagenet_classification.ipynb +++ b/examples/imagenet_classification.ipynb @@ -1,6 +1,8 @@ { "metadata": { - "name": "" + "description": "Use the pre-trained ImageNet model to classify images with the Python interface.", + "name": "ImageNet Classification", + "include_in_docs": true }, "nbformat": 3, "nbformat_minor": 0, @@ -407,4 +409,4 @@ "metadata": {} } ] -} \ No newline at end of file +} diff --git a/examples/mnist/readme.md b/examples/mnist/readme.md new file mode 100644 index 00000000..d609cfff --- /dev/null +++ b/examples/mnist/readme.md @@ -0,0 +1,266 @@ +--- +title: MNIST Tutorial +description: Train and test "LeNet" on MNIST data. +category: example +include_in_docs: true +layout: default +--- + +# Training MNIST with Caffe + +We will assume that you have caffe successfully compiled. If not, please refer to the [Installation page](installation.html). In this tutorial, we will assume that your caffe installation is located at `CAFFE_ROOT`. + +## Prepare Datasets + +You will first need to download and convert the data format from the MNIST website. To do this, simply run the following commands: + + cd $CAFFE_ROOT/data/mnist + ./get_mnist.sh + cd $CAFFE_ROOT/examples/mnist + ./create_mnist.sh + +If it complains that `wget` or `gunzip` are not installed, you need to install them respectively. After running the script there should be two datasets, `mnist-train-leveldb`, and `mnist-test-leveldb`. + +## LeNet: the MNIST Classification Model + +Before we actually run the training program, let's explain what will happen. We will use the [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf) network, which is known to work well on digit classification tasks. 
We will use a slightly different version from the original LeNet implementation, replacing the sigmoid activations with Rectified Linear Unit (ReLU) activations for the neurons. + +The design of LeNet contains the essence of CNNs that are still used in larger models such as the ones in ImageNet. In general, it consists of a convolutional layer followed by a pooling layer, another convolution layer followed by a pooling layer, and then two fully connected layers similar to the conventional multilayer perceptrons. We have defined the layers in `CAFFE_ROOT/data/lenet.prototxt`. + +## Define the MNIST Network + +This section explains the prototxt file `lenet_train.prototxt` used in the MNIST demo. We assume that you are familiar with [Google Protobuf](https://developers.google.com/protocol-buffers/docs/overview), and assume that you have read the protobuf definitions used by Caffe, which can be found at [src/caffe/proto/caffe.proto](https://github.com/Yangqing/caffe/blob/master/src/caffe/proto/caffe.proto). + +Specifically, we will write a `caffe::NetParameter` (or in python, `caffe.proto.caffe_pb2.NetParameter`) protobuf. We will start by giving the network a name: + + name: "LeNet" + +### Writing the Data Layer + +Currently, we will read the MNIST data from the leveldb we created earlier in the demo. This is defined by a data layer: + + layers { + name: "mnist" + type: DATA + data_param { + source: "mnist-train-leveldb" + batch_size: 64 + scale: 0.00390625 + } + top: "data" + top: "label" + } + +Specifically, this layer has name `mnist`, type `data`, and it reads the data from the given leveldb source. We will use a batch size of 64, and scale the incoming pixels so that they are in the range \[0,1\). Why 0.00390625? It is 1 divided by 256. And finally, this layer produces two blobs, one is the `data` blob, and one is the `label` blob.
+ +### Writing the Convolution Layer + +Let's define the first convolution layer: + + layers { + name: "conv1" + type: CONVOLUTION + blobs_lr: 1. + blobs_lr: 2. + convolution_param { + num_output: 20 + kernelsize: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + bottom: "data" + top: "conv1" + } + +This layer takes the `data` blob (it is provided by the data layer), and produces the `conv1` layer. It produces outputs of 20 channels, with the convolutional kernel size 5 and carried out with stride 1. + +The fillers allow us to randomly initialize the value of the weights and bias. For the weight filler, we will use the `xavier` algorithm that automatically determines the scale of initialization based on the number of input and output neurons. For the bias filler, we will simply initialize it as constant, with the default filling value 0. + +`blobs_lr` are the learning rate adjustments for the layer's learnable parameters. In this case, we will set the weight learning rate to be the same as the learning rate given by the solver during runtime, and the bias learning rate to be twice as large as that - this usually leads to better convergence rates. + +### Writing the Pooling Layer + +Phew. Pooling layers are actually much easier to define: + + layers { + name: "pool1" + type: POOLING + pooling_param { + kernel_size: 2 + stride: 2 + pool: MAX + } + bottom: "conv1" + top: "pool1" + } + +This says we will perform max pooling with a pool kernel size 2 and a stride of 2 (so no overlapping between neighboring pooling regions). + +Similarly, you can write up the second convolution and pooling layers. Check `data/lenet.prototxt` for details. + +### Writing the Fully Connected Layer + +Writing a fully connected layer is also simple: + + layers { + name: "ip1" + type: INNER_PRODUCT + blobs_lr: 1. + blobs_lr: 2. 
+ inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + bottom: "pool2" + top: "ip1" + } + +This defines a fully connected layer (for some legacy reason, Caffe calls it an `innerproduct` layer) with 500 outputs. All other lines look familiar, right? + +### Writing the ReLU Layer + +A ReLU Layer is also simple: + + layers { + name: "relu1" + type: RELU + bottom: "ip1" + top: "ip1" + } + +Since ReLU is an element-wise operation, we can do *in-place* operations to save some memory. This is achieved by simply giving the same name to the bottom and top blobs. Of course, do NOT use duplicated blob names for other layer types! + +After the ReLU layer, we will write another innerproduct layer: + + layers { + name: "ip2" + type: INNER_PRODUCT + blobs_lr: 1. + blobs_lr: 2. + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + bottom: "ip1" + top: "ip2" + } + +### Writing the Loss Layer + +Finally, we will write the loss! + + layers { + name: "loss" + type: SOFTMAX_LOSS + bottom: "ip2" + bottom: "label" + } + +The `softmax_loss` layer implements both the softmax and the multinomial logistic loss (that saves time and improves numerical stability). It takes two blobs, the first one being the prediction and the second one being the `label` provided by the data layer (remember it?). It does not produce any outputs - all it does is to compute the loss function value, report it when backpropagation starts, and initiates the gradient with respect to `ip2`. This is where all magic starts. + +## Define the MNIST Solver + +Check out the comments explaining each line in the prototxt: + + # The training protocol buffer definition + train_net: "lenet_train.prototxt" + # The testing protocol buffer definition + test_net: "lenet_test.prototxt" + # test_iter specifies how many forward passes the test should carry out. 
+ # In the case of MNIST, we have test batch size 100 and 100 test iterations, + # covering the full 10,000 testing images. + test_iter: 100 + # Carry out testing every 500 training iterations. + test_interval: 500 + # The base learning rate, momentum and the weight decay of the network. + base_lr: 0.01 + momentum: 0.9 + weight_decay: 0.0005 + # The learning rate policy + lr_policy: "inv" + gamma: 0.0001 + power: 0.75 + # Display every 100 iterations + display: 100 + # The maximum number of iterations + max_iter: 10000 + # snapshot intermediate results + snapshot: 5000 + snapshot_prefix: "lenet" + # solver mode: 0 for CPU and 1 for GPU + solver_mode: 1 + +## Training and Testing the Model + +Training the model is simple after you have written the network definition protobuf and solver protobuf files. Simply run `train_lenet.sh`, or the following command directly: + + cd $CAFFE_ROOT/examples/mnist + ./train_lenet.sh + +`train_lenet.sh` is a simple script, but here are a few explanations: `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `train_net.bin`, with the solver protobuf text file as its argument. + +When you run the code, you will see a lot of messages flying by like this: + + I1203 net.cpp:66] Creating Layer conv1 + I1203 net.cpp:76] conv1 <- data + I1203 net.cpp:101] conv1 -> conv1 + I1203 net.cpp:116] Top shape: 20 24 24 + I1203 net.cpp:127] conv1 needs backward computation. + +These messages tell you the details about each layer, its connections and its output shape, which may be helpful in debugging. After the initialization, the training will start: + + I1203 net.cpp:142] Network initialization done. + I1203 solver.cpp:36] Solver scaffolding done. + I1203 solver.cpp:44] Solving LeNet + +Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 500 iterations.
You will see messages like this: + + I1203 solver.cpp:204] Iteration 100, lr = 0.00992565 + I1203 solver.cpp:66] Iteration 100, loss = 0.26044 + ... + I1203 solver.cpp:84] Testing net + I1203 solver.cpp:111] Test score #0: 0.9785 + I1203 solver.cpp:111] Test score #1: 0.0606671 + +For each training iteration, `lr` is the learning rate of that iteration, and `loss` is the training loss function. For the output of the testing phase, score 0 is the accuracy, and score 1 is the testing loss function. + +And after a few minutes, you are done! + + I1203 solver.cpp:84] Testing net + I1203 solver.cpp:111] Test score #0: 0.9897 + I1203 solver.cpp:111] Test score #1: 0.0324599 + I1203 solver.cpp:126] Snapshotting to lenet_iter_10000 + I1203 solver.cpp:133] Snapshotting solver state to lenet_iter_10000.solverstate + I1203 solver.cpp:78] Optimization Done. + +The final model is stored as a binary protobuf file at + + lenet_iter_10000 + +which you can deploy as a trained model in your application, if you are training on a real-world application dataset. + +### Um... How about GPU training? + +You just did! All the training was carried out on the GPU. In fact, if you would like to do training on CPU, you can simply change one line in `lenet_solver.prototxt`: + + # solver mode: CPU or GPU + solver_mode: CPU + +and you will be using CPU for training. Isn't that easy? + +MNIST is a small dataset, so training with GPU does not really introduce too much benefit due to communication overheads. On larger datasets with more complex models, such as ImageNet, the computation speed difference will be more significant. diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh index 1faf8bd5..c5680151 100755 --- a/scripts/build_docs.sh +++ b/scripts/build_docs.sh @@ -1,11 +1,17 @@ #!/bin/bash +# Build documentation for display in web browser.
PORT=${1:-4000} -echo "usage: build_docs.sh [port]" +echo "usage: build.sh [port]" # Find the docs dir, no matter where the script is called -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd $DIR/../docs +ROOT_DIR="$( cd "$(dirname "$0")"/.. ; pwd -P )" +cd $ROOT_DIR +# Gather docs. +scripts/gather_examples.sh + +# Display docs using web server. +cd docs jekyll serve -w -s . -d _site --port=$PORT diff --git a/scripts/copy_notebook.py b/scripts/copy_notebook.py new file mode 100755 index 00000000..4f60cfd0 --- /dev/null +++ b/scripts/copy_notebook.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +""" +Takes as arguments: +1. the path to a JSON file (such as an IPython notebook). +2. the path to output file + +If 'metadata' dict in the JSON file contains 'include_in_docs': true, +then copies the file to output file, appending the 'metadata' property +as YAML front-matter, adding the field 'category' with value 'notebook'. +""" +import os +import sys +import json + +filename = sys.argv[1] +output_filename = sys.argv[2] +content = json.load(open(filename)) + +if 'include_in_docs' in content['metadata'] and content['metadata']['include_in_docs']: + yaml_frontmatter = ['---'] + for key, val in content['metadata'].iteritems(): + if key == 'name': + key = 'title' + if val == '': + val = os.path.basename(filename) + yaml_frontmatter.append('{}: {}'.format(key, val)) + yaml_frontmatter += ['category: notebook'] + yaml_frontmatter += ['original_path: ' + filename] + + with open(output_filename, 'w') as fo: + fo.write('\n'.join(yaml_frontmatter + ['---']) + '\n') + fo.write(open(filename).read()) diff --git a/scripts/deploy_docs.sh b/scripts/deploy_docs.sh index b6029613..fdf97f71 100755 --- a/scripts/deploy_docs.sh +++ b/scripts/deploy_docs.sh @@ -1,5 +1,5 @@ -#!/usr/bin/env sh -# Publish/ Pull-request documentation to the gh-pages site. +#!/bin/bash +# Publish documentation to the gh-pages site. # The remote for pushing the docs (defaults to origin). 
# This is where you will submit the PR to BVLC:gh-pages from. diff --git a/scripts/gather_examples.sh b/scripts/gather_examples.sh new file mode 100755 index 00000000..f5192375 --- /dev/null +++ b/scripts/gather_examples.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Assemble documentation for the project into one directory via symbolic links. + +# Find the docs dir, no matter where the script is called +ROOT_DIR="$( cd "$(dirname "$0")"/.. ; pwd -P )" +cd $ROOT_DIR + +# Gather docs from examples/**/readme.md +rm -r docs/gathered +mkdir docs/gathered +for README_FILENAME in $(find examples -iname "readme.md"); do + # Only use file if it is to be included in docs. + if grep -Fxq "include_in_docs: true" $README_FILENAME; then + # Make link to readme.md in docs/gathered/. + # Since everything is called readme.md, rename it by its dirname. + README_DIRNAME=`dirname $README_FILENAME` + DOCS_FILENAME=docs/gathered/$README_DIRNAME.md + mkdir -p `dirname $DOCS_FILENAME` + ln -s $ROOT_DIR/$README_FILENAME $DOCS_FILENAME + fi +done + +# Gather docs from examples/*.ipynb and add YAML front-matter. +for NOTEBOOK_FILENAME in $(find examples -d 1 -iname "*.ipynb"); do + DOCS_FILENAME=docs/gathered/$NOTEBOOK_FILENAME + mkdir -p `dirname $DOCS_FILENAME` + python scripts/copy_notebook.py $NOTEBOOK_FILENAME $DOCS_FILENAME +done From 54c94190a939ad9719f7cb788d63e92e5849bce5 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Fri, 11 Jul 2014 14:00:56 -0700 Subject: [PATCH 3/5] [docs] updated instructions for contributing documentation --- docs/development.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/development.md b/docs/development.md index 444c9e8e..d3b0e8c3 100644 --- a/docs/development.md +++ b/docs/development.md @@ -9,9 +9,14 @@ The [BVLC](http://bvlc.eecs.berkeley.edu/) maintainers welcome all contributions ### Documentation -Tutorials and general documentation -- including this website -- are written in Markdown format in the `docs/` folder. 
-While the format is quite easy to read directly, you may prefer to view the whole thing as a website. -To do so, simply run `jekyll serve -s docs` and view the documentation website at `http://0.0.0.0:4000` (for [jekyll](http://jekyllrb.com/), you must have ruby and do `gem install jekyll`). +This website, written with [Jekyll](http://jekyllrb.com/), functions as the documentation portal -- simply run `scripts/build_docs.sh` and view the website at `http://0.0.0.0:4000`. + +We prefer tutorials and examples to be documented close to where they live, in `readme.md` files. +The `build_docs.sh` script gathers all `examples/**/readme.md` and `examples/*.ipynb` files, and makes a table of contents. +To be included in the docs, the readme files must be annotated with [YAML front-matter](http://jekyllrb.com/docs/frontmatter/), including the flag `include_in_docs: true`. +Similarly for IPython notebooks: simply include `"include_in_docs": true` in the `"metadata"` JSON field. + +Other docs, such as installation guides, are written in the `docs` directory and manually linked to from the `index.md` page. We strive to provide lots of usage examples, and to document all code in docstrings. We absolutely appreciate any contribution to this effort!
From 05d661f0420d67368466dffb37976cdf7497a706 Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Fri, 11 Jul 2014 17:19:35 -0700 Subject: [PATCH 4/5] [docs] shelhamer's minor suggestions --- docs/development.md | 2 +- examples/imagenet/readme.md | 2 +- scripts/gather_examples.sh | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/development.md b/docs/development.md index d3b0e8c3..ff734d17 100644 --- a/docs/development.md +++ b/docs/development.md @@ -9,7 +9,7 @@ The [BVLC](http://bvlc.eecs.berkeley.edu/) maintainers welcome all contributions ### Documentation -This website, written with [Jekyll](http://jekyllrb.com/), functions as the documentation portal -- simply run `scripts/build_docs.sh` and view the website at `http://0.0.0.0:4000`. +This website, written with [Jekyll](http://jekyllrb.com/), functions as the official Caffe documentation -- simply run `scripts/build_docs.sh` and view the website at `http://0.0.0.0:4000`. We prefer tutorials and examples to be documented close to where they live, in `readme.md` files. The `build_docs.sh` script gathers all `examples/**/readme.md` and `examples/*.ipynb` files, and makes a table of contents. diff --git a/examples/imagenet/readme.md b/examples/imagenet/readme.md index 55348727..cdc8ecb7 100644 --- a/examples/imagenet/readme.md +++ b/examples/imagenet/readme.md @@ -1,6 +1,6 @@ --- title: ImageNet tutorial -description: Train and test "AlexNet" on ImageNet challenge data. +description: Train and test "CaffeNet" on ImageNet challenge data. category: example include_in_docs: true layout: default diff --git a/scripts/gather_examples.sh b/scripts/gather_examples.sh index f5192375..dd6cef0d 100755 --- a/scripts/gather_examples.sh +++ b/scripts/gather_examples.sh @@ -6,15 +6,16 @@ ROOT_DIR="$( cd "$(dirname "$0")"/.. 
; pwd -P )" cd $ROOT_DIR # Gather docs from examples/**/readme.md -rm -r docs/gathered -mkdir docs/gathered +GATHERED_DIR=docs/gathered +rm -r $GATHERED_DIR +mkdir $GATHERED_DIR for README_FILENAME in $(find examples -iname "readme.md"); do # Only use file if it is to be included in docs. if grep -Fxq "include_in_docs: true" $README_FILENAME; then # Make link to readme.md in docs/gathered/. # Since everything is called readme.md, rename it by its dirname. README_DIRNAME=`dirname $README_FILENAME` - DOCS_FILENAME=docs/gathered/$README_DIRNAME.md + DOCS_FILENAME=$GATHERED_DIR/$README_DIRNAME.md mkdir -p `dirname $DOCS_FILENAME` ln -s $ROOT_DIR/$README_FILENAME $DOCS_FILENAME fi @@ -22,7 +23,7 @@ done # Gather docs from examples/*.ipynb and add YAML front-matter. for NOTEBOOK_FILENAME in $(find examples -d 1 -iname "*.ipynb"); do - DOCS_FILENAME=docs/gathered/$NOTEBOOK_FILENAME + DOCS_FILENAME=$GATHERED_DIR/$NOTEBOOK_FILENAME mkdir -p `dirname $DOCS_FILENAME` python scripts/copy_notebook.py $NOTEBOOK_FILENAME $DOCS_FILENAME done From dd546171bae1c3608ce70111c8ffb46c766ea9df Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Fri, 11 Jul 2014 19:23:47 -0700 Subject: [PATCH 5/5] [example] image classification web demo --- data/ilsvrc12/get_ilsvrc_aux.sh | 1 + docs/getting_pretrained_models.md | 5 +- examples/web_demo/app.py | 215 +++++++++++++++++++++++++ examples/web_demo/exifutil.py | 33 ++++ examples/web_demo/readme.md | 30 ++++ examples/web_demo/templates/index.html | 138 ++++++++++++++++ 6 files changed, 421 insertions(+), 1 deletion(-) create mode 100644 examples/web_demo/app.py create mode 100644 examples/web_demo/exifutil.py create mode 100644 examples/web_demo/readme.md create mode 100644 examples/web_demo/templates/index.html diff --git a/data/ilsvrc12/get_ilsvrc_aux.sh b/data/ilsvrc12/get_ilsvrc_aux.sh index 3fa58dc7..b9b85d21 100755 --- a/data/ilsvrc12/get_ilsvrc_aux.sh +++ b/data/ilsvrc12/get_ilsvrc_aux.sh @@ -4,6 +4,7 @@ # This script downloads the 
imagenet example auxiliary files including: # - the ilsvrc12 image mean, binaryproto # - synset ids and words +# - Python pickle-format data of ImageNet graph structure and relative infogain # - the training splits with labels DIR="$( cd "$(dirname "$0")" ; pwd -P )" diff --git a/docs/getting_pretrained_models.md b/docs/getting_pretrained_models.md index bbac5ac4..14e6ee91 100644 --- a/docs/getting_pretrained_models.md +++ b/docs/getting_pretrained_models.md @@ -8,7 +8,8 @@ layout: default Note that unlike Caffe itself, these models are licensed for **academic research / non-commercial use only**. If you have any questions, please get in touch with us. -This page will be updated as more models become available. +*UPDATE* July 2014: we are actively working on a service for hosting user-uploaded model definition and trained weight files. +Soon, the community will be able to easily contribute different architectures! ### ImageNet @@ -26,4 +27,6 @@ This page will be updated as more models become available. validation accuracy 57.258% and loss 1.83948. - This model obtains a top-1 accuracy 57.1% and a top-5 accuracy 80.2% on the validation set, using just the center crop. (Using the average of 10 crops, (4 + 1 center) * 2 mirror, should obtain a bit higher accuracy) +### Auxiliary Data + Additionally, you will probably eventually need some auxiliary data (mean image, synset list, etc.): run `data/ilsvrc12/get_ilsvrc_aux.sh` from the root directory to obtain it. 
diff --git a/examples/web_demo/app.py b/examples/web_demo/app.py new file mode 100644 index 00000000..9bc4ed5c --- /dev/null +++ b/examples/web_demo/app.py @@ -0,0 +1,215 @@ +import os +import time +import cPickle +import datetime +import logging +import flask +import werkzeug +import optparse +import tornado.wsgi +import tornado.httpserver +import numpy as np +import pandas as pd +from PIL import Image as PILImage +import cStringIO as StringIO +import urllib +import caffe +import exifutil + +REPO_DIRNAME = os.path.abspath(os.path.dirname(__file__) + '/../..') +UPLOAD_FOLDER = '/tmp/caffe_demos_uploads' +ALLOWED_IMAGE_EXTENSIONS = set(['png', 'bmp', 'jpg', 'jpe', 'jpeg', 'gif']) + +# Obtain the flask app object +app = flask.Flask(__name__) + + +@app.route('/') +def index(): + return flask.render_template('index.html', has_result=False) + + +@app.route('/classify_url', methods=['GET']) +def classify_url(): + imageurl = flask.request.args.get('imageurl', '') + try: + string_buffer = StringIO.StringIO( + urllib.urlopen(imageurl).read()) + image = caffe.io.load_image(string_buffer) + + except Exception as err: + # For any exception we encounter in reading the image, we will just + # not continue. + logging.info('URL Image open error: %s', err) + return flask.render_template( + 'index.html', has_result=True, + result=(False, 'Cannot open image from URL.') + ) + + logging.info('Image: %s', imageurl) + result = app.clf.classify_image(image) + return flask.render_template( + 'index.html', has_result=True, result=result, imagesrc=imageurl) + + +@app.route('/classify_upload', methods=['POST']) +def classify_upload(): + try: + # We will save the file to disk for possible data collection. 
+ imagefile = flask.request.files['imagefile'] + filename_ = str(datetime.datetime.now()).replace(' ', '_') + \ + werkzeug.secure_filename(imagefile.filename) + filename = os.path.join(UPLOAD_FOLDER, filename_) + imagefile.save(filename) + logging.info('Saving to %s.', filename) + image = exifutil.open_oriented_im(filename) + + except Exception as err: + logging.info('Uploaded image open error: %s', err) + return flask.render_template( + 'index.html', has_result=True, + result=(False, 'Cannot open uploaded image.') + ) + + result = app.clf.classify_image(image) + return flask.render_template( + 'index.html', has_result=True, result=result, + imagesrc=embed_image_html(image) + ) + + +def embed_image_html(image): + """Creates an image embedded in HTML base64 format.""" + image_pil = PILImage.fromarray((255 * image).astype('uint8')) + image_pil = image_pil.resize((256, 256)) + string_buf = StringIO.StringIO() + image_pil.save(string_buf, format='png') + data = string_buf.getvalue().encode('base64').replace('\n', '') + return 'data:image/png;base64,' + data + + +def allowed_file(filename): + return ( + '.' in filename and + filename.rsplit('.', 1)[1] in ALLOWED_IMAGE_EXTENSIONS + ) + + +class ImagenetClassifier(object): + default_args = { + 'model_def_file': ( + '{}/examples/imagenet/imagenet_deploy.prototxt'.format(REPO_DIRNAME)), + 'pretrained_model_file': ( + '{}/examples/imagenet/caffe_reference_imagenet_model'.format(REPO_DIRNAME)), + 'mean_file': ( + '{}/python/caffe/imagenet/ilsvrc_2012_mean.npy'.format(REPO_DIRNAME)), + 'class_labels_file': ( + '{}/data/ilsvrc12/synset_words.txt'.format(REPO_DIRNAME)), + 'bet_file': ( + '{}/data/ilsvrc12/imagenet.bet.pickle'.format(REPO_DIRNAME)), + } + for key, val in default_args.iteritems(): + if not os.path.exists(val): + raise Exception( + "File for {} is missing. 
Should be at: {}".format(key, val)) + default_args['image_dim'] = 227 + default_args['gpu_mode'] = True + + def __init__(self, model_def_file, pretrained_model_file, mean_file, + class_labels_file, bet_file, image_dim, gpu_mode=False): + logging.info('Loading net and associated files...') + self.net = caffe.Classifier( + model_def_file, pretrained_model_file, input_scale=255, + image_dims=(image_dim, image_dim), gpu=gpu_mode, + mean_file=mean_file, channel_swap=(2, 1, 0) + ) + + with open(class_labels_file) as f: + labels_df = pd.DataFrame([ + { + 'synset_id': l.strip().split(' ')[0], + 'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0] + } + for l in f.readlines() + ]) + self.labels = labels_df.sort('synset_id')['name'].values + + self.bet = cPickle.load(open(bet_file)) + # A bias to prefer children nodes in single-chain paths + # I am setting the value to 0.1 as a quick, simple model. + # We could use better psychological models here... + self.bet['infogain'] -= np.array(self.bet['preferences']) * 0.1 + + def classify_image(self, image): + try: + starttime = time.time() + scores = self.net.predict([image], oversample=True).flatten() + endtime = time.time() + + indices = (-scores).argsort()[:5] + predictions = self.labels[indices] + + # In addition to the prediction text, we will also produce + # the length for the progress bar visualization. 
+ meta = [ + (p, '%.5f' % scores[i]) + for i, p in zip(indices, predictions) + ] + logging.info('result: %s', str(meta)) + + # Compute expected information gain + expected_infogain = np.dot( + self.bet['probmat'], scores[self.bet['idmapping']]) + expected_infogain *= self.bet['infogain'] + + # sort the scores + infogain_sort = expected_infogain.argsort()[::-1] + bet_result = [(self.bet['words'][v], '%.5f' % expected_infogain[v]) + for v in infogain_sort[:5]] + logging.info('bet result: %s', str(bet_result)) + + return (True, meta, bet_result, '%.3f' % (endtime - starttime)) + + except Exception as err: + logging.info('Classification error: %s', err) + return (False, 'Something went wrong when classifying the ' + 'image. Maybe try another one?') + + +def start_tornado(app, port=5000): + http_server = tornado.httpserver.HTTPServer( + tornado.wsgi.WSGIContainer(app)) + http_server.listen(port) + print("Tornado server starting on port {}".format(port)) + tornado.ioloop.IOLoop.instance().start() + + +def start_from_terminal(app): + """ + Parse command line options and start the server. 
+ """ + parser = optparse.OptionParser() + parser.add_option( + '-d', '--debug', + help="enable debug mode", + action="store_true", default=False) + parser.add_option( + '-p', '--port', + help="which port to serve content on", + type='int', default=5000) + opts, args = parser.parse_args() + + # Initialize classifier + app.clf = ImagenetClassifier(**ImagenetClassifier.default_args) + + if opts.debug: + app.run(debug=True, host='0.0.0.0', port=opts.port) + else: + start_tornado(app, opts.port) + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + if not os.path.exists(UPLOAD_FOLDER): + os.makedirs(UPLOAD_FOLDER) + start_from_terminal(app) diff --git a/examples/web_demo/exifutil.py b/examples/web_demo/exifutil.py new file mode 100644 index 00000000..8c07aa88 --- /dev/null +++ b/examples/web_demo/exifutil.py @@ -0,0 +1,33 @@ +""" +This script handles the skimage exif problem. +""" + +from PIL import Image +import numpy as np + +ORIENTATIONS = { # used in apply_orientation + 2: (Image.FLIP_LEFT_RIGHT,), + 3: (Image.ROTATE_180,), + 4: (Image.FLIP_TOP_BOTTOM,), + 5: (Image.FLIP_LEFT_RIGHT, Image.ROTATE_90), + 6: (Image.ROTATE_270,), + 7: (Image.FLIP_LEFT_RIGHT, Image.ROTATE_270), + 8: (Image.ROTATE_90,) +} + + +def open_oriented_im(im_path): + im = Image.open(im_path) + if hasattr(im, '_getexif'): + exif = im._getexif() + if exif is not None and 274 in exif: + orientation = exif[274] + im = apply_orientation(im, orientation) + return np.asarray(im).astype(np.float32) / 255. + + +def apply_orientation(im, orientation): + if orientation in ORIENTATIONS: + for method in ORIENTATIONS[orientation]: + im = im.transpose(method) + return im diff --git a/examples/web_demo/readme.md b/examples/web_demo/readme.md new file mode 100644 index 00000000..559c41e0 --- /dev/null +++ b/examples/web_demo/readme.md @@ -0,0 +1,30 @@ +--- +title: Web demo +description: Image classification demo running as a Flask web server. 
+category: example +layout: default +include_in_docs: true +--- + +# Web Demo + +## Requirements + +The demo server requires Python with some dependencies. +To make sure you have the dependencies, please run `pip install -r examples/web_demo/requirements.txt`, and also make sure that you've compiled the Python Caffe interface and that it is on your `PYTHONPATH` (see [installation instructions](/installation.html)). + +Make sure that you have obtained the Caffe Reference ImageNet Model and the ImageNet Auxiliary Data ([instructions](/getting_pretrained_models.html)). +NOTE: if you run into trouble, try re-downloading the auxiliary files. + +## Run + +Running `python examples/web_demo/app.py` will bring up the demo server, accessible at `http://0.0.0.0:5000`. +You can enable debug mode of the web server, or switch to a different port: + + % python examples/web_demo/app.py -h + Usage: app.py [options] + + Options: + -h, --help show this help message and exit + -d, --debug enable debug mode + -p PORT, --port=PORT which port to serve content on diff --git a/examples/web_demo/templates/index.html b/examples/web_demo/templates/index.html new file mode 100644 index 00000000..87893341 --- /dev/null +++ b/examples/web_demo/templates/index.html @@ -0,0 +1,138 @@ + + + + + + + + + Caffe Demos + + + + + + + + + + + + + + +
+ + +
+

Classification

+ Click for a Quick Example +
+ + {% if has_result %} + {% if not result[0] %} + +
{{ result[1] }} Did you provide a valid URL or a valid image file?
+ {% else %} +
+ +
+
+ +
+
+
    + {% for single_pred in result[2] %} +
  • + {{ single_pred[1] }} +

    + {{ single_pred[0] }} +

    +
  • + {% endfor %} +
+
+
+
    + {% for single_pred in result[1] %} +
  • + {{ single_pred[1] }} +

    + {{ single_pred[0] }} +

    +
  • + {% endfor %} +
+
+
+
+ +
+
+

CNN took {{ result[3] }} seconds.

+ {% endif %} +
+ {% endif %} + +
+
+
+ + + + +
+
+
+ +
+
+ + +
+ +
+
+ +
+ + +