Caffe
parallel.hpp
1 #ifndef CAFFE_PARALLEL_HPP_
2 #define CAFFE_PARALLEL_HPP_
3 
4 #include <boost/date_time/posix_time/posix_time.hpp>
5 
6 #include <vector>
7 
8 #include "caffe/blob.hpp"
9 #include "caffe/common.hpp"
10 #include "caffe/internal_thread.hpp"
11 #include "caffe/layer.hpp"
12 #include "caffe/proto/caffe.pb.h"
13 #include "caffe/solver.hpp"
14 #include "caffe/syncedmem.hpp"
15 #include "caffe/util/blocking_queue.hpp"
16 
17 namespace caffe {
18 
19 // Represents a net parameters. Once a net is created, its parameter buffers can
20 // be replaced by ones from Params, to allow parallelization. Params ensures
21 // parameters are allocated in one consecutive array.
22 template<typename Dtype>
23 class Params {
24  public:
25  explicit Params(shared_ptr<Solver<Dtype> > root_solver);
26  virtual ~Params() {
27  }
28 
29  inline size_t size() const {
30  return size_;
31  }
32  inline Dtype* data() const {
33  return data_;
34  }
35  inline Dtype* diff() const {
36  return diff_;
37  }
38 
39  protected:
40  const size_t size_; // Size of buffers
41  Dtype* data_; // Network parameters
42  Dtype* diff_; // Gradient
43 
44 DISABLE_COPY_AND_ASSIGN(Params);
45 };
46 
47 // Params stored in GPU memory.
48 template<typename Dtype>
49 class GPUParams : public Params<Dtype> {
50  public:
51  GPUParams(shared_ptr<Solver<Dtype> > root_solver, int device);
52  virtual ~GPUParams();
53 
54  void configure(Solver<Dtype>* solver) const;
55 
56  protected:
60 };
61 
// A (parent, device) pair of integer device ids, as produced by compute().
class DevicePair {
 public:
  DevicePair(int parent, int device)
      : parent_(parent), device_(device) {}

  // Accessors are const so they can be used on const objects and through
  // const references (e.g. when iterating a const vector of pairs).
  int parent() const { return parent_; }
  int device() const { return device_; }

  // Group GPUs in pairs, by proximity depending on machine's topology.
  // Results are written through `pairs`. `devices` is taken by value to stay
  // identical to the out-of-line definition, which is not visible here.
  static void compute(const std::vector<int> devices,
                      std::vector<DevicePair>* pairs);

 protected:
  int parent_;
  int device_;
};
82 
83 // Synchronous data parallelism using map-reduce between local GPUs.
84 template<typename Dtype>
85 class P2PSync : public GPUParams<Dtype>, public Solver<Dtype>::Callback,
86  public InternalThread {
87  public:
88  explicit P2PSync(shared_ptr<Solver<Dtype> > root_solver,
89  P2PSync<Dtype>* parent, const SolverParameter& param);
90  virtual ~P2PSync();
91 
92  inline const shared_ptr<Solver<Dtype> >& solver() const {
93  return solver_;
94  }
95 
96  void Run(const vector<int>& gpus);
97  void Prepare(const vector<int>& gpus,
98  vector<shared_ptr<P2PSync<Dtype> > >* syncs);
99  inline const int initial_iter() const { return initial_iter_; }
100 
101  protected:
102  void on_start();
103  void on_gradients_ready();
104 
105  void InternalThreadEntry();
106 
107  P2PSync<Dtype>* parent_;
108  vector<P2PSync<Dtype>*> children_;
110  const int initial_iter_;
111  Dtype* parent_grads_;
112  shared_ptr<Solver<Dtype> > solver_;
113 
114  using Params<Dtype>::size_;
115  using Params<Dtype>::data_;
116  using Params<Dtype>::diff_;
117 };
118 
119 } // namespace caffe
120 
121 #endif
A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14
Definition: parallel.hpp:23
An interface for classes that perform optimization on Nets.
Definition: solver.hpp:41
Definition: parallel.hpp:49
Definition: parallel.hpp:85
Definition: parallel.hpp:62
Definition: internal_thread.hpp:19
Definition: blocking_queue.hpp:10