Caffe
syncedmem.hpp
1 #ifndef CAFFE_SYNCEDMEM_HPP_
2 #define CAFFE_SYNCEDMEM_HPP_
3 
4 #include <cstdlib>
5 
6 #include "caffe/common.hpp"
7 
8 namespace caffe {
9 
10 // If CUDA is available and in GPU mode, host memory will be allocated pinned,
11 // using cudaMallocHost. It avoids dynamic pinning for transfers (DMA).
12 // The improvement in performance seems negligible in the single GPU case,
13 // but might be more significant for parallel training. Most importantly,
14 // it improved stability for large models on many GPUs.
15 inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) {
16 #ifndef CPU_ONLY
17  if (Caffe::mode() == Caffe::GPU) {
18  CUDA_CHECK(cudaMallocHost(ptr, size));
19  *use_cuda = true;
20  return;
21  }
22 #endif
23  *ptr = malloc(size);
24  *use_cuda = false;
25  CHECK(*ptr) << "host allocation of size " << size << " failed";
26 }
27 
28 inline void CaffeFreeHost(void* ptr, bool use_cuda) {
29 #ifndef CPU_ONLY
30  if (use_cuda) {
31  CUDA_CHECK(cudaFreeHost(ptr));
32  return;
33  }
34 #endif
35  free(ptr);
36 }
37 
38 
45 class SyncedMemory {
46  public:
47  SyncedMemory()
48  : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),
49  own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false),
50  gpu_device_(-1) {}
51  explicit SyncedMemory(size_t size)
52  : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),
53  own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false),
54  gpu_device_(-1) {}
55  ~SyncedMemory();
56  const void* cpu_data();
57  void set_cpu_data(void* data);
58  const void* gpu_data();
59  void set_gpu_data(void* data);
60  void* mutable_cpu_data();
61  void* mutable_gpu_data();
62  enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
63  SyncedHead head() { return head_; }
64  size_t size() { return size_; }
65 
66 #ifndef CPU_ONLY
67  void async_gpu_push(const cudaStream_t& stream);
68 #endif
69 
70  private:
71  void to_cpu();
72  void to_gpu();
73  void* cpu_ptr_;
74  void* gpu_ptr_;
75  size_t size_;
76  SyncedHead head_;
77  bool own_cpu_data_;
78  bool cpu_malloc_use_cuda_;
79  bool own_gpu_data_;
80  int gpu_device_;
81 
82  DISABLE_COPY_AND_ASSIGN(SyncedMemory);
83 }; // class SyncedMemory
84 
85 } // namespace caffe
86 
87 #endif // CAFFE_SYNCEDMEM_HPP_
A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14
Manages memory allocation and synchronization between the host (CPU) and device (GPU).
Definition: syncedmem.hpp:45