/*
 * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP
#define SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP

#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/shared/gcUtil.hpp"
#include "gc_implementation/shared/mutableSpace.hpp"
#endif // INCLUDE_ALL_GCS

/*
 *    The NUMA-aware allocator (MutableNUMASpace) is basically a modification
 * of MutableSpace which preserves interfaces but implements different
 * functionality. The space is split into chunks for each locality group
 * (resizing for adaptive size policy is also supported). For each thread
 * allocations are performed in the chunk corresponding to the home locality
 * group of the thread. Whenever any chunk fills up, a young generation
 * collection occurs.
 *   The chunks can also be adaptively resized. The idea behind the adaptive
 * sizing is to reduce the loss of space in the eden due to fragmentation.
 * The main cause of fragmentation is uneven allocation rates of threads.
 * The allocation rate difference between locality groups may be caused either
 * by application specifics or by uneven LWP distribution by the OS. Besides,
 * the application can have fewer threads than the number of locality groups.
 * In order to resize the chunk we measure the allocation rate of the
 * application between collections. After that we reshape the chunks to reflect
 * the allocation rate pattern. The AdaptiveWeightedAverage exponentially
 * decaying average is used to smooth the measurements. The NUMASpaceResizeRate
 * parameter is used to control the adaptation speed by restricting the number of
 * bytes that can be moved during the adaptation phase.
 *   Chunks may contain pages from the wrong locality group; the page-scanner
 * has been introduced to address this problem. Remote pages typically appear
 * due to a memory shortage in the target locality group. Besides, Solaris may
 * allocate a large page from a remote locality group even if small local
 * pages are available. The page-scanner scans the pages right after a
 * collection and frees remote pages in the hope that subsequent reallocation
 * will be more successful. This approach proved useful on heavily loaded
 * systems where multiple processes compete for memory.
 */
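
// A minimal sketch of the chunk-adaptation math described above (illustrative
// only; it assumes the weight is the percentage applied to the new sample, in
// the style of AdaptiveWeightedAverage, with NUMAChunkResizeWeight supplying
// that weight):
//
//   static size_t decayed_alloc_rate(size_t avg, size_t sample, unsigned weight) {
//     // Keep (100 - weight)% of the old average and blend in weight% of the
//     // new sample; weight is a percentage in [0, 100].
//     return ((100 - weight) * avg + weight * sample) / 100;
//   }
//
// For example, with a weight of 20 a chunk whose allocation volume jumps from
// 8 MB to 16 MB between collections moves its smoothed rate to
// 0.8 * 8 MB + 0.2 * 16 MB = 9.6 MB after a single sample.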

class MutableNUMASpace : public MutableSpace {
  friend class VMStructs;

  class LGRPSpace : public CHeapObj<mtGC> {
    int _lgrp_id;
    MutableSpace* _space;
    MemRegion _invalid_region;
    AdaptiveWeightedAverage* _alloc_rate;
    bool _allocation_failed;

    struct SpaceStats {
      size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
      size_t _large_pages, _small_pages;

      SpaceStats() {
        _local_space = 0;
        _remote_space = 0;
        _unbiased_space = 0;
        _uncommited_space = 0;
        _large_pages = 0;
        _small_pages = 0;
      }
    };

    SpaceStats _space_stats;

    char* _last_page_scanned;
    char* last_page_scanned()            { return _last_page_scanned; }
    void set_last_page_scanned(char* p)  { _last_page_scanned = p;    }
   public:
    LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
      _space = new MutableSpace(alignment);
      _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
    }
    ~LGRPSpace() {
      delete _space;
      delete _alloc_rate;
    }

    void add_invalid_region(MemRegion r) {
      if (!_invalid_region.is_empty()) {
        _invalid_region.set_start(MIN2(_invalid_region.start(), r.start()));
        _invalid_region.set_end(MAX2(_invalid_region.end(), r.end()));
      } else {
        _invalid_region = r;
      }
    }

    static bool equals(void* lgrp_id_value, LGRPSpace* p) {
      return *(int*)lgrp_id_value == p->lgrp_id();
    }

    // Report a failed allocation.
    void set_allocation_failed() { _allocation_failed = true;  }

    void sample() {
      // If there was a failed allocation, make the allocation rate equal
      // to the size of the whole chunk. This ensures the progress of
      // the adaptation process.
      size_t alloc_rate_sample;
      if (_allocation_failed) {
        alloc_rate_sample = space()->capacity_in_bytes();
        _allocation_failed = false;
      } else {
        alloc_rate_sample = space()->used_in_bytes();
      }
      alloc_rate()->sample(alloc_rate_sample);
    }

    MemRegion invalid_region() const                { return _invalid_region;      }
    void set_invalid_region(MemRegion r)            { _invalid_region = r;         }
    int lgrp_id() const                             { return _lgrp_id;             }
    MutableSpace* space() const                     { return _space;               }
    AdaptiveWeightedAverage* alloc_rate() const     { return _alloc_rate;          }
    void clear_alloc_rate()                         { _alloc_rate->clear();        }
    SpaceStats* space_stats()                       { return &_space_stats;        }
    void clear_space_stats()                        { _space_stats = SpaceStats(); }

    void accumulate_statistics(size_t page_size);
    void scan_pages(size_t page_size, size_t page_count);
  };

  GrowableArray<LGRPSpace*>* _lgrp_spaces;
  size_t _page_size;
  unsigned _adaptation_cycles, _samples_count;

  void set_page_size(size_t psz)                     { _page_size = psz;          }
  size_t page_size() const                           { return _page_size;         }

  unsigned adaptation_cycles()                       { return _adaptation_cycles; }
  void set_adaptation_cycles(unsigned v)             { _adaptation_cycles = v;    }

  unsigned samples_count()                           { return _samples_count;     }
  void increment_samples_count()                     { ++_samples_count;          }

  size_t _base_space_size;
  void set_base_space_size(size_t v)                 { _base_space_size = v;      }
  size_t base_space_size() const                     { return _base_space_size;   }

  // Check if the NUMA topology has changed. Add and remove spaces if needed.
  // The update can be forced by setting the force parameter equal to true.
  bool update_layout(bool force);
  // Bias region towards the lgrp.
  void bias_region(MemRegion mr, int lgrp_id);
  // Free pages in a given region.
  void free_region(MemRegion mr);
  // Get current chunk size.
  size_t current_chunk_size(int i);
  // Get default chunk size (equally divide the space).
  size_t default_chunk_size();
  // Adapt the chunk size to follow the allocation rate.
  size_t adaptive_chunk_size(int i, size_t limit);
  // Scan and free invalid pages.
  void scan_pages(size_t page_count);
  // Return the bottom_region and the top_region. Align them to the page_size() boundary.
  // |------------------new_region---------------------------------|
  // |----bottom_region--|---intersection---|------top_region------|
  void select_tails(MemRegion new_region, MemRegion intersection,
                    MemRegion* bottom_region, MemRegion* top_region);
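  // For instance (illustrative values, assuming 4K pages): with new_region
  // covering [0x1000, 0x9000) and intersection [0x3000, 0x7000), the tails are
  // bottom_region = [0x1000, 0x3000) and top_region = [0x7000, 0x9000).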
  // Try to merge the invalid region with the bottom or top region by decreasing
  // the intersection area. If the invalid region still lies inside the
  // intersection, return it non-empty and aligned to the page_size() boundary.
  // |------------------new_region---------------------------------|
  // |----------------|-------invalid---|--------------------------|
  // |----bottom_region--|---intersection---|------top_region------|
  void merge_regions(MemRegion new_region, MemRegion* intersection,
                     MemRegion* invalid_region);

 public:
  GrowableArray<LGRPSpace*>* lgrp_spaces() const     { return _lgrp_spaces;       }
  MutableNUMASpace(size_t alignment);
  virtual ~MutableNUMASpace();
  // Space initialization.
  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages);
  // Update space layout if necessary. Do all adaptive resizing job.
  virtual void update();
  // Update allocation rate averages.
  virtual void accumulate_statistics();

  virtual void clear(bool mangle_space);
  virtual void mangle_unused_area() PRODUCT_RETURN;
  virtual void mangle_unused_area_complete() PRODUCT_RETURN;
  virtual void mangle_region(MemRegion mr) PRODUCT_RETURN;
  virtual void check_mangled_unused_area(HeapWord* limit) PRODUCT_RETURN;
  virtual void check_mangled_unused_area_complete() PRODUCT_RETURN;
  virtual void set_top_for_allocations(HeapWord* v) PRODUCT_RETURN;
  virtual void set_top_for_allocations() PRODUCT_RETURN;

  virtual void ensure_parsability();
  virtual size_t used_in_words() const;
  virtual size_t free_in_words() const;

  using MutableSpace::capacity_in_words;
  virtual size_t capacity_in_words(Thread* thr) const;
  virtual size_t tlab_capacity(Thread* thr) const;
  virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;

  // Allocation (return NULL if full)
  virtual HeapWord* allocate(size_t word_size);
  virtual HeapWord* cas_allocate(size_t word_size);

  // Debugging
  virtual void print_on(outputStream* st) const;
  virtual void print_short_on(outputStream* st) const;
  virtual void verify();

  virtual void set_top(HeapWord* value);
};

#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP