001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.datanode.fsdataset;
019
020 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT;
021 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY;
022 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT;
023 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY;
024
025 import java.io.IOException;
026 import java.util.ArrayList;
027 import java.util.List;
028 import java.util.Random;
029
030 import org.apache.commons.logging.Log;
031 import org.apache.commons.logging.LogFactory;
032 import org.apache.hadoop.conf.Configurable;
033 import org.apache.hadoop.conf.Configuration;
034 import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
035
036 /**
037 * A DN volume choosing policy which takes into account the amount of free
038 * space on each of the available volumes when considering where to assign a
039 * new replica allocation. By default this policy prefers assigning replicas to
040 * those volumes with more available free space, so as to over time balance the
041 * available space of all the volumes within a DN.
042 */
043 public class AvailableSpaceVolumeChoosingPolicy<V extends FsVolumeSpi>
044 implements VolumeChoosingPolicy<V>, Configurable {
045
046 private static final Log LOG = LogFactory.getLog(AvailableSpaceVolumeChoosingPolicy.class);
047
048 private final Random random;
049
050 private long balancedSpaceThreshold = DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT;
051 private float balancedPreferencePercent = DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT;
052
053 AvailableSpaceVolumeChoosingPolicy(Random random) {
054 this.random = random;
055 }
056
057 public AvailableSpaceVolumeChoosingPolicy() {
058 this(new Random());
059 }
060
061 @Override
062 public synchronized void setConf(Configuration conf) {
063 balancedSpaceThreshold = conf.getLong(
064 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY,
065 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT);
066 balancedPreferencePercent = conf.getFloat(
067 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY,
068 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT);
069
070 LOG.info("Available space volume choosing policy initialized: " +
071 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY +
072 " = " + balancedSpaceThreshold + ", " +
073 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY +
074 " = " + balancedPreferencePercent);
075
076 if (balancedPreferencePercent > 1.0) {
077 LOG.warn("The value of " + DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY +
078 " is greater than 1.0 but should be in the range 0.0 - 1.0");
079 }
080
081 if (balancedPreferencePercent < 0.5) {
082 LOG.warn("The value of " + DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY +
083 " is less than 0.5 so volumes with less available disk space will receive more block allocations");
084 }
085 }
086
087 @Override
088 public synchronized Configuration getConf() {
089 // Nothing to do. Only added to fulfill the Configurable contract.
090 return null;
091 }
092
093 private final VolumeChoosingPolicy<V> roundRobinPolicyBalanced =
094 new RoundRobinVolumeChoosingPolicy<V>();
095 private final VolumeChoosingPolicy<V> roundRobinPolicyHighAvailable =
096 new RoundRobinVolumeChoosingPolicy<V>();
097 private final VolumeChoosingPolicy<V> roundRobinPolicyLowAvailable =
098 new RoundRobinVolumeChoosingPolicy<V>();
099
100 @Override
101 public synchronized V chooseVolume(List<V> volumes,
102 long replicaSize) throws IOException {
103 if (volumes.size() < 1) {
104 throw new DiskOutOfSpaceException("No more available volumes");
105 }
106
107 AvailableSpaceVolumeList volumesWithSpaces =
108 new AvailableSpaceVolumeList(volumes);
109
110 if (volumesWithSpaces.areAllVolumesWithinFreeSpaceThreshold()) {
111 // If they're actually not too far out of whack, fall back on pure round
112 // robin.
113 V volume = roundRobinPolicyBalanced.chooseVolume(volumes, replicaSize);
114 if (LOG.isDebugEnabled()) {
115 LOG.debug("All volumes are within the configured free space balance " +
116 "threshold. Selecting " + volume + " for write of block size " +
117 replicaSize);
118 }
119 return volume;
120 } else {
121 V volume = null;
122 // If none of the volumes with low free space have enough space for the
123 // replica, always try to choose a volume with a lot of free space.
124 long mostAvailableAmongLowVolumes = volumesWithSpaces
125 .getMostAvailableSpaceAmongVolumesWithLowAvailableSpace();
126
127 List<V> highAvailableVolumes = extractVolumesFromPairs(
128 volumesWithSpaces.getVolumesWithHighAvailableSpace());
129 List<V> lowAvailableVolumes = extractVolumesFromPairs(
130 volumesWithSpaces.getVolumesWithLowAvailableSpace());
131
132 float preferencePercentScaler =
133 (highAvailableVolumes.size() * balancedPreferencePercent) +
134 (lowAvailableVolumes.size() * (1 - balancedPreferencePercent));
135 float scaledPreferencePercent =
136 (highAvailableVolumes.size() * balancedPreferencePercent) /
137 preferencePercentScaler;
138 if (mostAvailableAmongLowVolumes < replicaSize ||
139 random.nextFloat() < scaledPreferencePercent) {
140 volume = roundRobinPolicyHighAvailable.chooseVolume(
141 highAvailableVolumes, replicaSize);
142 if (LOG.isDebugEnabled()) {
143 LOG.debug("Volumes are imbalanced. Selecting " + volume +
144 " from high available space volumes for write of block size "
145 + replicaSize);
146 }
147 } else {
148 volume = roundRobinPolicyLowAvailable.chooseVolume(
149 lowAvailableVolumes, replicaSize);
150 if (LOG.isDebugEnabled()) {
151 LOG.debug("Volumes are imbalanced. Selecting " + volume +
152 " from low available space volumes for write of block size "
153 + replicaSize);
154 }
155 }
156 return volume;
157 }
158 }
159
160 /**
161 * Used to keep track of the list of volumes we're choosing from.
162 */
163 private class AvailableSpaceVolumeList {
164 private final List<AvailableSpaceVolumePair> volumes;
165
166 public AvailableSpaceVolumeList(List<V> volumes) throws IOException {
167 this.volumes = new ArrayList<AvailableSpaceVolumePair>();
168 for (V volume : volumes) {
169 this.volumes.add(new AvailableSpaceVolumePair(volume));
170 }
171 }
172
173 /**
174 * @return true if all volumes' free space is within the
175 * configured threshold, false otherwise.
176 */
177 public boolean areAllVolumesWithinFreeSpaceThreshold() {
178 long leastAvailable = Long.MAX_VALUE;
179 long mostAvailable = 0;
180 for (AvailableSpaceVolumePair volume : volumes) {
181 leastAvailable = Math.min(leastAvailable, volume.getAvailable());
182 mostAvailable = Math.max(mostAvailable, volume.getAvailable());
183 }
184 return (mostAvailable - leastAvailable) < balancedSpaceThreshold;
185 }
186
187 /**
188 * @return the minimum amount of space available on a single volume,
189 * across all volumes.
190 */
191 private long getLeastAvailableSpace() {
192 long leastAvailable = Long.MAX_VALUE;
193 for (AvailableSpaceVolumePair volume : volumes) {
194 leastAvailable = Math.min(leastAvailable, volume.getAvailable());
195 }
196 return leastAvailable;
197 }
198
199 /**
200 * @return the maximum amount of space available across volumes with low space.
201 */
202 public long getMostAvailableSpaceAmongVolumesWithLowAvailableSpace() {
203 long mostAvailable = Long.MIN_VALUE;
204 for (AvailableSpaceVolumePair volume : getVolumesWithLowAvailableSpace()) {
205 mostAvailable = Math.max(mostAvailable, volume.getAvailable());
206 }
207 return mostAvailable;
208 }
209
210 /**
211 * @return the list of volumes with relatively low available space.
212 */
213 public List<AvailableSpaceVolumePair> getVolumesWithLowAvailableSpace() {
214 long leastAvailable = getLeastAvailableSpace();
215 List<AvailableSpaceVolumePair> ret = new ArrayList<AvailableSpaceVolumePair>();
216 for (AvailableSpaceVolumePair volume : volumes) {
217 if (volume.getAvailable() <= leastAvailable + balancedSpaceThreshold) {
218 ret.add(volume);
219 }
220 }
221 return ret;
222 }
223
224 /**
225 * @return the list of volumes with a lot of available space.
226 */
227 public List<AvailableSpaceVolumePair> getVolumesWithHighAvailableSpace() {
228 long leastAvailable = getLeastAvailableSpace();
229 List<AvailableSpaceVolumePair> ret = new ArrayList<AvailableSpaceVolumePair>();
230 for (AvailableSpaceVolumePair volume : volumes) {
231 if (volume.getAvailable() > leastAvailable + balancedSpaceThreshold) {
232 ret.add(volume);
233 }
234 }
235 return ret;
236 }
237
238 }
239
240 /**
241 * Used so that we only check the available space on a given volume once, at
242 * the beginning of {@link AvailableSpaceVolumeChoosingPolicy#chooseVolume(List, long)}.
243 */
244 private class AvailableSpaceVolumePair {
245 private final V volume;
246 private final long availableSpace;
247
248 public AvailableSpaceVolumePair(V volume) throws IOException {
249 this.volume = volume;
250 this.availableSpace = volume.getAvailable();
251 }
252
253 public long getAvailable() {
254 return availableSpace;
255 }
256
257 public V getVolume() {
258 return volume;
259 }
260 }
261
262 private List<V> extractVolumesFromPairs(List<AvailableSpaceVolumePair> volumes) {
263 List<V> ret = new ArrayList<V>();
264 for (AvailableSpaceVolumePair volume : volumes) {
265 ret.add(volume.getVolume());
266 }
267 return ret;
268 }
269
270 }