001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
021 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
022 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
023 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT;
024 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES;
025 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT;
026 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
027 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
028
029 import java.io.DataInput;
030 import java.io.DataOutputStream;
031 import java.io.IOException;
032 import java.util.ArrayList;
033 import java.util.Collection;
034 import java.util.Collections;
035 import java.util.Date;
036 import java.util.EnumSet;
037 import java.util.Iterator;
038 import java.util.LinkedList;
039 import java.util.List;
040 import java.util.Map.Entry;
041 import java.util.SortedMap;
042 import java.util.TreeMap;
043 import java.util.concurrent.locks.ReentrantLock;
044
045 import org.apache.commons.io.IOUtils;
046 import org.apache.hadoop.classification.InterfaceAudience;
047 import org.apache.hadoop.conf.Configuration;
048 import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
049 import org.apache.hadoop.fs.CacheFlag;
050 import org.apache.hadoop.fs.InvalidRequestException;
051 import org.apache.hadoop.fs.Path;
052 import org.apache.hadoop.fs.UnresolvedLinkException;
053 import org.apache.hadoop.fs.permission.FsAction;
054 import org.apache.hadoop.fs.permission.FsPermission;
055 import org.apache.hadoop.hdfs.DFSUtil;
056 import org.apache.hadoop.hdfs.protocol.CacheDirective;
057 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
058 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
059 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
060 import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
061 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
062 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
063 import org.apache.hadoop.hdfs.protocol.DatanodeID;
064 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
065 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
066 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
067 import org.apache.hadoop.hdfs.protocolPB.PBHelper;
068 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
069 import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
070 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
071 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList;
072 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
073 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
074 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
075 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
076 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
077 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
078 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
079 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
080 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
081 import org.apache.hadoop.hdfs.util.ReadOnlyList;
082 import org.apache.hadoop.security.AccessControlException;
083 import org.apache.hadoop.util.GSet;
084 import org.apache.hadoop.util.LightWeightGSet;
085 import org.apache.hadoop.util.Time;
086 import org.slf4j.Logger;
087 import org.slf4j.LoggerFactory;
088
089 import com.google.common.annotations.VisibleForTesting;
090 import com.google.common.collect.Lists;
091
092 /**
093 * The Cache Manager handles caching on DataNodes.
094 *
095 * This class is instantiated by the FSNamesystem.
096 * It maintains the mapping of cached blocks to datanodes via processing
097 * datanode cache reports. Based on these reports and addition and removal of
098 * caching directives, we will schedule caching and uncaching work.
099 */
100 @InterfaceAudience.LimitedPrivate({"HDFS"})
101 public final class CacheManager {
102 public static final Logger LOG = LoggerFactory.getLogger(CacheManager.class);
103
104 private static final float MIN_CACHED_BLOCKS_PERCENT = 0.001f;
105
106 // TODO: add pending / underCached / schedule cached blocks stats.
107
108 /**
109 * The FSNamesystem that contains this CacheManager.
110 */
111 private final FSNamesystem namesystem;
112
113 /**
114 * The BlockManager associated with the FSN that owns this CacheManager.
115 */
116 private final BlockManager blockManager;
117
118 /**
119 * Cache directives, sorted by ID.
120 *
121 * listCacheDirectives relies on the ordering of elements in this map
122 * to track what has already been listed by the client.
123 */
124 private final TreeMap<Long, CacheDirective> directivesById =
125 new TreeMap<Long, CacheDirective>();
126
127 /**
128 * The directive ID to use for a new directive. IDs always increase, and are
129 * never reused.
130 */
131 private long nextDirectiveId;
132
133 /**
134 * Cache directives, sorted by path
135 */
136 private final TreeMap<String, List<CacheDirective>> directivesByPath =
137 new TreeMap<String, List<CacheDirective>>();
138
139 /**
140 * Cache pools, sorted by name.
141 */
142 private final TreeMap<String, CachePool> cachePools =
143 new TreeMap<String, CachePool>();
144
145 /**
146 * Maximum number of cache pools to list in one operation.
147 */
148 private final int maxListCachePoolsResponses;
149
150 /**
151 * Maximum number of cache pool directives to list in one operation.
152 */
153 private final int maxListCacheDirectivesNumResponses;
154
155 /**
156 * Interval between scans in milliseconds.
157 */
158 private final long scanIntervalMs;
159
160 /**
161 * All cached blocks.
162 */
163 private final GSet<CachedBlock, CachedBlock> cachedBlocks;
164
165 /**
166 * Lock which protects the CacheReplicationMonitor.
167 */
168 private final ReentrantLock crmLock = new ReentrantLock();
169
170 private final SerializerCompat serializerCompat = new SerializerCompat();
171
172 /**
173 * The CacheReplicationMonitor.
174 */
175 private CacheReplicationMonitor monitor;
176
177 public static final class PersistState {
178 public final CacheManagerSection section;
179 public final List<CachePoolInfoProto> pools;
180 public final List<CacheDirectiveInfoProto> directives;
181
182 public PersistState(CacheManagerSection section,
183 List<CachePoolInfoProto> pools, List<CacheDirectiveInfoProto> directives) {
184 this.section = section;
185 this.pools = pools;
186 this.directives = directives;
187 }
188 }
189
190 CacheManager(FSNamesystem namesystem, Configuration conf,
191 BlockManager blockManager) {
192 this.namesystem = namesystem;
193 this.blockManager = blockManager;
194 this.nextDirectiveId = 1;
195 this.maxListCachePoolsResponses = conf.getInt(
196 DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES,
197 DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT);
198 this.maxListCacheDirectivesNumResponses = conf.getInt(
199 DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES,
200 DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT);
201 scanIntervalMs = conf.getLong(
202 DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
203 DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
204 float cachedBlocksPercent = conf.getFloat(
205 DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
206 DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
207 if (cachedBlocksPercent < MIN_CACHED_BLOCKS_PERCENT) {
208 LOG.info("Using minimum value {} for {}", MIN_CACHED_BLOCKS_PERCENT,
209 DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
210 cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
211 }
212 this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
213 LightWeightGSet.computeCapacity(cachedBlocksPercent,
214 "cachedBlocks"));
215
216 }
217
218 /**
219 * Resets all tracked directives and pools. Called during 2NN checkpointing to
220 * reset FSNamesystem state. See {@link FSNamesystem#clear()}.
221 */
222 void clear() {
223 directivesById.clear();
224 directivesByPath.clear();
225 cachePools.clear();
226 nextDirectiveId = 1;
227 }
228
229 public void startMonitorThread() {
230 crmLock.lock();
231 try {
232 if (this.monitor == null) {
233 this.monitor = new CacheReplicationMonitor(namesystem, this,
234 scanIntervalMs, crmLock);
235 this.monitor.start();
236 }
237 } finally {
238 crmLock.unlock();
239 }
240 }
241
242 public void stopMonitorThread() {
243 crmLock.lock();
244 try {
245 if (this.monitor != null) {
246 CacheReplicationMonitor prevMonitor = this.monitor;
247 this.monitor = null;
248 IOUtils.closeQuietly(prevMonitor);
249 }
250 } finally {
251 crmLock.unlock();
252 }
253 }
254
255 public void clearDirectiveStats() {
256 assert namesystem.hasWriteLock();
257 for (CacheDirective directive : directivesById.values()) {
258 directive.resetStatistics();
259 }
260 }
261
262 /**
263 * @return Unmodifiable view of the collection of CachePools.
264 */
265 public Collection<CachePool> getCachePools() {
266 assert namesystem.hasReadLock();
267 return Collections.unmodifiableCollection(cachePools.values());
268 }
269
270 /**
271 * @return Unmodifiable view of the collection of CacheDirectives.
272 */
273 public Collection<CacheDirective> getCacheDirectives() {
274 assert namesystem.hasReadLock();
275 return Collections.unmodifiableCollection(directivesById.values());
276 }
277
278 @VisibleForTesting
279 public GSet<CachedBlock, CachedBlock> getCachedBlocks() {
280 assert namesystem.hasReadLock();
281 return cachedBlocks;
282 }
283
284 private long getNextDirectiveId() throws IOException {
285 assert namesystem.hasWriteLock();
286 if (nextDirectiveId >= Long.MAX_VALUE - 1) {
287 throw new IOException("No more available IDs.");
288 }
289 return nextDirectiveId++;
290 }
291
292 // Helper getter / validation methods
293
294 private static void checkWritePermission(FSPermissionChecker pc,
295 CachePool pool) throws AccessControlException {
296 if ((pc != null)) {
297 pc.checkPermission(pool, FsAction.WRITE);
298 }
299 }
300
301 private static String validatePoolName(CacheDirectiveInfo directive)
302 throws InvalidRequestException {
303 String pool = directive.getPool();
304 if (pool == null) {
305 throw new InvalidRequestException("No pool specified.");
306 }
307 if (pool.isEmpty()) {
308 throw new InvalidRequestException("Invalid empty pool name.");
309 }
310 return pool;
311 }
312
313 private static String validatePath(CacheDirectiveInfo directive)
314 throws InvalidRequestException {
315 if (directive.getPath() == null) {
316 throw new InvalidRequestException("No path specified.");
317 }
318 String path = directive.getPath().toUri().getPath();
319 if (!DFSUtil.isValidName(path)) {
320 throw new InvalidRequestException("Invalid path '" + path + "'.");
321 }
322 return path;
323 }
324
325 private static short validateReplication(CacheDirectiveInfo directive,
326 short defaultValue) throws InvalidRequestException {
327 short repl = (directive.getReplication() != null)
328 ? directive.getReplication() : defaultValue;
329 if (repl <= 0) {
330 throw new InvalidRequestException("Invalid replication factor " + repl
331 + " <= 0");
332 }
333 return repl;
334 }
335
336 /**
337 * Calculates the absolute expiry time of the directive from the
338 * {@link CacheDirectiveInfo.Expiration}. This converts a relative Expiration
339 * into an absolute time based on the local clock.
340 *
341 * @param info to validate.
342 * @param maxRelativeExpiryTime of the info's pool.
343 * @return the expiration time, or the pool's max absolute expiration if the
344 * info's expiration was not set.
345 * @throws InvalidRequestException if the info's Expiration is invalid.
346 */
347 private static long validateExpiryTime(CacheDirectiveInfo info,
348 long maxRelativeExpiryTime) throws InvalidRequestException {
349 LOG.trace("Validating directive {} pool maxRelativeExpiryTime {}", info,
350 maxRelativeExpiryTime);
351 final long now = new Date().getTime();
352 final long maxAbsoluteExpiryTime = now + maxRelativeExpiryTime;
353 if (info == null || info.getExpiration() == null) {
354 return maxAbsoluteExpiryTime;
355 }
356 Expiration expiry = info.getExpiration();
357 if (expiry.getMillis() < 0l) {
358 throw new InvalidRequestException("Cannot set a negative expiration: "
359 + expiry.getMillis());
360 }
361 long relExpiryTime, absExpiryTime;
362 if (expiry.isRelative()) {
363 relExpiryTime = expiry.getMillis();
364 absExpiryTime = now + relExpiryTime;
365 } else {
366 absExpiryTime = expiry.getMillis();
367 relExpiryTime = absExpiryTime - now;
368 }
369 // Need to cap the expiry so we don't overflow a long when doing math
370 if (relExpiryTime > Expiration.MAX_RELATIVE_EXPIRY_MS) {
371 throw new InvalidRequestException("Expiration "
372 + expiry.toString() + " is too far in the future!");
373 }
374 // Fail if the requested expiry is greater than the max
375 if (relExpiryTime > maxRelativeExpiryTime) {
376 throw new InvalidRequestException("Expiration " + expiry.toString()
377 + " exceeds the max relative expiration time of "
378 + maxRelativeExpiryTime + " ms.");
379 }
380 return absExpiryTime;
381 }
382
383 /**
384 * Throws an exception if the CachePool does not have enough capacity to
385 * cache the given path at the replication factor.
386 *
387 * @param pool CachePool where the path is being cached
388 * @param path Path that is being cached
389 * @param replication Replication factor of the path
390 * @throws InvalidRequestException if the pool does not have enough capacity
391 */
392 private void checkLimit(CachePool pool, String path,
393 short replication) throws InvalidRequestException {
394 CacheDirectiveStats stats = computeNeeded(path, replication);
395 if (pool.getLimit() == CachePoolInfo.LIMIT_UNLIMITED) {
396 return;
397 }
398 if (pool.getBytesNeeded() + (stats.getBytesNeeded() * replication) > pool
399 .getLimit()) {
400 throw new InvalidRequestException("Caching path " + path + " of size "
401 + stats.getBytesNeeded() / replication + " bytes at replication "
402 + replication + " would exceed pool " + pool.getPoolName()
403 + "'s remaining capacity of "
404 + (pool.getLimit() - pool.getBytesNeeded()) + " bytes.");
405 }
406 }
407
408 /**
409 * Computes the needed number of bytes and files for a path.
410 * @return CacheDirectiveStats describing the needed stats for this path
411 */
412 private CacheDirectiveStats computeNeeded(String path, short replication) {
413 FSDirectory fsDir = namesystem.getFSDirectory();
414 INode node;
415 long requestedBytes = 0;
416 long requestedFiles = 0;
417 CacheDirectiveStats.Builder builder = new CacheDirectiveStats.Builder();
418 try {
419 node = fsDir.getINode(path);
420 } catch (UnresolvedLinkException e) {
421 // We don't cache through symlinks
422 return builder.build();
423 }
424 if (node == null) {
425 return builder.build();
426 }
427 if (node.isFile()) {
428 requestedFiles = 1;
429 INodeFile file = node.asFile();
430 requestedBytes = file.computeFileSize();
431 } else if (node.isDirectory()) {
432 INodeDirectory dir = node.asDirectory();
433 ReadOnlyList<INode> children = dir
434 .getChildrenList(Snapshot.CURRENT_STATE_ID);
435 requestedFiles = children.size();
436 for (INode child : children) {
437 if (child.isFile()) {
438 requestedBytes += child.asFile().computeFileSize();
439 }
440 }
441 }
442 return new CacheDirectiveStats.Builder()
443 .setBytesNeeded(requestedBytes)
444 .setFilesCached(requestedFiles)
445 .build();
446 }
447
448 /**
449 * Get a CacheDirective by ID, validating the ID and that the directive
450 * exists.
451 */
452 private CacheDirective getById(long id) throws InvalidRequestException {
453 // Check for invalid IDs.
454 if (id <= 0) {
455 throw new InvalidRequestException("Invalid negative ID.");
456 }
457 // Find the directive.
458 CacheDirective directive = directivesById.get(id);
459 if (directive == null) {
460 throw new InvalidRequestException("No directive with ID " + id
461 + " found.");
462 }
463 return directive;
464 }
465
466 /**
467 * Get a CachePool by name, validating that it exists.
468 */
469 private CachePool getCachePool(String poolName)
470 throws InvalidRequestException {
471 CachePool pool = cachePools.get(poolName);
472 if (pool == null) {
473 throw new InvalidRequestException("Unknown pool " + poolName);
474 }
475 return pool;
476 }
477
478 // RPC handlers
479
480 private void addInternal(CacheDirective directive, CachePool pool) {
481 boolean addedDirective = pool.getDirectiveList().add(directive);
482 assert addedDirective;
483 directivesById.put(directive.getId(), directive);
484 String path = directive.getPath();
485 List<CacheDirective> directives = directivesByPath.get(path);
486 if (directives == null) {
487 directives = new ArrayList<CacheDirective>(1);
488 directivesByPath.put(path, directives);
489 }
490 directives.add(directive);
491 // Fix up pool stats
492 CacheDirectiveStats stats =
493 computeNeeded(directive.getPath(), directive.getReplication());
494 directive.addBytesNeeded(stats.getBytesNeeded());
495 directive.addFilesNeeded(directive.getFilesNeeded());
496
497 setNeedsRescan();
498 }
499
500 /**
501 * Adds a directive, skipping most error checking. This should only be called
502 * internally in special scenarios like edit log replay.
503 */
504 CacheDirectiveInfo addDirectiveFromEditLog(CacheDirectiveInfo directive)
505 throws InvalidRequestException {
506 long id = directive.getId();
507 CacheDirective entry = new CacheDirective(directive);
508 CachePool pool = cachePools.get(directive.getPool());
509 addInternal(entry, pool);
510 if (nextDirectiveId <= id) {
511 nextDirectiveId = id + 1;
512 }
513 return entry.toInfo();
514 }
515
516 public CacheDirectiveInfo addDirective(
517 CacheDirectiveInfo info, FSPermissionChecker pc, EnumSet<CacheFlag> flags)
518 throws IOException {
519 assert namesystem.hasWriteLock();
520 CacheDirective directive;
521 try {
522 CachePool pool = getCachePool(validatePoolName(info));
523 checkWritePermission(pc, pool);
524 String path = validatePath(info);
525 short replication = validateReplication(info, (short)1);
526 long expiryTime = validateExpiryTime(info, pool.getMaxRelativeExpiryMs());
527 // Do quota validation if required
528 if (!flags.contains(CacheFlag.FORCE)) {
529 checkLimit(pool, path, replication);
530 }
531 // All validation passed
532 // Add a new entry with the next available ID.
533 long id = getNextDirectiveId();
534 directive = new CacheDirective(id, path, replication, expiryTime);
535 addInternal(directive, pool);
536 } catch (IOException e) {
537 LOG.warn("addDirective of " + info + " failed: ", e);
538 throw e;
539 }
540 LOG.info("addDirective of {} successful.", info);
541 return directive.toInfo();
542 }
543
544 /**
545 * Factory method that makes a new CacheDirectiveInfo by applying fields in a
546 * CacheDirectiveInfo to an existing CacheDirective.
547 *
548 * @param info with some or all fields set.
549 * @param defaults directive providing default values for unset fields in
550 * info.
551 *
552 * @return new CacheDirectiveInfo of the info applied to the defaults.
553 */
554 private static CacheDirectiveInfo createFromInfoAndDefaults(
555 CacheDirectiveInfo info, CacheDirective defaults) {
556 // Initialize the builder with the default values
557 CacheDirectiveInfo.Builder builder =
558 new CacheDirectiveInfo.Builder(defaults.toInfo());
559 // Replace default with new value if present
560 if (info.getPath() != null) {
561 builder.setPath(info.getPath());
562 }
563 if (info.getReplication() != null) {
564 builder.setReplication(info.getReplication());
565 }
566 if (info.getPool() != null) {
567 builder.setPool(info.getPool());
568 }
569 if (info.getExpiration() != null) {
570 builder.setExpiration(info.getExpiration());
571 }
572 return builder.build();
573 }
574
575 /**
576 * Modifies a directive, skipping most error checking. This is for careful
577 * internal use only. modifyDirective can be non-deterministic since its error
578 * checking depends on current system time, which poses a problem for edit log
579 * replay.
580 */
581 void modifyDirectiveFromEditLog(CacheDirectiveInfo info)
582 throws InvalidRequestException {
583 // Check for invalid IDs.
584 Long id = info.getId();
585 if (id == null) {
586 throw new InvalidRequestException("Must supply an ID.");
587 }
588 CacheDirective prevEntry = getById(id);
589 CacheDirectiveInfo newInfo = createFromInfoAndDefaults(info, prevEntry);
590 removeInternal(prevEntry);
591 addInternal(new CacheDirective(newInfo), getCachePool(newInfo.getPool()));
592 }
593
594 public void modifyDirective(CacheDirectiveInfo info,
595 FSPermissionChecker pc, EnumSet<CacheFlag> flags) throws IOException {
596 assert namesystem.hasWriteLock();
597 String idString =
598 (info.getId() == null) ?
599 "(null)" : info.getId().toString();
600 try {
601 // Check for invalid IDs.
602 Long id = info.getId();
603 if (id == null) {
604 throw new InvalidRequestException("Must supply an ID.");
605 }
606 CacheDirective prevEntry = getById(id);
607 checkWritePermission(pc, prevEntry.getPool());
608
609 // Fill in defaults
610 CacheDirectiveInfo infoWithDefaults =
611 createFromInfoAndDefaults(info, prevEntry);
612 CacheDirectiveInfo.Builder builder =
613 new CacheDirectiveInfo.Builder(infoWithDefaults);
614
615 // Do validation
616 validatePath(infoWithDefaults);
617 validateReplication(infoWithDefaults, (short)-1);
618 // Need to test the pool being set here to avoid rejecting a modify for a
619 // directive that's already been forced into a pool
620 CachePool srcPool = prevEntry.getPool();
621 CachePool destPool = getCachePool(validatePoolName(infoWithDefaults));
622 if (!srcPool.getPoolName().equals(destPool.getPoolName())) {
623 checkWritePermission(pc, destPool);
624 if (!flags.contains(CacheFlag.FORCE)) {
625 checkLimit(destPool, infoWithDefaults.getPath().toUri().getPath(),
626 infoWithDefaults.getReplication());
627 }
628 }
629 // Verify the expiration against the destination pool
630 validateExpiryTime(infoWithDefaults, destPool.getMaxRelativeExpiryMs());
631
632 // Indicate changes to the CRM
633 setNeedsRescan();
634
635 // Validation passed
636 removeInternal(prevEntry);
637 addInternal(new CacheDirective(builder.build()), destPool);
638 } catch (IOException e) {
639 LOG.warn("modifyDirective of " + idString + " failed: ", e);
640 throw e;
641 }
642 LOG.info("modifyDirective of {} successfully applied {}.", idString, info);
643 }
644
645 private void removeInternal(CacheDirective directive)
646 throws InvalidRequestException {
647 assert namesystem.hasWriteLock();
648 // Remove the corresponding entry in directivesByPath.
649 String path = directive.getPath();
650 List<CacheDirective> directives = directivesByPath.get(path);
651 if (directives == null || !directives.remove(directive)) {
652 throw new InvalidRequestException("Failed to locate entry " +
653 directive.getId() + " by path " + directive.getPath());
654 }
655 if (directives.size() == 0) {
656 directivesByPath.remove(path);
657 }
658 // Fix up the stats from removing the pool
659 final CachePool pool = directive.getPool();
660 directive.addBytesNeeded(-directive.getBytesNeeded());
661 directive.addFilesNeeded(-directive.getFilesNeeded());
662
663 directivesById.remove(directive.getId());
664 pool.getDirectiveList().remove(directive);
665 assert directive.getPool() == null;
666
667 setNeedsRescan();
668 }
669
670 public void removeDirective(long id, FSPermissionChecker pc)
671 throws IOException {
672 assert namesystem.hasWriteLock();
673 try {
674 CacheDirective directive = getById(id);
675 checkWritePermission(pc, directive.getPool());
676 removeInternal(directive);
677 } catch (IOException e) {
678 LOG.warn("removeDirective of " + id + " failed: ", e);
679 throw e;
680 }
681 LOG.info("removeDirective of " + id + " successful.");
682 }
683
684 public BatchedListEntries<CacheDirectiveEntry>
685 listCacheDirectives(long prevId,
686 CacheDirectiveInfo filter,
687 FSPermissionChecker pc) throws IOException {
688 assert namesystem.hasReadLock();
689 final int NUM_PRE_ALLOCATED_ENTRIES = 16;
690 String filterPath = null;
691 if (filter.getPath() != null) {
692 filterPath = validatePath(filter);
693 }
694 if (filter.getReplication() != null) {
695 throw new InvalidRequestException(
696 "Filtering by replication is unsupported.");
697 }
698
699 // Querying for a single ID
700 final Long id = filter.getId();
701 if (id != null) {
702 if (!directivesById.containsKey(id)) {
703 throw new InvalidRequestException("Did not find requested id " + id);
704 }
705 // Since we use a tailMap on directivesById, setting prev to id-1 gets
706 // us the directive with the id (if present)
707 prevId = id - 1;
708 }
709
710 ArrayList<CacheDirectiveEntry> replies =
711 new ArrayList<CacheDirectiveEntry>(NUM_PRE_ALLOCATED_ENTRIES);
712 int numReplies = 0;
713 SortedMap<Long, CacheDirective> tailMap =
714 directivesById.tailMap(prevId + 1);
715 for (Entry<Long, CacheDirective> cur : tailMap.entrySet()) {
716 if (numReplies >= maxListCacheDirectivesNumResponses) {
717 return new BatchedListEntries<CacheDirectiveEntry>(replies, true);
718 }
719 CacheDirective curDirective = cur.getValue();
720 CacheDirectiveInfo info = cur.getValue().toInfo();
721
722 // If the requested ID is present, it should be the first item.
723 // Hitting this case means the ID is not present, or we're on the second
724 // item and should break out.
725 if (id != null &&
726 !(info.getId().equals(id))) {
727 break;
728 }
729 if (filter.getPool() != null &&
730 !info.getPool().equals(filter.getPool())) {
731 continue;
732 }
733 if (filterPath != null &&
734 !info.getPath().toUri().getPath().equals(filterPath)) {
735 continue;
736 }
737 boolean hasPermission = true;
738 if (pc != null) {
739 try {
740 pc.checkPermission(curDirective.getPool(), FsAction.READ);
741 } catch (AccessControlException e) {
742 hasPermission = false;
743 }
744 }
745 if (hasPermission) {
746 replies.add(new CacheDirectiveEntry(info, cur.getValue().toStats()));
747 numReplies++;
748 }
749 }
750 return new BatchedListEntries<CacheDirectiveEntry>(replies, false);
751 }
752
753 /**
754 * Create a cache pool.
755 *
756 * Only the superuser should be able to call this function.
757 *
758 * @param info The info for the cache pool to create.
759 * @return Information about the cache pool we created.
760 */
761 public CachePoolInfo addCachePool(CachePoolInfo info)
762 throws IOException {
763 assert namesystem.hasWriteLock();
764 CachePool pool;
765 try {
766 CachePoolInfo.validate(info);
767 String poolName = info.getPoolName();
768 pool = cachePools.get(poolName);
769 if (pool != null) {
770 throw new InvalidRequestException("Cache pool " + poolName
771 + " already exists.");
772 }
773 pool = CachePool.createFromInfoAndDefaults(info);
774 cachePools.put(pool.getPoolName(), pool);
775 } catch (IOException e) {
776 LOG.info("addCachePool of " + info + " failed: ", e);
777 throw e;
778 }
779 LOG.info("addCachePool of {} successful.", info);
780 return pool.getInfo(true);
781 }
782
783 /**
784 * Modify a cache pool.
785 *
786 * Only the superuser should be able to call this function.
787 *
788 * @param info
789 * The info for the cache pool to modify.
790 */
791 public void modifyCachePool(CachePoolInfo info)
792 throws IOException {
793 assert namesystem.hasWriteLock();
794 StringBuilder bld = new StringBuilder();
795 try {
796 CachePoolInfo.validate(info);
797 String poolName = info.getPoolName();
798 CachePool pool = cachePools.get(poolName);
799 if (pool == null) {
800 throw new InvalidRequestException("Cache pool " + poolName
801 + " does not exist.");
802 }
803 String prefix = "";
804 if (info.getOwnerName() != null) {
805 pool.setOwnerName(info.getOwnerName());
806 bld.append(prefix).
807 append("set owner to ").append(info.getOwnerName());
808 prefix = "; ";
809 }
810 if (info.getGroupName() != null) {
811 pool.setGroupName(info.getGroupName());
812 bld.append(prefix).
813 append("set group to ").append(info.getGroupName());
814 prefix = "; ";
815 }
816 if (info.getMode() != null) {
817 pool.setMode(info.getMode());
818 bld.append(prefix).append("set mode to " + info.getMode());
819 prefix = "; ";
820 }
821 if (info.getLimit() != null) {
822 pool.setLimit(info.getLimit());
823 bld.append(prefix).append("set limit to " + info.getLimit());
824 prefix = "; ";
825 // New limit changes stats, need to set needs refresh
826 setNeedsRescan();
827 }
828 if (info.getMaxRelativeExpiryMs() != null) {
829 final Long maxRelativeExpiry = info.getMaxRelativeExpiryMs();
830 pool.setMaxRelativeExpiryMs(maxRelativeExpiry);
831 bld.append(prefix).append("set maxRelativeExpiry to "
832 + maxRelativeExpiry);
833 prefix = "; ";
834 }
835 if (prefix.isEmpty()) {
836 bld.append("no changes.");
837 }
838 } catch (IOException e) {
839 LOG.info("modifyCachePool of " + info + " failed: ", e);
840 throw e;
841 }
842 LOG.info("modifyCachePool of {} successful; {}", info.getPoolName(),
843 bld.toString());
844 }
845
846 /**
847 * Remove a cache pool.
848 *
849 * Only the superuser should be able to call this function.
850 *
851 * @param poolName
852 * The name for the cache pool to remove.
853 */
854 public void removeCachePool(String poolName)
855 throws IOException {
856 assert namesystem.hasWriteLock();
857 try {
858 CachePoolInfo.validateName(poolName);
859 CachePool pool = cachePools.remove(poolName);
860 if (pool == null) {
861 throw new InvalidRequestException(
862 "Cannot remove non-existent cache pool " + poolName);
863 }
864 // Remove all directives in this pool.
865 Iterator<CacheDirective> iter = pool.getDirectiveList().iterator();
866 while (iter.hasNext()) {
867 CacheDirective directive = iter.next();
868 directivesByPath.remove(directive.getPath());
869 directivesById.remove(directive.getId());
870 iter.remove();
871 }
872 setNeedsRescan();
873 } catch (IOException e) {
874 LOG.info("removeCachePool of " + poolName + " failed: ", e);
875 throw e;
876 }
877 LOG.info("removeCachePool of " + poolName + " successful.");
878 }
879
880 public BatchedListEntries<CachePoolEntry>
881 listCachePools(FSPermissionChecker pc, String prevKey) {
882 assert namesystem.hasReadLock();
883 final int NUM_PRE_ALLOCATED_ENTRIES = 16;
884 ArrayList<CachePoolEntry> results =
885 new ArrayList<CachePoolEntry>(NUM_PRE_ALLOCATED_ENTRIES);
886 SortedMap<String, CachePool> tailMap = cachePools.tailMap(prevKey, false);
887 int numListed = 0;
888 for (Entry<String, CachePool> cur : tailMap.entrySet()) {
889 if (numListed++ >= maxListCachePoolsResponses) {
890 return new BatchedListEntries<CachePoolEntry>(results, true);
891 }
892 results.add(cur.getValue().getEntry(pc));
893 }
894 return new BatchedListEntries<CachePoolEntry>(results, false);
895 }
896
897 public void setCachedLocations(LocatedBlock block) {
898 CachedBlock cachedBlock =
899 new CachedBlock(block.getBlock().getBlockId(),
900 (short)0, false);
901 cachedBlock = cachedBlocks.get(cachedBlock);
902 if (cachedBlock == null) {
903 return;
904 }
905 List<DatanodeDescriptor> datanodes = cachedBlock.getDatanodes(Type.CACHED);
906 for (DatanodeDescriptor datanode : datanodes) {
907 block.addCachedLoc(datanode);
908 }
909 }
910
911 public final void processCacheReport(final DatanodeID datanodeID,
912 final List<Long> blockIds) throws IOException {
913 namesystem.writeLock();
914 final long startTime = Time.monotonicNow();
915 final long endTime;
916 try {
917 final DatanodeDescriptor datanode =
918 blockManager.getDatanodeManager().getDatanode(datanodeID);
919 if (datanode == null || !datanode.isAlive) {
920 throw new IOException(
921 "processCacheReport from dead or unregistered datanode: " +
922 datanode);
923 }
924 processCacheReportImpl(datanode, blockIds);
925 } finally {
926 endTime = Time.monotonicNow();
927 namesystem.writeUnlock();
928 }
929
930 // Log the block report processing stats from Namenode perspective
931 final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
932 if (metrics != null) {
933 metrics.addCacheBlockReport((int) (endTime - startTime));
934 }
935 LOG.debug("Processed cache report from {}, blocks: {}, " +
936 "processing time: {} msecs", datanodeID, blockIds.size(),
937 (endTime - startTime));
938 }
939
940 private void processCacheReportImpl(final DatanodeDescriptor datanode,
941 final List<Long> blockIds) {
942 CachedBlocksList cached = datanode.getCached();
943 cached.clear();
944 CachedBlocksList cachedList = datanode.getCached();
945 CachedBlocksList pendingCachedList = datanode.getPendingCached();
946 for (Iterator<Long> iter = blockIds.iterator(); iter.hasNext(); ) {
947 long blockId = iter.next();
948 LOG.trace("Cache report from datanode {} has block {}", datanode,
949 blockId);
950 CachedBlock cachedBlock =
951 new CachedBlock(blockId, (short)0, false);
952 CachedBlock prevCachedBlock = cachedBlocks.get(cachedBlock);
953 // Add the block ID from the cache report to the cachedBlocks map
954 // if it's not already there.
955 if (prevCachedBlock != null) {
956 cachedBlock = prevCachedBlock;
957 } else {
958 cachedBlocks.put(cachedBlock);
959 LOG.trace("Added block {} to cachedBlocks", cachedBlock);
960 }
961 // Add the block to the datanode's implicit cached block list
962 // if it's not already there. Similarly, remove it from the pending
963 // cached block list if it exists there.
964 if (!cachedBlock.isPresent(cachedList)) {
965 cachedList.add(cachedBlock);
966 LOG.trace("Added block {} to CACHED list.", cachedBlock);
967 }
968 if (cachedBlock.isPresent(pendingCachedList)) {
969 pendingCachedList.remove(cachedBlock);
970 LOG.trace("Removed block {} from PENDING_CACHED list.", cachedBlock);
971 }
972 }
973 }
974
975 /**
976 * Saves the current state of the CacheManager to the DataOutput. Used
977 * to persist CacheManager state in the FSImage.
978 * @param out DataOutput to persist state
979 * @param sdPath path of the storage directory
980 * @throws IOException
981 */
982 public void saveStateCompat(DataOutputStream out, String sdPath)
983 throws IOException {
984 serializerCompat.save(out, sdPath);
985 }
986
987 public PersistState saveState() throws IOException {
988 ArrayList<CachePoolInfoProto> pools = Lists
989 .newArrayListWithCapacity(cachePools.size());
990 ArrayList<CacheDirectiveInfoProto> directives = Lists
991 .newArrayListWithCapacity(directivesById.size());
992
993 for (CachePool pool : cachePools.values()) {
994 CachePoolInfo p = pool.getInfo(true);
995 CachePoolInfoProto.Builder b = CachePoolInfoProto.newBuilder()
996 .setPoolName(p.getPoolName());
997
998 if (p.getOwnerName() != null)
999 b.setOwnerName(p.getOwnerName());
1000
1001 if (p.getGroupName() != null)
1002 b.setGroupName(p.getGroupName());
1003
1004 if (p.getMode() != null)
1005 b.setMode(p.getMode().toShort());
1006
1007 if (p.getLimit() != null)
1008 b.setLimit(p.getLimit());
1009
1010 pools.add(b.build());
1011 }
1012
1013 for (CacheDirective directive : directivesById.values()) {
1014 CacheDirectiveInfo info = directive.toInfo();
1015 CacheDirectiveInfoProto.Builder b = CacheDirectiveInfoProto.newBuilder()
1016 .setId(info.getId());
1017
1018 if (info.getPath() != null) {
1019 b.setPath(info.getPath().toUri().getPath());
1020 }
1021
1022 if (info.getReplication() != null) {
1023 b.setReplication(info.getReplication());
1024 }
1025
1026 if (info.getPool() != null) {
1027 b.setPool(info.getPool());
1028 }
1029
1030 Expiration expiry = info.getExpiration();
1031 if (expiry != null) {
1032 assert (!expiry.isRelative());
1033 b.setExpiration(PBHelper.convert(expiry));
1034 }
1035
1036 directives.add(b.build());
1037 }
1038 CacheManagerSection s = CacheManagerSection.newBuilder()
1039 .setNextDirectiveId(nextDirectiveId).setNumPools(pools.size())
1040 .setNumDirectives(directives.size()).build();
1041
1042 return new PersistState(s, pools, directives);
1043 }
1044
1045 /**
1046 * Reloads CacheManager state from the passed DataInput. Used during namenode
1047 * startup to restore CacheManager state from an FSImage.
1048 * @param in DataInput from which to restore state
1049 * @throws IOException
1050 */
1051 public void loadStateCompat(DataInput in) throws IOException {
1052 serializerCompat.load(in);
1053 }
1054
1055 public void loadState(PersistState s) throws IOException {
1056 nextDirectiveId = s.section.getNextDirectiveId();
1057 for (CachePoolInfoProto p : s.pools) {
1058 CachePoolInfo info = new CachePoolInfo(p.getPoolName());
1059 if (p.hasOwnerName())
1060 info.setOwnerName(p.getOwnerName());
1061
1062 if (p.hasGroupName())
1063 info.setGroupName(p.getGroupName());
1064
1065 if (p.hasMode())
1066 info.setMode(new FsPermission((short) p.getMode()));
1067
1068 if (p.hasLimit())
1069 info.setLimit(p.getLimit());
1070
1071 addCachePool(info);
1072 }
1073
1074 for (CacheDirectiveInfoProto p : s.directives) {
1075 // Get pool reference by looking it up in the map
1076 final String poolName = p.getPool();
1077 CacheDirective directive = new CacheDirective(p.getId(), new Path(
1078 p.getPath()).toUri().getPath(), (short) p.getReplication(), p
1079 .getExpiration().getMillis());
1080 addCacheDirective(poolName, directive);
1081 }
1082 }
1083
1084 private void addCacheDirective(final String poolName,
1085 final CacheDirective directive) throws IOException {
1086 CachePool pool = cachePools.get(poolName);
1087 if (pool == null) {
1088 throw new IOException("Directive refers to pool " + poolName
1089 + ", which does not exist.");
1090 }
1091 boolean addedDirective = pool.getDirectiveList().add(directive);
1092 assert addedDirective;
1093 if (directivesById.put(directive.getId(), directive) != null) {
1094 throw new IOException("A directive with ID " + directive.getId()
1095 + " already exists");
1096 }
1097 List<CacheDirective> directives = directivesByPath.get(directive.getPath());
1098 if (directives == null) {
1099 directives = new LinkedList<CacheDirective>();
1100 directivesByPath.put(directive.getPath(), directives);
1101 }
1102 directives.add(directive);
1103 }
1104
1105 private final class SerializerCompat {
1106 private void save(DataOutputStream out, String sdPath) throws IOException {
1107 out.writeLong(nextDirectiveId);
1108 savePools(out, sdPath);
1109 saveDirectives(out, sdPath);
1110 }
1111
1112 private void load(DataInput in) throws IOException {
1113 nextDirectiveId = in.readLong();
1114 // pools need to be loaded first since directives point to their parent pool
1115 loadPools(in);
1116 loadDirectives(in);
1117 }
1118
1119 /**
1120 * Save cache pools to fsimage
1121 */
1122 private void savePools(DataOutputStream out,
1123 String sdPath) throws IOException {
1124 StartupProgress prog = NameNode.getStartupProgress();
1125 Step step = new Step(StepType.CACHE_POOLS, sdPath);
1126 prog.beginStep(Phase.SAVING_CHECKPOINT, step);
1127 prog.setTotal(Phase.SAVING_CHECKPOINT, step, cachePools.size());
1128 Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
1129 out.writeInt(cachePools.size());
1130 for (CachePool pool: cachePools.values()) {
1131 FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true));
1132 counter.increment();
1133 }
1134 prog.endStep(Phase.SAVING_CHECKPOINT, step);
1135 }
1136
1137 /*
1138 * Save cache entries to fsimage
1139 */
1140 private void saveDirectives(DataOutputStream out, String sdPath)
1141 throws IOException {
1142 StartupProgress prog = NameNode.getStartupProgress();
1143 Step step = new Step(StepType.CACHE_ENTRIES, sdPath);
1144 prog.beginStep(Phase.SAVING_CHECKPOINT, step);
1145 prog.setTotal(Phase.SAVING_CHECKPOINT, step, directivesById.size());
1146 Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
1147 out.writeInt(directivesById.size());
1148 for (CacheDirective directive : directivesById.values()) {
1149 FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo());
1150 counter.increment();
1151 }
1152 prog.endStep(Phase.SAVING_CHECKPOINT, step);
1153 }
1154
1155 /**
1156 * Load cache pools from fsimage
1157 */
1158 private void loadPools(DataInput in)
1159 throws IOException {
1160 StartupProgress prog = NameNode.getStartupProgress();
1161 Step step = new Step(StepType.CACHE_POOLS);
1162 prog.beginStep(Phase.LOADING_FSIMAGE, step);
1163 int numberOfPools = in.readInt();
1164 prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfPools);
1165 Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
1166 for (int i = 0; i < numberOfPools; i++) {
1167 addCachePool(FSImageSerialization.readCachePoolInfo(in));
1168 counter.increment();
1169 }
1170 prog.endStep(Phase.LOADING_FSIMAGE, step);
1171 }
1172
1173 /**
1174 * Load cache directives from the fsimage
1175 */
1176 private void loadDirectives(DataInput in) throws IOException {
1177 StartupProgress prog = NameNode.getStartupProgress();
1178 Step step = new Step(StepType.CACHE_ENTRIES);
1179 prog.beginStep(Phase.LOADING_FSIMAGE, step);
1180 int numDirectives = in.readInt();
1181 prog.setTotal(Phase.LOADING_FSIMAGE, step, numDirectives);
1182 Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
1183 for (int i = 0; i < numDirectives; i++) {
1184 CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in);
1185 // Get pool reference by looking it up in the map
1186 final String poolName = info.getPool();
1187 CacheDirective directive =
1188 new CacheDirective(info.getId(), info.getPath().toUri().getPath(),
1189 info.getReplication(), info.getExpiration().getAbsoluteMillis());
1190 addCacheDirective(poolName, directive);
1191 counter.increment();
1192 }
1193 prog.endStep(Phase.LOADING_FSIMAGE, step);
1194 }
1195 }
1196
1197 public void waitForRescanIfNeeded() {
1198 crmLock.lock();
1199 try {
1200 if (monitor != null) {
1201 monitor.waitForRescanIfNeeded();
1202 }
1203 } finally {
1204 crmLock.unlock();
1205 }
1206 }
1207
1208 private void setNeedsRescan() {
1209 crmLock.lock();
1210 try {
1211 if (monitor != null) {
1212 monitor.setNeedsRescan();
1213 }
1214 } finally {
1215 crmLock.unlock();
1216 }
1217 }
1218
1219 @VisibleForTesting
1220 public Thread getCacheReplicationMonitor() {
1221 crmLock.lock();
1222 try {
1223 return monitor;
1224 } finally {
1225 crmLock.unlock();
1226 }
1227 }
1228 }