001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion;
021    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
022    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
023    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
024    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
025    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
026    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
027    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
028    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
029    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT;
030    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY;
031    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
032    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
033    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT;
034    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
035    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
036    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
037    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
038    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
039    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
040    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
041    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
042    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
043    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
044    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
045    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
046    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
047    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
048    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
049    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
050    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
051    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
052    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY;
053    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
054    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
055    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
056    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
057    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
058    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
059    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
060    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
061    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
062    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
063    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC;
064    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT;
065    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
066    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
067    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
068    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT;
069    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY;
070    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY;
071    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
072    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
073    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
074    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
075    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
076    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
077    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
078    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT;
079    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY;
080    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
081    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
082    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
083    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
084    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
085    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
086    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
087    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
088    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
089    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY;
090    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT;
091    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
092    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
093    import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER;
094    import static org.apache.hadoop.util.Time.now;
095    
096    import java.io.BufferedWriter;
097    import java.io.ByteArrayInputStream;
098    import java.io.DataInput;
099    import java.io.DataInputStream;
100    import java.io.DataOutputStream;
101    import java.io.File;
102    import java.io.FileNotFoundException;
103    import java.io.FileOutputStream;
104    import java.io.IOException;
105    import java.io.OutputStreamWriter;
106    import java.io.PrintWriter;
107    import java.io.StringWriter;
108    import java.lang.management.ManagementFactory;
109    import java.net.InetAddress;
110    import java.net.URI;
111    import java.security.GeneralSecurityException;
112    import java.security.NoSuchAlgorithmException;
113    import java.util.ArrayList;
114    import java.util.Arrays;
115    import java.util.Collection;
116    import java.util.Collections;
117    import java.util.Date;
118    import java.util.EnumSet;
119    import java.util.HashMap;
120    import java.util.HashSet;
121    import java.util.Iterator;
122    import java.util.LinkedHashSet;
123    import java.util.List;
124    import java.util.Map;
125    import java.util.Set;
126    import java.util.UUID;
127    import java.util.concurrent.TimeUnit;
128    import java.util.concurrent.locks.Condition;
129    import java.util.concurrent.locks.ReentrantLock;
130    import java.util.concurrent.locks.ReentrantReadWriteLock;
131    
132    import javax.management.NotCompliantMBeanException;
133    import javax.management.ObjectName;
134    import javax.management.StandardMBean;
135    
136    import org.apache.commons.logging.Log;
137    import org.apache.commons.logging.LogFactory;
138    import org.apache.commons.logging.impl.Log4JLogger;
139    import org.apache.hadoop.HadoopIllegalArgumentException;
140    import org.apache.hadoop.classification.InterfaceAudience;
141    import org.apache.hadoop.conf.Configuration;
142    import org.apache.hadoop.crypto.CipherSuite;
143    import org.apache.hadoop.crypto.CryptoProtocolVersion;
144    import org.apache.hadoop.crypto.key.KeyProvider;
145    import org.apache.hadoop.crypto.CryptoCodec;
146    import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
147    import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
148    import org.apache.hadoop.fs.CacheFlag;
149    import org.apache.hadoop.fs.ContentSummary;
150    import org.apache.hadoop.fs.CreateFlag;
151    import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
152    import org.apache.hadoop.fs.FileAlreadyExistsException;
153    import org.apache.hadoop.fs.FileEncryptionInfo;
154    import org.apache.hadoop.fs.FileStatus;
155    import org.apache.hadoop.fs.FileSystem;
156    import org.apache.hadoop.fs.FsServerDefaults;
157    import org.apache.hadoop.fs.InvalidPathException;
158    import org.apache.hadoop.fs.Options;
159    import org.apache.hadoop.fs.Options.Rename;
160    import org.apache.hadoop.fs.ParentNotDirectoryException;
161    import org.apache.hadoop.fs.Path;
162    import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
163    import org.apache.hadoop.fs.UnresolvedLinkException;
164    import org.apache.hadoop.fs.XAttr;
165    import org.apache.hadoop.fs.XAttrSetFlag;
166    import org.apache.hadoop.fs.permission.AclEntry;
167    import org.apache.hadoop.fs.permission.AclStatus;
168    import org.apache.hadoop.fs.permission.FsAction;
169    import org.apache.hadoop.fs.permission.FsPermission;
170    import org.apache.hadoop.fs.permission.PermissionStatus;
171    import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
172    import org.apache.hadoop.ha.ServiceFailedException;
173    import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
174    import org.apache.hadoop.hdfs.DFSConfigKeys;
175    import org.apache.hadoop.hdfs.DFSUtil;
176    import org.apache.hadoop.hdfs.HAUtil;
177    import org.apache.hadoop.hdfs.HdfsConfiguration;
178    import org.apache.hadoop.hdfs.UnknownCryptoProtocolVersionException;
179    import org.apache.hadoop.hdfs.XAttrHelper;
180    import org.apache.hadoop.hdfs.protocol.AclException;
181    import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
182    import org.apache.hadoop.hdfs.protocol.Block;
183    import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
184    import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
185    import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
186    import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
187    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
188    import org.apache.hadoop.hdfs.protocol.DatanodeID;
189    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
190    import org.apache.hadoop.hdfs.protocol.DirectoryListing;
191    import org.apache.hadoop.hdfs.protocol.EncryptionZone;
192    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
193    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
194    import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
195    import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
196    import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
197    import org.apache.hadoop.hdfs.protocol.LocatedBlock;
198    import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
199    import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
200    import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
201    import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
202    import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
203    import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
204    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
205    import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
206    import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
207    import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
208    import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
209    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
210    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
211    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState;
212    import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
213    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
214    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
215    import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
216    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
217    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
218    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
219    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
220    import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
221    import org.apache.hadoop.hdfs.server.common.GenerationStamp;
222    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
223    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
224    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
225    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
226    import org.apache.hadoop.hdfs.server.common.Storage;
227    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
228    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
229    import org.apache.hadoop.hdfs.server.common.Util;
230    import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
231    import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
232    import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
233    import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
234    import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
235    import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
236    import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
237    import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
238    import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
239    import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
240    import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
241    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
242    import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
243    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
244    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
245    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
246    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
247    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
248    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
249    import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
250    import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
251    import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
252    import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
253    import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
254    import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
255    import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
256    import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
257    import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
258    import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
259    import org.apache.hadoop.hdfs.server.protocol.StorageReport;
260    import org.apache.hadoop.hdfs.util.ChunkedArrayList;
261    import org.apache.hadoop.io.IOUtils;
262    import org.apache.hadoop.io.Text;
263    import org.apache.hadoop.ipc.RetriableException;
264    import org.apache.hadoop.ipc.RetryCache;
265    import org.apache.hadoop.ipc.RetryCache.CacheEntry;
266    import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
267    import org.apache.hadoop.ipc.Server;
268    import org.apache.hadoop.ipc.StandbyException;
269    import org.apache.hadoop.metrics2.annotation.Metric;
270    import org.apache.hadoop.metrics2.annotation.Metrics;
271    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
272    import org.apache.hadoop.metrics2.util.MBeans;
273    import org.apache.hadoop.net.NetworkTopology;
274    import org.apache.hadoop.net.Node;
275    import org.apache.hadoop.net.NodeBase;
276    import org.apache.hadoop.security.AccessControlException;
277    import org.apache.hadoop.security.UserGroupInformation;
278    import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
279    import org.apache.hadoop.security.token.SecretManager.InvalidToken;
280    import org.apache.hadoop.security.token.Token;
281    import org.apache.hadoop.security.token.TokenIdentifier;
282    import org.apache.hadoop.security.token.delegation.DelegationKey;
283    import org.apache.hadoop.util.Daemon;
284    import org.apache.hadoop.util.DataChecksum;
285    import org.apache.hadoop.util.StringUtils;
286    import org.apache.hadoop.util.Time;
287    import org.apache.hadoop.util.VersionInfo;
288    import org.apache.log4j.Appender;
289    import org.apache.log4j.AsyncAppender;
290    import org.apache.log4j.Logger;
291    import org.mortbay.util.ajax.JSON;
292    
293    import com.google.common.annotations.VisibleForTesting;
294    import com.google.common.base.Charsets;
295    import com.google.common.base.Preconditions;
296    import com.google.common.collect.ImmutableMap;
297    import com.google.common.collect.Lists;
298    
299    /***************************************************
 * FSNamesystem does the actual bookkeeping work for the
 * NameNode.
302     *
303     * It tracks several important tables.
304     *
305     * 1)  valid fsname --> blocklist  (kept on disk, logged)
306     * 2)  Set of all valid blocks (inverted #1)
307     * 3)  block --> machinelist (kept in memory, rebuilt dynamically from reports)
308     * 4)  machine --> blocklist (inverted #2)
309     * 5)  LRU cache of updated-heartbeat machines
310     ***************************************************/
311    @InterfaceAudience.Private
312    @Metrics(context="dfs")
313    public class FSNamesystem implements Namesystem, FSClusterStats,
314        FSNamesystemMBean, NameNodeMXBean {
  /** Operational log for this class; audit records go to {@code auditLog}. */
  public static final Log LOG = LogFactory.getLog(FSNamesystem.class);

  // Per-thread reusable StringBuilder — presumably scratch space for
  // formatting audit log lines; its consumers are not in this chunk.
  private static final ThreadLocal<StringBuilder> auditBuffer =
    new ThreadLocal<StringBuilder>() {
      @Override
      protected StringBuilder initialValue() {
        return new StringBuilder();
      }
  };
324    
325      @VisibleForTesting
326      public boolean isAuditEnabled() {
327        return !isDefaultAuditLogger || auditLog.isInfoEnabled();
328      }
329    
330      private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
331          throws IOException {
332        return (isAuditEnabled() && isExternalInvocation())
333            ? dir.getFileInfo(path, resolveSymlink, false, false) : null;
334      }
335      
  /** Convenience overload: audit with no destination path and no file status. */
  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }
340      
341      private void logAuditEvent(boolean succeeded, String cmd, String src,
342          String dst, HdfsFileStatus stat) throws IOException {
343        if (isAuditEnabled() && isExternalInvocation()) {
344          logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
345                        cmd, src, dst, stat);
346        }
347      }
348    
349      private void logAuditEvent(boolean succeeded,
350          UserGroupInformation ugi, InetAddress addr, String cmd, String src,
351          String dst, HdfsFileStatus stat) {
352        FileStatus status = null;
353        if (stat != null) {
354          Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
355          Path path = dst != null ? new Path(dst) : new Path(src);
356          status = new FileStatus(stat.getLen(), stat.isDir(),
357              stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
358              stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
359              stat.getGroup(), symlink, path);
360        }
361        for (AuditLogger logger : auditLoggers) {
362          if (logger instanceof HdfsAuditLogger) {
363            HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
364            hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
365                status, ugi, dtSecretManager);
366          } else {
367            logger.logAuditEvent(succeeded, ugi.toString(), addr,
368                cmd, src, dst, status);
369          }
370        }
371      }
372    
373      /**
374       * Logger for audit events, noting successful FSNamesystem operations. Emits
375       * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
376       * <code>key=value</code> pairs to be written for the following properties:
377       * <code>
378       * ugi=&lt;ugi in RPC&gt;
379       * ip=&lt;remote IP&gt;
380       * cmd=&lt;command&gt;
381       * src=&lt;src path&gt;
382       * dst=&lt;dst path (optional)&gt;
383       * perm=&lt;permissions (optional)&gt;
384       * </code>
385       */
386      public static final Log auditLog = LogFactory.getLog(
387          FSNamesystem.class.getName() + ".audit");
388    
389      static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
390      static int BLOCK_DELETION_INCREMENT = 1000;
391      private final boolean isPermissionEnabled;
392      private final UserGroupInformation fsOwner;
393      private final String fsOwnerShortUserName;
394      private final String supergroup;
395      private final boolean standbyShouldCheckpoint;
396      
397      // Scan interval is not configurable.
398      private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
399        TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
400      final DelegationTokenSecretManager dtSecretManager;
401      private final boolean alwaysUseDelegationTokensForTests;
402    
403      private static final Step STEP_AWAITING_REPORTED_BLOCKS =
404        new Step(StepType.AWAITING_REPORTED_BLOCKS);
405    
406      // Tracks whether the default audit logger is the only configured audit
407      // logger; this allows isAuditEnabled() to return false in case the
408      // underlying logger is disabled, and avoid some unnecessary work.
409      private final boolean isDefaultAuditLogger;
410      private final List<AuditLogger> auditLoggers;
411    
  /** The namespace tree. */
  FSDirectory dir;
  // Final collaborators; being final, each is assigned during construction
  // (the constructor is outside this chunk).
  private final BlockManager blockManager;
  private final SnapshotManager snapshotManager;
  private final CacheManager cacheManager;
  private final DatanodeStatistics datanodeStatistics;

  // whether setStoragePolicy is allowed.
  private final boolean isStoragePolicyEnabled;

  // Id of the nameservice this namenode serves — assigned outside this chunk.
  private String nameserviceId;

  // Null when no rolling upgrade is in progress.
  private RollingUpgradeInfo rollingUpgradeInfo = null;
  /**
   * A flag that indicates whether the checkpointer should checkpoint a rollback
   * fsimage. The edit log tailer sets this flag. The checkpoint will create a
   * rollback fsimage if the flag is true, and then change the flag to false.
   */
  private volatile boolean needRollbackFsImage;

  // Block pool ID used by this namenode
  private String blockPoolId;

  final LeaseManager leaseManager = new LeaseManager(this); 

  volatile Daemon smmthread = null;  // SafeModeMonitor thread

  Daemon nnrmthread = null; // NamenodeResourceMonitor thread

  Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread

  // A daemon to periodically clean up corrupt lazyPersist files
  // from the name space.
  Daemon lazyPersistFileScrubber = null;
  /**
   * When an active namenode will roll its own edit log, in # edits
   */
  private final long editLogRollerThreshold;
  /**
   * Check interval of an active namenode's edit log roller thread 
   */
  private final int editLogRollerInterval;

  /**
   * How frequently we scan and unlink corrupt lazyPersist files.
   * (In seconds)
   */
  private final int lazyPersistFileScrubIntervalSec;

  // Volatile: read without holding the namesystem lock.
  private volatile boolean hasResourcesAvailable = false;
  private volatile boolean fsRunning = true;

  /** The start time of the namesystem. */
  private final long startTime = now();

  /** The interval of namenode checking for the disk space availability */
  private final long resourceRecheckInterval;

  // The actual resource checker instance.
  NameNodeResourceChecker nnResourceChecker;

  // Defaults advertised to clients, plus append/replace-datanode policy.
  private final FsServerDefaults serverDefaults;
  private final boolean supportAppends;
  private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;

  private volatile SafeModeInfo safeMode;  // safe mode information

  private final long maxFsObjects;          // maximum number of fs objects

  private final long minBlockSize;         // minimum block size
  private final long maxBlocksPerFile;     // maximum # of blocks per file

  /**
   * The global generation stamp for legacy blocks with randomly
   * generated block IDs.
   */
  private final GenerationStamp generationStampV1 = new GenerationStamp();

  /**
   * The global generation stamp for this file system.
   */
  private final GenerationStamp generationStampV2 = new GenerationStamp();

  /**
   * The value of the generation stamp when the first switch to sequential
   * block IDs was made. Blocks with generation stamps below this value
   * have randomly allocated block IDs. Blocks with generation stamps above
   * this value had sequentially allocated block IDs. Read from the fsImage
   * (or initialized as an offset from the V1 (legacy) generation stamp on
   * upgrade).
   */
  private long generationStampV1Limit =
      GenerationStamp.GRANDFATHER_GENERATION_STAMP;

  /**
   * The global block ID space for this file system.
   */
  @VisibleForTesting
  private final SequentialBlockIdGenerator blockIdGenerator;

  // precision of access times.
  private final long accessTimePrecision;

  /** Lock to protect FSNamesystem. */
  private final FSNamesystemLock fsLock;

  /**
   * Used when this NN is in standby state to read from the shared edit log.
   */
  private EditLogTailer editLogTailer = null;

  /**
   * Used when this NN is in standby state to perform checkpoints.
   */
  private StandbyCheckpointer standbyCheckpointer;

  /**
   * Reference to the NN's HAContext object. This is only set once
   * {@link #startCommonServices(Configuration, HAContext)} is called. 
   */
  private HAContext haContext;

  private final boolean haEnabled;

  /** flag indicating whether replication queues have been initialized */
  boolean initializedReplQueues = false;

  /**
   * Whether the namenode is in the middle of starting the active service
   */
  private volatile boolean startingActiveService = false;

  // Inode-id generator; see resetLastInodeId()/allocateNewInodeId().
  private INodeId inodeId;

  // Cache of completed retriable RPCs — presumably so retried calls replay
  // the original result instead of re-executing; confirm against RetryCache.
  private final RetryCache retryCache;

  private final NNConf nnConf;

  // Encryption-at-rest support: key provider and crypto codec. The provider
  // may remain null — TODO confirm against the initializer (not in chunk).
  private KeyProviderCryptoExtension provider = null;
  private KeyProvider.Options providerOptions = null;

  private final CryptoCodec codec;

  // Set via setImageLoaded(); cond is signalled when image loading completes
  // to release threads blocked in waitForLoadingFSImage().
  private volatile boolean imageLoaded = false;
  private final Condition cond;

  private final FSImage fsImage;
559    
560      /**
561       * Notify that loading of this FSDirectory is complete, and
562       * it is imageLoaded for use
563       */
564      void imageLoadComplete() {
565        Preconditions.checkState(!imageLoaded, "FSDirectory already loaded");
566        setImageLoaded();
567      }
568    
569      void setImageLoaded() {
570        if(imageLoaded) return;
571        writeLock();
572        try {
573          setImageLoaded(true);
574          dir.markNameCacheInitialized();
575          cond.signalAll();
576        } finally {
577          writeUnlock();
578        }
579      }
580    
  // This is for testing purposes only: reports whether the fsimage has
  // finished loading (volatile read, safe without the namesystem lock).
  @VisibleForTesting
  boolean isImageLoaded() {
    return imageLoaded;
  }
586    
  // exposed for unit tests; the field is volatile, so the write is
  // immediately visible to other threads.
  protected void setImageLoaded(boolean flag) {
    imageLoaded = flag;
  }
591    
592      /**
593       * Block until the object is imageLoaded to be used.
594       */
595      void waitForLoadingFSImage() {
596        if (!imageLoaded) {
597          writeLock();
598          try {
599            while (!imageLoaded) {
600              try {
601                cond.await(5000, TimeUnit.MILLISECONDS);
602              } catch (InterruptedException ignored) {
603              }
604            }
605          } finally {
606            writeUnlock();
607          }
608        }
609      }
610    
611      /**
612       * Set the last allocated inode id when fsimage or editlog is loaded. 
613       */
614      public void resetLastInodeId(long newValue) throws IOException {
615        try {
616          inodeId.skipTo(newValue);
617        } catch(IllegalStateException ise) {
618          throw new IOException(ise);
619        }
620      }
621    
  /** Should only be used for tests to reset to any value */
  void resetLastInodeIdWithoutChecking(long newValue) {
    // Unlike resetLastInodeId(), this performs no validation of newValue.
    inodeId.setCurrentValue(newValue);
  }
626      
  /** @return the last (i.e. most recently allocated) inode ID. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }
631    
  /**
   * Allocate a new inode ID.
   * @return the newly allocated inode ID
   */
  public long allocateNewInodeId() {
    return inodeId.nextValue();
  }
636      
637      /**
638       * Clear all loaded data
639       */
640      void clear() {
641        dir.reset();
642        dtSecretManager.reset();
643        generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
644        generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
645        blockIdGenerator.setCurrentValue(
646            SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID);
647        generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
648        leaseManager.removeAllLeases();
649        inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
650        snapshotManager.clearSnapshottableDirs();
651        cacheManager.clear();
652        setImageLoaded(false);
653      }
654    
  /** @return the lease manager; exposed for tests. */
  @VisibleForTesting
  LeaseManager getLeaseManager() {
    return leaseManager;
  }
659      
  /** @return true iff HA is enabled for this namenode. */
  boolean isHaEnabled() {
    return haEnabled;
  }
663      
664      /**
665       * Check the supplied configuration for correctness.
666       * @param conf Supplies the configuration to validate.
667       * @throws IOException if the configuration could not be queried.
668       * @throws IllegalArgumentException if the configuration is invalid.
669       */
670      private static void checkConfiguration(Configuration conf)
671          throws IOException {
672    
673        final Collection<URI> namespaceDirs =
674            FSNamesystem.getNamespaceDirs(conf);
675        final Collection<URI> editsDirs =
676            FSNamesystem.getNamespaceEditsDirs(conf);
677        final Collection<URI> requiredEditsDirs =
678            FSNamesystem.getRequiredNamespaceEditsDirs(conf);
679        final Collection<URI> sharedEditsDirs =
680            FSNamesystem.getSharedEditsDirs(conf);
681    
682        for (URI u : requiredEditsDirs) {
683          if (u.toString().compareTo(
684                  DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
685            continue;
686          }
687    
688          // Each required directory must also be in editsDirs or in
689          // sharedEditsDirs.
690          if (!editsDirs.contains(u) &&
691              !sharedEditsDirs.contains(u)) {
692            throw new IllegalArgumentException(
693                "Required edits directory " + u.toString() + " not present in " +
694                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
695                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
696                editsDirs.toString() + "; " +
697                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
698                requiredEditsDirs.toString() + ". " +
699                DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
700                sharedEditsDirs.toString() + ".");
701          }
702        }
703    
704        if (namespaceDirs.size() == 1) {
705          LOG.warn("Only one image storage directory ("
706              + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of data loss"
707              + " due to lack of redundant storage directories!");
708        }
709        if (editsDirs.size() == 1) {
710          LOG.warn("Only one namespace edits storage directory ("
711              + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of data loss"
712              + " due to lack of redundant storage directories!");
713        }
714      }
715    
716      /**
717       * Instantiates an FSNamesystem loaded from the image and edits
718       * directories specified in the passed Configuration.
719       *
720       * @param conf the Configuration which specifies the storage directories
721       *             from which to load
722       * @return an FSNamesystem which contains the loaded namespace
723       * @throws IOException if loading fails
724       */
725      static FSNamesystem loadFromDisk(Configuration conf) throws IOException {
726    
727        checkConfiguration(conf);
728        FSImage fsImage = new FSImage(conf,
729            FSNamesystem.getNamespaceDirs(conf),
730            FSNamesystem.getNamespaceEditsDirs(conf));
731        FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
732        StartupOption startOpt = NameNode.getStartupOption(conf);
733        if (startOpt == StartupOption.RECOVER) {
734          namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
735        }
736    
737        long loadStart = now();
738        try {
739          namesystem.loadFSImage(startOpt);
740        } catch (IOException ioe) {
741          LOG.warn("Encountered exception loading fsimage", ioe);
742          fsImage.close();
743          throw ioe;
744        }
745        long timeTakenToLoadFSImage = now() - loadStart;
746        LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
747        NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
748        if (nnMetrics != null) {
749          nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
750        }
751        return namesystem;
752      }
753      
  /**
   * Equivalent to {@code FSNamesystem(conf, fsImage, false)}: creates the
   * namesystem with the retry cache setup step enabled.
   */
  FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
    this(conf, fsImage, false);
  }
757      
758      /**
759       * Create an FSNamesystem associated with the specified image.
760       * 
761       * Note that this does not load any data off of disk -- if you would
762       * like that behavior, use {@link #loadFromDisk(Configuration)}
763       *
764       * @param conf configuration
765       * @param fsImage The FSImage to associate with
766       * @param ignoreRetryCache Whether or not should ignore the retry cache setup
767       *                         step. For Secondary NN this should be set to true.
768       * @throws IOException on bad configuration
769       */
770      FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
771          throws IOException {
772        provider = DFSUtil.createKeyProviderCryptoExtension(conf);
773        if (provider == null) {
774          LOG.info("No KeyProvider found.");
775        } else {
776          LOG.info("Found KeyProvider: " + provider.toString());
777        }
778        providerOptions = KeyProvider.options(conf);
779        this.codec = CryptoCodec.getInstance(conf);
780        if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
781                            DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
782          LOG.info("Enabling async auditlog");
783          enableAsyncAuditLog();
784        }
785        boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
786        LOG.info("fsLock is fair:" + fair);
787        fsLock = new FSNamesystemLock(fair);
788        cond = fsLock.writeLock().newCondition();
789        this.fsImage = fsImage;
790        try {
791          resourceRecheckInterval = conf.getLong(
792              DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
793              DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);
794    
795          this.blockManager = new BlockManager(this, this, conf);
796          this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
797          this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);
798    
799          this.isStoragePolicyEnabled =
800              conf.getBoolean(DFS_STORAGE_POLICY_ENABLED_KEY,
801                              DFS_STORAGE_POLICY_ENABLED_DEFAULT);
802    
803          this.fsOwner = UserGroupInformation.getCurrentUser();
804          this.fsOwnerShortUserName = fsOwner.getShortUserName();
805          this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, 
806                                     DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
807          this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
808                                                     DFS_PERMISSIONS_ENABLED_DEFAULT);
809          LOG.info("fsOwner             = " + fsOwner);
810          LOG.info("supergroup          = " + supergroup);
811          LOG.info("isPermissionEnabled = " + isPermissionEnabled);
812    
813          // block allocation has to be persisted in HA using a shared edits directory
814          // so that the standby has up-to-date namespace information
815          nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
816          this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);  
817          
818          // Sanity check the HA-related config.
819          if (nameserviceId != null) {
820            LOG.info("Determined nameservice ID: " + nameserviceId);
821          }
822          LOG.info("HA Enabled: " + haEnabled);
823          if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
824            LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
825            throw new IOException("Invalid configuration: a shared edits dir " +
826                "must not be specified if HA is not enabled.");
827          }
828    
829          // Get the checksum type from config
830          String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
831          DataChecksum.Type checksumType;
832          try {
833             checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
834          } catch (IllegalArgumentException iae) {
835             throw new IOException("Invalid checksum type in "
836                + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
837          }
838    
839          this.serverDefaults = new FsServerDefaults(
840              conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
841              conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
842              conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
843              (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
844              conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
845              conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
846              conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
847              checksumType);
848          
849          this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY, 
850                                           DFS_NAMENODE_MAX_OBJECTS_DEFAULT);
851    
852          this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
853              DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
854          this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
855              DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
856          this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
857              DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
858          this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
859          LOG.info("Append Enabled: " + supportAppends);
860    
861          this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);
862          
863          this.standbyShouldCheckpoint = conf.getBoolean(
864              DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
865          // # edit autoroll threshold is a multiple of the checkpoint threshold 
866          this.editLogRollerThreshold = (long)
867              (conf.getFloat(
868                  DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
869                  DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
870              conf.getLong(
871                  DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
872                  DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
873          this.editLogRollerInterval = conf.getInt(
874              DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
875              DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
876          this.inodeId = new INodeId();
877          
878          this.lazyPersistFileScrubIntervalSec = conf.getInt(
879              DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC,
880              DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT);
881    
882          if (this.lazyPersistFileScrubIntervalSec == 0) {
883            throw new IllegalArgumentException(
884                DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC + " must be non-zero.");
885          }
886    
887          // For testing purposes, allow the DT secret manager to be started regardless
888          // of whether security is enabled.
889          alwaysUseDelegationTokensForTests = conf.getBoolean(
890              DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
891              DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
892          
893          this.dtSecretManager = createDelegationTokenSecretManager(conf);
894          this.dir = new FSDirectory(this, conf);
895          this.snapshotManager = new SnapshotManager(dir);
896          this.cacheManager = new CacheManager(this, conf, blockManager);
897          this.safeMode = new SafeModeInfo(conf);
898          this.auditLoggers = initAuditLoggers(conf);
899          this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
900            auditLoggers.get(0) instanceof DefaultAuditLogger;
901          this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
902          this.nnConf = new NNConf(conf);
903        } catch(IOException e) {
904          LOG.error(getClass().getSimpleName() + " initialization failed.", e);
905          close();
906          throw e;
907        } catch (RuntimeException re) {
908          LOG.error(getClass().getSimpleName() + " initialization failed.", re);
909          close();
910          throw re;
911        }
912      }
913      
  /** @return the retry cache, or null when the cache is disabled. */
  @VisibleForTesting
  public RetryCache getRetryCache() {
    return retryCache;
  }
918    
919      void lockRetryCache() {
920        if (retryCache != null) {
921          retryCache.lock();
922        }
923      }
924    
925      void unlockRetryCache() {
926        if (retryCache != null) {
927          retryCache.unlock();
928        }
929      }
930    
  /**
   * Whether or not retry cache is enabled
   * @return true iff a retry cache was created for this namesystem
   */
  boolean hasRetryCache() {
    return retryCache != null;
  }
935      
936      void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
937        if (retryCache != null) {
938          retryCache.addCacheEntryWithPayload(clientId, callId, payload);
939        }
940      }
941      
942      void addCacheEntry(byte[] clientId, int callId) {
943        if (retryCache != null) {
944          retryCache.addCacheEntry(clientId, callId);
945        }
946      }
947    
  /** @return the key provider crypto extension, or null if none configured. */
  @VisibleForTesting
  public KeyProviderCryptoExtension getProvider() {
    return provider;
  }
952    
953      @VisibleForTesting
954      static RetryCache initRetryCache(Configuration conf) {
955        boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
956                                         DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
957        LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
958        if (enable) {
959          float heapPercent = conf.getFloat(
960              DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
961              DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
962          long entryExpiryMillis = conf.getLong(
963              DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
964              DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
965          LOG.info("Retry cache will use " + heapPercent
966              + " of total heap and retry cache entry expiry time is "
967              + entryExpiryMillis + " millis");
968          long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
969          return new RetryCache("NameNodeRetryCache", heapPercent,
970              entryExpiryNanos);
971        }
972        return null;
973      }
974    
  /**
   * Build the list of audit loggers from the configured class names
   * ({@code DFS_NAMENODE_AUDIT_LOGGERS_KEY}). Classes other than the
   * well-known default are loaded reflectively and must implement
   * {@link AuditLogger}. If nothing is configured, a single
   * {@link DefaultAuditLogger} is installed.
   *
   * @param conf configuration supplying the logger class names
   * @return an unmodifiable, non-empty list of initialized audit loggers
   * @throws RuntimeException if a logger cannot be loaded or initialized
   */
  private List<AuditLogger> initAuditLoggers(Configuration conf) {
    // Initialize the custom access loggers if configured.
    Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
    List<AuditLogger> auditLoggers = Lists.newArrayList();
    if (alClasses != null && !alClasses.isEmpty()) {
      for (String className : alClasses) {
        try {
          AuditLogger logger;
          if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
            logger = new DefaultAuditLogger();
          } else {
            logger = (AuditLogger) Class.forName(className).newInstance();
          }
          logger.initialize(conf);
          auditLoggers.add(logger);
        } catch (RuntimeException re) {
          // Propagate unchecked exceptions as-is.
          throw re;
        } catch (Exception e) {
          // Wrap checked reflection/initialization failures.
          throw new RuntimeException(e);
        }
      }
    }

    // Make sure there is at least one logger installed.
    if (auditLoggers.isEmpty()) {
      auditLoggers.add(new DefaultAuditLogger());
    }
    return Collections.unmodifiableList(auditLoggers);
  }
1004    
  /**
   * Load the FSImage according to the startup option: optionally format
   * first, recover/read the image under the write lock, save a new image if
   * required, and open the edit log for write unless coming up in standby.
   */
  private void loadFSImage(StartupOption startOpt) throws IOException {
    final FSImage fsImage = getFSImage();

    // format before starting up if requested
    if (startOpt == StartupOption.FORMAT) {
      
      fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id

      startOpt = StartupOption.REGULAR;
    }
    boolean success = false;
    writeLock();
    try {
      // We shouldn't be calling saveNamespace if we've come up in standby state.
      MetaRecoveryContext recovery = startOpt.createRecoveryContext();
      final boolean staleImage
          = fsImage.recoverTransitionRead(startOpt, this, recovery);
      if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) ||
          RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) {
        // A rollback/downgrade discards any in-progress rolling upgrade info.
        rollingUpgradeInfo = null;
      }
      final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade(); 
      LOG.info("Need to save fs image? " + needToSave
          + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
          + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
      if (needToSave) {
        fsImage.saveNamespace(this);
      } else {
        updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(),
            startOpt);
        // No need to save, so mark the phase done.
        StartupProgress prog = NameNode.getStartupProgress();
        prog.beginPhase(Phase.SAVING_CHECKPOINT);
        prog.endPhase(Phase.SAVING_CHECKPOINT);
      }
      // This will start a new log segment and write to the seen_txid file, so
      // we shouldn't do it when coming up in standby state
      if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)
          || (haEnabled && startOpt == StartupOption.UPGRADEONLY)) {
        fsImage.openEditLogForWrite();
      }
      success = true;
    } finally {
      if (!success) {
        // Release storage directories if loading failed part-way.
        fsImage.close();
      }
      writeUnlock();
    }
    imageLoadComplete();
  }
1055    
1056      private void updateStorageVersionForRollingUpgrade(final long layoutVersion,
1057          StartupOption startOpt) throws IOException {
1058        boolean rollingStarted = RollingUpgradeStartupOption.STARTED
1059            .matches(startOpt) && layoutVersion > HdfsConstants
1060            .NAMENODE_LAYOUT_VERSION;
1061        boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK
1062            .matches(startOpt);
1063        if (rollingRollback || rollingStarted) {
1064          fsImage.updateStorageVersion();
1065        }
1066      }
1067    
1068      private void startSecretManager() {
1069        if (dtSecretManager != null) {
1070          try {
1071            dtSecretManager.startThreads();
1072          } catch (IOException e) {
1073            // Inability to start secret manager
1074            // can't be recovered from.
1075            throw new RuntimeException(e);
1076          }
1077        }
1078      }
1079      
1080      private void startSecretManagerIfNecessary() {
1081        boolean shouldRun = shouldUseDelegationTokens() &&
1082          !isInSafeMode() && getEditLog().isOpenForWrite();
1083        boolean running = dtSecretManager.isRunning();
1084        if (shouldRun && !running) {
1085          startSecretManager();
1086        }
1087      }
1088    
  /** Stop the delegation token secret manager's threads, if one exists. */
  private void stopSecretManager() {
    if (dtSecretManager != null) {
      dtSecretManager.stopThreads();
    }
  }
1094      
1095      /** 
1096       * Start services common to both active and standby states
1097       */
1098      void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
1099        this.registerMBean(); // register the MBean for the FSNamesystemState
1100        writeLock();
1101        this.haContext = haContext;
1102        try {
1103          nnResourceChecker = new NameNodeResourceChecker(conf);
1104          checkAvailableResources();
1105          assert safeMode != null && !isPopulatingReplQueues();
1106          StartupProgress prog = NameNode.getStartupProgress();
1107          prog.beginPhase(Phase.SAFEMODE);
1108          prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
1109            getCompleteBlocksTotal());
1110          setBlockTotal();
1111          blockManager.activate(conf);
1112        } finally {
1113          writeUnlock();
1114        }
1115        
1116        registerMXBean();
1117        DefaultMetricsSystem.instance().register(this);
1118        snapshotManager.registerMXBean();
1119      }
1120      
1121      /** 
1122       * Stop services common to both active and standby states
1123       */
1124      void stopCommonServices() {
1125        writeLock();
1126        try {
1127          if (blockManager != null) blockManager.close();
1128        } finally {
1129          writeUnlock();
1130        }
1131        RetryCache.clear(retryCache);
1132      }
1133      
1134      /**
1135       * Start services required in active state
1136       * @throws IOException
1137       */
1138      void startActiveServices() throws IOException {
1139        startingActiveService = true;
1140        LOG.info("Starting services required for active state");
1141        writeLock();
1142        try {
1143          FSEditLog editLog = getFSImage().getEditLog();
1144          
1145          if (!editLog.isOpenForWrite()) {
1146            // During startup, we're already open for write during initialization.
1147            editLog.initJournalsForWrite();
1148            // May need to recover
1149            editLog.recoverUnclosedStreams();
1150            
1151            LOG.info("Catching up to latest edits from old active before " +
1152                "taking over writer role in edits logs");
1153            editLogTailer.catchupDuringFailover();
1154            
1155            blockManager.setPostponeBlocksFromFuture(false);
1156            blockManager.getDatanodeManager().markAllDatanodesStale();
1157            blockManager.clearQueues();
1158            blockManager.processAllPendingDNMessages();
1159    
1160            // Only need to re-process the queue, If not in SafeMode.
1161            if (!isInSafeMode()) {
1162              LOG.info("Reprocessing replication and invalidation queues");
1163              initializeReplQueues();
1164            }
1165    
1166            if (LOG.isDebugEnabled()) {
1167              LOG.debug("NameNode metadata after re-processing " +
1168                  "replication and invalidation queues during failover:\n" +
1169                  metaSaveAsString());
1170            }
1171            
1172            long nextTxId = getFSImage().getLastAppliedTxId() + 1;
1173            LOG.info("Will take over writing edit logs at txnid " + 
1174                nextTxId);
1175            editLog.setNextTxId(nextTxId);
1176    
1177            getFSImage().editLog.openForWrite();
1178          }
1179    
1180          // Enable quota checks.
1181          dir.enableQuotaChecks();
1182          if (haEnabled) {
1183            // Renew all of the leases before becoming active.
1184            // This is because, while we were in standby mode,
1185            // the leases weren't getting renewed on this NN.
1186            // Give them all a fresh start here.
1187            leaseManager.renewAllLeases();
1188          }
1189          leaseManager.startMonitor();
1190          startSecretManagerIfNecessary();
1191    
1192          //ResourceMonitor required only at ActiveNN. See HDFS-2914
1193          this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
1194          nnrmthread.start();
1195    
1196          nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
1197              editLogRollerThreshold, editLogRollerInterval));
1198          nnEditLogRoller.start();
1199    
1200          if (lazyPersistFileScrubIntervalSec > 0) {
1201            lazyPersistFileScrubber = new Daemon(new LazyPersistFileScrubber(
1202                lazyPersistFileScrubIntervalSec));
1203            lazyPersistFileScrubber.start();
1204          }
1205    
1206          cacheManager.startMonitorThread();
1207          blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
1208        } finally {
1209          startingActiveService = false;
1210          checkSafeMode();
1211          writeUnlock();
1212        }
1213      }
1214    
1215      /**
1216       * Initialize replication queues.
1217       */
1218      private void initializeReplQueues() {
1219        LOG.info("initializing replication queues");
1220        blockManager.processMisReplicatedBlocks();
1221        initializedReplQueues = true;
1222      }
1223    
1224      private boolean inActiveState() {
1225        return haContext != null &&
1226            haContext.getState().getServiceState() == HAServiceState.ACTIVE;
1227      }
1228    
1229      /**
1230       * @return Whether the namenode is transitioning to active state and is in the
1231       *         middle of the {@link #startActiveServices()}
1232       */
1233      public boolean inTransitionToActive() {
1234        return haEnabled && inActiveState() && startingActiveService;
1235      }
1236    
  /**
   * @return true if delegation tokens should be used: either security is
   *         enabled, or the test-only always-use flag is set.
   */
  private boolean shouldUseDelegationTokens() {
    return UserGroupInformation.isSecurityEnabled() ||
      alwaysUseDelegationTokensForTests;
  }
1241    
1242      /** 
1243       * Stop services required in active state
1244       */
1245      void stopActiveServices() {
1246        LOG.info("Stopping services started for active state");
1247        writeLock();
1248        try {
1249          stopSecretManager();
1250          leaseManager.stopMonitor();
1251          if (nnrmthread != null) {
1252            ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
1253            nnrmthread.interrupt();
1254          }
1255          if (nnEditLogRoller != null) {
1256            ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
1257            nnEditLogRoller.interrupt();
1258          }
1259          if (lazyPersistFileScrubber != null) {
1260            ((LazyPersistFileScrubber) lazyPersistFileScrubber.getRunnable()).stop();
1261            lazyPersistFileScrubber.interrupt();
1262          }
1263          if (dir != null && getFSImage() != null) {
1264            if (getFSImage().editLog != null) {
1265              getFSImage().editLog.close();
1266            }
1267            // Update the fsimage with the last txid that we wrote
1268            // so that the tailer starts from the right spot.
1269            getFSImage().updateLastAppliedTxIdFromWritten();
1270          }
1271          if (cacheManager != null) {
1272            cacheManager.stopMonitorThread();
1273            cacheManager.clearDirectiveStats();
1274          }
1275          blockManager.getDatanodeManager().clearPendingCachingCommands();
1276          blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
1277          // Don't want to keep replication queues when not in Active.
1278          blockManager.clearQueues();
1279          initializedReplQueues = false;
1280        } finally {
1281          writeUnlock();
1282        }
1283      }
1284      
1285      /**
1286       * Start services required in standby state 
1287       * 
1288       * @throws IOException
1289       */
1290      void startStandbyServices(final Configuration conf) throws IOException {
1291        LOG.info("Starting services required for standby state");
1292        if (!getFSImage().editLog.isOpenForRead()) {
1293          // During startup, we're already open for read.
1294          getFSImage().editLog.initSharedJournalsForRead();
1295        }
1296        
1297        blockManager.setPostponeBlocksFromFuture(true);
1298    
1299        // Disable quota checks while in standby.
1300        dir.disableQuotaChecks();
1301        editLogTailer = new EditLogTailer(this, conf);
1302        editLogTailer.start();
1303        if (standbyShouldCheckpoint) {
1304          standbyCheckpointer = new StandbyCheckpointer(conf, this);
1305          standbyCheckpointer.start();
1306        }
1307      }
1308    
1309      /**
1310       * Called when the NN is in Standby state and the editlog tailer tails the
1311       * OP_ROLLING_UPGRADE_START.
1312       */
1313      void triggerRollbackCheckpoint() {
1314        setNeedRollbackFsImage(true);
1315        if (standbyCheckpointer != null) {
1316          standbyCheckpointer.triggerRollbackCheckpoint();
1317        }
1318      }
1319    
1320      /**
1321       * Called while the NN is in Standby state, but just about to be
1322       * asked to enter Active state. This cancels any checkpoints
1323       * currently being taken.
1324       */
1325      void prepareToStopStandbyServices() throws ServiceFailedException {
1326        if (standbyCheckpointer != null) {
1327          standbyCheckpointer.cancelAndPreventCheckpoints(
1328              "About to leave standby state");
1329        }
1330      }
1331    
1332      /** Stop services required in standby state */
1333      void stopStandbyServices() throws IOException {
1334        LOG.info("Stopping services started for standby state");
1335        if (standbyCheckpointer != null) {
1336          standbyCheckpointer.stop();
1337        }
1338        if (editLogTailer != null) {
1339          editLogTailer.stop();
1340        }
1341        if (dir != null && getFSImage() != null && getFSImage().editLog != null) {
1342          getFSImage().editLog.close();
1343        }
1344      }
1345      
1346      @Override
1347      public void checkOperation(OperationCategory op) throws StandbyException {
1348        if (haContext != null) {
1349          // null in some unit tests
1350          haContext.checkOperation(op);
1351        }
1352      }
1353      
1354      /**
1355       * @throws RetriableException
1356       *           If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
1357       *           NameNode is in active state
1358       * @throws SafeModeException
1359       *           Otherwise if NameNode is in SafeMode.
1360       */
1361      private void checkNameNodeSafeMode(String errorMsg)
1362          throws RetriableException, SafeModeException {
1363        if (isInSafeMode()) {
1364          SafeModeException se = new SafeModeException(errorMsg, safeMode);
1365          if (haEnabled && haContext != null
1366              && haContext.getState().getServiceState() == HAServiceState.ACTIVE
1367              && shouldRetrySafeMode(this.safeMode)) {
1368            throw new RetriableException(se);
1369          } else {
1370            throw se;
1371          }
1372        }
1373      }
1374      
1375      /**
1376       * We already know that the safemode is on. We will throw a RetriableException
1377       * if the safemode is not manual or caused by low resource.
1378       */
1379      private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
1380        if (safeMode == null) {
1381          return false;
1382        } else {
1383          return !safeMode.isManual() && !safeMode.areResourcesLow();
1384        }
1385      }
1386      
  /** @return the configured storage directories for the namespace image. */
  public static Collection<URI> getNamespaceDirs(Configuration conf) {
    return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
  }
1390    
1391      /**
1392       * Get all edits dirs which are required. If any shared edits dirs are
1393       * configured, these are also included in the set of required dirs.
1394       * 
1395       * @param conf the HDFS configuration.
1396       * @return all required dirs.
1397       */
1398      public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
1399        Set<URI> ret = new HashSet<URI>();
1400        ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
1401        ret.addAll(getSharedEditsDirs(conf));
1402        return ret;
1403      }
1404    
1405      private static Collection<URI> getStorageDirs(Configuration conf,
1406                                                    String propertyName) {
1407        Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
1408        StartupOption startOpt = NameNode.getStartupOption(conf);
1409        if(startOpt == StartupOption.IMPORT) {
1410          // In case of IMPORT this will get rid of default directories 
1411          // but will retain directories specified in hdfs-site.xml
1412          // When importing image from a checkpoint, the name-node can
1413          // start with empty set of storage directories.
1414          Configuration cE = new HdfsConfiguration(false);
1415          cE.addResource("core-default.xml");
1416          cE.addResource("core-site.xml");
1417          cE.addResource("hdfs-default.xml");
1418          Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
1419          dirNames.removeAll(dirNames2);
1420          if(dirNames.isEmpty())
1421            LOG.warn("!!! WARNING !!!" +
1422              "\n\tThe NameNode currently runs without persistent storage." +
1423              "\n\tAny changes to the file system meta-data may be lost." +
1424              "\n\tRecommended actions:" +
1425              "\n\t\t- shutdown and restart NameNode with configured \"" 
1426              + propertyName + "\" in hdfs-site.xml;" +
1427              "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
1428              "of the file system meta-data.");
1429        } else if (dirNames.isEmpty()) {
1430          dirNames = Collections.singletonList(
1431              DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
1432        }
1433        return Util.stringCollectionAsURIs(dirNames);
1434      }
1435    
1436      /**
1437       * Return an ordered list of edits directories to write to.
1438       * The list is ordered such that all shared edits directories
1439       * are ordered before non-shared directories, and any duplicates
1440       * are removed. The order they are specified in the configuration
1441       * is retained.
1442       * @return Collection of shared edits directories.
1443       * @throws IOException if multiple shared edits directories are configured
1444       */
1445      public static List<URI> getNamespaceEditsDirs(Configuration conf)
1446          throws IOException {
1447        return getNamespaceEditsDirs(conf, true);
1448      }
1449      
1450      public static List<URI> getNamespaceEditsDirs(Configuration conf,
1451          boolean includeShared)
1452          throws IOException {
1453        // Use a LinkedHashSet so that order is maintained while we de-dup
1454        // the entries.
1455        LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();
1456        
1457        if (includeShared) {
1458          List<URI> sharedDirs = getSharedEditsDirs(conf);
1459      
1460          // Fail until multiple shared edits directories are supported (HDFS-2782)
1461          if (sharedDirs.size() > 1) {
1462            throw new IOException(
1463                "Multiple shared edits directories are not yet supported");
1464          }
1465      
1466          // First add the shared edits dirs. It's critical that the shared dirs
1467          // are added first, since JournalSet syncs them in the order they are listed,
1468          // and we need to make sure all edits are in place in the shared storage
1469          // before they are replicated locally. See HDFS-2874.
1470          for (URI dir : sharedDirs) {
1471            if (!editsDirs.add(dir)) {
1472              LOG.warn("Edits URI " + dir + " listed multiple times in " + 
1473                  DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
1474            }
1475          }
1476        }    
1477        // Now add the non-shared dirs.
1478        for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
1479          if (!editsDirs.add(dir)) {
1480            LOG.warn("Edits URI " + dir + " listed multiple times in " + 
1481                DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
1482                DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
1483          }
1484        }
1485    
1486        if (editsDirs.isEmpty()) {
1487          // If this is the case, no edit dirs have been explicitly configured.
1488          // Image dirs are to be used for edits too.
1489          return Lists.newArrayList(getNamespaceDirs(conf));
1490        } else {
1491          return Lists.newArrayList(editsDirs);
1492        }
1493      }
1494      
1495      /**
1496       * Returns edit directories that are shared between primary and secondary.
1497       * @param conf configuration
1498       * @return collection of edit directories from {@code conf}
1499       */
1500      public static List<URI> getSharedEditsDirs(Configuration conf) {
1501        // don't use getStorageDirs here, because we want an empty default
1502        // rather than the dir in /tmp
1503        Collection<String> dirNames = conf.getTrimmedStringCollection(
1504            DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
1505        return Util.stringCollectionAsURIs(dirNames);
1506      }
1507    
  /** Acquire the FSNamesystem read lock (shared with other readers). */
  @Override
  public void readLock() {
    this.fsLock.readLock().lock();
  }
  /**
   * Acquire the "long read" lock and then the normal read lock, both
   * interruptibly, in that order. If the inner read-lock acquisition is
   * interrupted, the already-held long read lock is released before the
   * interrupt propagates, so no lock is leaked.
   */
  @Override
  public void longReadLockInterruptibly() throws InterruptedException {
    this.fsLock.longReadLock().lockInterruptibly();
    try {
      this.fsLock.readLock().lockInterruptibly();
    } catch (InterruptedException ie) {
      // In the event we're interrupted while getting the normal FSNS read lock,
      // release the long read lock.
      this.fsLock.longReadLock().unlock();
      throw ie;
    }
  }
  /**
   * Release the locks taken by {@link #longReadLockInterruptibly()} in
   * reverse acquisition order: read lock first, then the long read lock.
   */
  @Override
  public void longReadUnlock() {
    this.fsLock.readLock().unlock();
    this.fsLock.longReadLock().unlock();
  }
  /** Release the FSNamesystem read lock. */
  @Override
  public void readUnlock() {
    this.fsLock.readLock().unlock();
  }
  /**
   * Acquire the write lock. The long read lock is always taken before the
   * write lock; this ordering must match {@link #writeUnlock()}.
   */
  @Override
  public void writeLock() {
    this.fsLock.longReadLock().lock();
    this.fsLock.writeLock().lock();
  }
  /**
   * Interruptible variant of {@link #writeLock()}. If the write-lock
   * acquisition is interrupted, the already-held long read lock is released
   * before the interrupt propagates.
   */
  @Override
  public void writeLockInterruptibly() throws InterruptedException {
    this.fsLock.longReadLock().lockInterruptibly();
    try {
      this.fsLock.writeLock().lockInterruptibly();
    } catch (InterruptedException ie) {
      // In the event we're interrupted while getting the normal FSNS write
      // lock, release the long read lock.
      this.fsLock.longReadLock().unlock();
      throw ie;
    }
  }
  /**
   * Release the locks taken by {@link #writeLock()} in reverse acquisition
   * order: write lock first, then the long read lock.
   */
  @Override
  public void writeUnlock() {
    this.fsLock.writeLock().unlock();
    this.fsLock.longReadLock().unlock();
  }
  /** @return true if the current thread holds the write lock. */
  @Override
  public boolean hasWriteLock() {
    return this.fsLock.isWriteLockedByCurrentThread();
  }
  /** @return true if the current thread holds the read or write lock. */
  @Override
  public boolean hasReadLock() {
    return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
  }

  /** @return the current thread's read-lock hold count. */
  public int getReadHoldCount() {
    return this.fsLock.getReadHoldCount();
  }

  /** @return the current thread's write-lock hold count. */
  public int getWriteHoldCount() {
    return this.fsLock.getWriteHoldCount();
  }
1571    
  /** @return namespace information for this namenode, under the read lock. */
  NamespaceInfo getNamespaceInfo() {
    readLock();
    try {
      return unprotectedGetNamespaceInfo();
    } finally {
      readUnlock();
    }
  }
1580    
1581      /**
1582       * Version of @see #getNamespaceInfo() that is not protected by a lock.
1583       */
1584      NamespaceInfo unprotectedGetNamespaceInfo() {
1585        return new NamespaceInfo(getFSImage().getStorage().getNamespaceID(),
1586            getClusterId(), getBlockPoolId(),
1587            getFSImage().getStorage().getCTime());
1588      }
1589    
1590      /**
1591       * Close down this file system manager.
1592       * Causes heartbeat and lease daemons to stop; waits briefly for
1593       * them to finish, but a short timeout returns control back to caller.
1594       */
1595      void close() {
1596        fsRunning = false;
1597        try {
1598          stopCommonServices();
1599          if (smmthread != null) smmthread.interrupt();
1600        } finally {
1601          // using finally to ensure we also wait for lease daemon
1602          try {
1603            stopActiveServices();
1604            stopStandbyServices();
1605          } catch (IOException ie) {
1606          } finally {
1607            IOUtils.cleanup(LOG, dir);
1608            IOUtils.cleanup(LOG, fsImage);
1609          }
1610        }
1611      }
1612    
  /** @return true while this namesystem has not been closed. */
  @Override
  public boolean isRunning() {
    return fsRunning;
  }
1617      
1618      @Override
1619      public boolean isInStandbyState() {
1620        if (haContext == null || haContext.getState() == null) {
1621          // We're still starting up. In this case, if HA is
1622          // on for the cluster, we always start in standby. Otherwise
1623          // start in active.
1624          return haEnabled;
1625        }
1626    
1627        return HAServiceState.STANDBY == haContext.getState().getServiceState();
1628      }
1629    
1630      /**
1631       * Dump all metadata into specified file
1632       */
1633      void metaSave(String filename) throws IOException {
1634        checkSuperuserPrivilege();
1635        checkOperation(OperationCategory.UNCHECKED);
1636        writeLock();
1637        try {
1638          checkOperation(OperationCategory.UNCHECKED);
1639          File file = new File(System.getProperty("hadoop.log.dir"), filename);
1640          PrintWriter out = new PrintWriter(new BufferedWriter(
1641              new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)));
1642          metaSave(out);
1643          out.flush();
1644          out.close();
1645        } finally {
1646          writeUnlock();
1647        }
1648      }
1649    
1650      private void metaSave(PrintWriter out) {
1651        assert hasWriteLock();
1652        long totalInodes = this.dir.totalInodes();
1653        long totalBlocks = this.getBlocksTotal();
1654        out.println(totalInodes + " files and directories, " + totalBlocks
1655            + " blocks = " + (totalInodes + totalBlocks) + " total");
1656    
1657        blockManager.metaSave(out);
1658      }
1659    
1660      private String metaSaveAsString() {
1661        StringWriter sw = new StringWriter();
1662        PrintWriter pw = new PrintWriter(sw);
1663        metaSave(pw);
1664        pw.flush();
1665        return sw.toString();
1666      }
1667      
1668    
  /** @return the default block size from the server defaults. */
  long getDefaultBlockSize() {
    return serverDefaults.getBlockSize();
  }

  /**
   * @return the server default values for this namenode
   * @throws StandbyException if READ operations are not allowed in the
   *         current HA state
   */
  FsServerDefaults getServerDefaults() throws StandbyException {
    checkOperation(OperationCategory.READ);
    return serverDefaults;
  }

  /** @return the configured access-time precision (0 disables access times). */
  long getAccessTimePrecision() {
    return accessTimePrecision;
  }

  /** @return true if access times are recorded, i.e. precision &gt; 0. */
  private boolean isAccessTimeSupported() {
    return accessTimePrecision > 0;
  }
1685    
1686      /////////////////////////////////////////////////////////
1687      //
1688      // These methods are called by HadoopFS clients
1689      //
1690      /////////////////////////////////////////////////////////
1691      /**
1692       * Set permissions for an existing file.
1693       * @throws IOException
1694       */
1695      void setPermission(String src, FsPermission permission)
1696          throws AccessControlException, FileNotFoundException, SafeModeException,
1697          UnresolvedLinkException, IOException {
1698        try {
1699          setPermissionInt(src, permission);
1700        } catch (AccessControlException e) {
1701          logAuditEvent(false, "setPermission", src);
1702          throw e;
1703        }
1704      }
1705    
  /**
   * Worker for {@link #setPermission}: resolves the path, verifies
   * ownership, applies the permission change under the write lock, and
   * records it in the edit log. The edit log sync and the success audit
   * event happen after the write lock is released.
   */
  private void setPermissionInt(final String srcArg, FsPermission permission)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    String src = srcArg;
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: HA state or safe mode may have
      // changed since the unlocked check above.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set permission for " + src);
      src = resolvePath(src, pathComponents);
      checkOwner(pc, src);
      dir.setPermission(src, permission);
      getEditLog().logSetPermissions(src, permission);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    logAuditEvent(true, "setPermission", srcArg, null, resultingStat);
  }
1729    
1730      /**
1731       * Set owner for an existing file.
1732       * @throws IOException
1733       */
1734      void setOwner(String src, String username, String group)
1735          throws AccessControlException, FileNotFoundException, SafeModeException,
1736          UnresolvedLinkException, IOException {
1737        try {
1738          setOwnerInt(src, username, group);
1739        } catch (AccessControlException e) {
1740          logAuditEvent(false, "setOwner", src);
1741          throw e;
1742        } 
1743      }
1744    
  /**
   * Worker for {@link #setOwner}: resolves the path, verifies ownership,
   * and — unless the caller is a superuser — forbids changing the owner to
   * another user or to a group the caller is not in. The change is applied
   * under the write lock and logged; the edit log sync and success audit
   * happen after the lock is released.
   */
  private void setOwnerInt(final String srcArg, String username, String group)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    String src = srcArg;
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set owner for " + src);
      src = resolvePath(src, pathComponents);
      checkOwner(pc, src);
      if (!pc.isSuperUser()) {
        // Non-superusers may only "change" owner to themselves and group to
        // one of their own groups.
        if (username != null && !pc.getUser().equals(username)) {
          throw new AccessControlException("Non-super user cannot change owner");
        }
        if (group != null && !pc.containsGroup(group)) {
          throw new AccessControlException("User does not belong to " + group);
        }
      }
      dir.setOwner(src, username, group);
      getEditLog().logSetOwner(src, username, group);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    logAuditEvent(true, "setOwner", srcArg, null, resultingStat);
  }
1776    
1777      /**
1778       * Get block locations within the specified range.
1779       * @see ClientProtocol#getBlockLocations(String, long, long)
1780       */
1781      LocatedBlocks getBlockLocations(String clientMachine, String src,
1782          long offset, long length) throws AccessControlException,
1783          FileNotFoundException, UnresolvedLinkException, IOException {
1784        LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
1785            true);
1786        if (blocks != null) {
1787          blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
1788              blocks.getLocatedBlocks());
1789    
1790          // lastBlock is not part of getLocatedBlocks(), might need to sort it too
1791          LocatedBlock lastBlock = blocks.getLastLocatedBlock();
1792          if (lastBlock != null) {
1793            ArrayList<LocatedBlock> lastBlockList =
1794                Lists.newArrayListWithCapacity(1);
1795            lastBlockList.add(lastBlock);
1796            blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
1797                lastBlockList);
1798          }
1799        }
1800        return blocks;
1801      }
1802    
1803      /**
1804       * Get block locations within the specified range.
1805       * @see ClientProtocol#getBlockLocations(String, long, long)
1806       * @throws FileNotFoundException, UnresolvedLinkException, IOException
1807       */
1808      LocatedBlocks getBlockLocations(String src, long offset, long length,
1809          boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
1810          throws FileNotFoundException, UnresolvedLinkException, IOException {
1811        try {
1812          return getBlockLocationsInt(src, offset, length, doAccessTime,
1813                                      needBlockToken, checkSafeMode);
1814        } catch (AccessControlException e) {
1815          logAuditEvent(false, "open", src);
1816          throw e;
1817        }
1818      }
1819    
1820      private LocatedBlocks getBlockLocationsInt(String src, long offset,
1821          long length, boolean doAccessTime, boolean needBlockToken,
1822          boolean checkSafeMode)
1823          throws FileNotFoundException, UnresolvedLinkException, IOException {
1824        if (offset < 0) {
1825          throw new HadoopIllegalArgumentException(
1826              "Negative offset is not supported. File: " + src);
1827        }
1828        if (length < 0) {
1829          throw new HadoopIllegalArgumentException(
1830              "Negative length is not supported. File: " + src);
1831        }
1832        final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
1833            offset, length, doAccessTime, needBlockToken);  
1834        logAuditEvent(true, "open", src);
1835        if (checkSafeMode && isInSafeMode()) {
1836          for (LocatedBlock b : ret.getLocatedBlocks()) {
1837            // if safemode & no block locations yet then throw safemodeException
1838            if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
1839              SafeModeException se = new SafeModeException(
1840                  "Zero blocklocations for " + src, safeMode);
1841              if (haEnabled && haContext != null && 
1842                  haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
1843                throw new RetriableException(se);
1844              } else {
1845                throw se;
1846              }
1847            }
1848          }
1849        }
1850        return ret;
1851      }
1852    
1853      /*
1854       * Get block locations within the specified range, updating the
1855       * access times if necessary. 
1856       */
1857      private LocatedBlocks getBlockLocationsUpdateTimes(final String srcArg,
1858          long offset, long length, boolean doAccessTime, boolean needBlockToken)
1859          throws FileNotFoundException,
1860          UnresolvedLinkException, IOException {
1861        String src = srcArg;
1862        FSPermissionChecker pc = getPermissionChecker();
1863        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1864        for (int attempt = 0; attempt < 2; attempt++) {
1865          boolean isReadOp = (attempt == 0);
1866          if (isReadOp) { // first attempt is with readlock
1867            checkOperation(OperationCategory.READ);
1868            readLock();
1869          }  else { // second attempt is with  write lock
1870            checkOperation(OperationCategory.WRITE);
1871            writeLock(); // writelock is needed to set accesstime
1872          }
1873          try {
1874            src = resolvePath(src, pathComponents);
1875            if (isReadOp) {
1876              checkOperation(OperationCategory.READ);
1877            } else {
1878              checkOperation(OperationCategory.WRITE);
1879            }
1880            if (isPermissionEnabled) {
1881              checkPathAccess(pc, src, FsAction.READ);
1882            }
1883    
1884            // if the namenode is in safemode, then do not update access time
1885            if (isInSafeMode()) {
1886              doAccessTime = false;
1887            }
1888    
1889            final INodesInPath iip = dir.getINodesInPath(src, true);
1890            final INode[] inodes = iip.getINodes();
1891            final INodeFile inode = INodeFile.valueOf(
1892                inodes[inodes.length - 1], src);
1893            if (isPermissionEnabled) {
1894              checkUnreadableBySuperuser(pc, inode, iip.getPathSnapshotId());
1895            }
1896            if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
1897                && doAccessTime && isAccessTimeSupported()) {
1898              final long now = now();
1899              if (now > inode.getAccessTime() + getAccessTimePrecision()) {
1900                // if we have to set access time but we only have the readlock, then
1901                // restart this entire operation with the writeLock.
1902                if (isReadOp) {
1903                  continue;
1904                }
1905                boolean changed = dir.setTimes(inode, -1, now, false,
1906                        iip.getLatestSnapshotId());
1907                if (changed) {
1908                  getEditLog().logTimes(src, -1, now);
1909                }
1910              }
1911            }
1912            final long fileSize = iip.isSnapshot() ?
1913                inode.computeFileSize(iip.getPathSnapshotId())
1914                : inode.computeFileSizeNotIncludingLastUcBlock();
1915            boolean isUc = inode.isUnderConstruction();
1916            if (iip.isSnapshot()) {
1917              // if src indicates a snapshot file, we need to make sure the returned
1918              // blocks do not exceed the size of the snapshot file.
1919              length = Math.min(length, fileSize - offset);
1920              isUc = false;
1921            }
1922    
1923            final FileEncryptionInfo feInfo =
1924              FSDirectory.isReservedRawName(srcArg) ?
1925              null : dir.getFileEncryptionInfo(inode, iip.getPathSnapshotId(),
1926                  iip);
1927    
1928            final LocatedBlocks blocks =
1929              blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
1930                isUc, offset, length, needBlockToken, iip.isSnapshot(), feInfo);
1931            // Set caching information for the located blocks.
1932            for (LocatedBlock lb: blocks.getLocatedBlocks()) {
1933              cacheManager.setCachedLocations(lb);
1934            }
1935            return blocks;
1936          } finally {
1937            if (isReadOp) {
1938              readUnlock();
1939            } else {
1940              writeUnlock();
1941            }
1942          }
1943        }
1944        return null; // can never reach here
1945      }
1946    
1947      /**
1948       * Moves all the blocks from {@code srcs} and appends them to {@code target}
1949       * To avoid rollbacks we will verify validity of ALL of the args
1950       * before we start actual move.
1951       * 
1952       * This does not support ".inodes" relative path
1953       * @param target target to concat into
1954       * @param srcs file that will be concatenated
1955       * @throws IOException on error
1956       */
1957      void concat(String target, String [] srcs) 
1958          throws IOException, UnresolvedLinkException {
1959        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
1960        if (cacheEntry != null && cacheEntry.isSuccess()) {
1961          return; // Return previous response
1962        }
1963        
1964        // Either there is no previous request in progress or it has failed
1965        if(FSNamesystem.LOG.isDebugEnabled()) {
1966          FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
1967              " to " + target);
1968        }
1969        
1970        boolean success = false;
1971        try {
1972          concatInt(target, srcs, cacheEntry != null);
1973          success = true;
1974        } catch (AccessControlException e) {
1975          logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
1976          throw e;
1977        } finally {
1978          RetryCache.setState(cacheEntry, success);
1979        }
1980      }
1981    
1982      private void concatInt(String target, String [] srcs, 
1983          boolean logRetryCache) throws IOException, UnresolvedLinkException {
1984        // verify args
1985        if(target.isEmpty()) {
1986          throw new IllegalArgumentException("Target file name is empty");
1987        }
1988        if(srcs == null || srcs.length == 0) {
1989          throw new IllegalArgumentException("No sources given");
1990        }
1991        
1992        // We require all files be in the same directory
1993        String trgParent = 
1994          target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
1995        for (String s : srcs) {
1996          String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
1997          if (!srcParent.equals(trgParent)) {
1998            throw new IllegalArgumentException(
1999               "Sources and target are not in the same directory");
2000          }
2001        }
2002    
2003        HdfsFileStatus resultingStat = null;
2004        FSPermissionChecker pc = getPermissionChecker();
2005        checkOperation(OperationCategory.WRITE);
2006        waitForLoadingFSImage();
2007        writeLock();
2008        try {
2009          checkOperation(OperationCategory.WRITE);
2010          checkNameNodeSafeMode("Cannot concat " + target);
2011          concatInternal(pc, target, srcs, logRetryCache);
2012          resultingStat = getAuditFileInfo(target, false);
2013        } finally {
2014          writeUnlock();
2015        }
2016        getEditLog().logSync();
2017        logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
2018      }
2019    
2020      /** See {@link #concat(String, String[])} */
2021      private void concatInternal(FSPermissionChecker pc, String target,
2022          String[] srcs, boolean logRetryCache) throws IOException,
2023          UnresolvedLinkException {
2024        assert hasWriteLock();
2025    
2026        // write permission for the target
2027        if (isPermissionEnabled) {
2028          checkPathAccess(pc, target, FsAction.WRITE);
2029    
2030          // and srcs
2031          for(String aSrc: srcs) {
2032            checkPathAccess(pc, aSrc, FsAction.READ); // read the file
2033            checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete 
2034          }
2035        }
2036    
2037        // to make sure no two files are the same
2038        Set<INode> si = new HashSet<INode>();
2039    
2040        // we put the following prerequisite for the operation
2041        // replication and blocks sizes should be the same for ALL the blocks
2042    
2043        // check the target
2044        final INodesInPath trgIip = dir.getINodesInPath4Write(target);
2045        if (dir.getEZForPath(trgIip) != null) {
2046          throw new HadoopIllegalArgumentException(
2047              "concat can not be called for files in an encryption zone.");
2048        }
2049        final INodeFile trgInode = INodeFile.valueOf(trgIip.getLastINode(),
2050            target);
2051        if(trgInode.isUnderConstruction()) {
2052          throw new HadoopIllegalArgumentException("concat: target file "
2053              + target + " is under construction");
2054        }
2055        // per design target shouldn't be empty and all the blocks same size
2056        if(trgInode.numBlocks() == 0) {
2057          throw new HadoopIllegalArgumentException("concat: target file "
2058              + target + " is empty");
2059        }
2060        if (trgInode.isWithSnapshot()) {
2061          throw new HadoopIllegalArgumentException("concat: target file "
2062              + target + " is in a snapshot");
2063        }
2064    
2065        long blockSize = trgInode.getPreferredBlockSize();
2066    
2067        // check the end block to be full
2068        final BlockInfo last = trgInode.getLastBlock();
2069        if(blockSize != last.getNumBytes()) {
2070          throw new HadoopIllegalArgumentException("The last block in " + target
2071              + " is not full; last block size = " + last.getNumBytes()
2072              + " but file block size = " + blockSize);
2073        }
2074    
2075        si.add(trgInode);
2076        final short repl = trgInode.getFileReplication();
2077    
2078        // now check the srcs
2079        boolean endSrc = false; // final src file doesn't have to have full end block
2080        for(int i=0; i<srcs.length; i++) {
2081          String src = srcs[i];
2082          if(i==srcs.length-1)
2083            endSrc=true;
2084    
2085          final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
2086          if(src.isEmpty() 
2087              || srcInode.isUnderConstruction()
2088              || srcInode.numBlocks() == 0) {
2089            throw new HadoopIllegalArgumentException("concat: source file " + src
2090                + " is invalid or empty or underConstruction");
2091          }
2092    
2093          // check replication and blocks size
2094          if(repl != srcInode.getBlockReplication()) {
2095            throw new HadoopIllegalArgumentException("concat: the source file "
2096                + src + " and the target file " + target
2097                + " should have the same replication: source replication is "
2098                + srcInode.getBlockReplication()
2099                + " but target replication is " + repl);
2100          }
2101    
2102          //boolean endBlock=false;
2103          // verify that all the blocks are of the same length as target
2104          // should be enough to check the end blocks
2105          final BlockInfo[] srcBlocks = srcInode.getBlocks();
2106          int idx = srcBlocks.length-1;
2107          if(endSrc)
2108            idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
2109          if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
2110            throw new HadoopIllegalArgumentException("concat: the source file "
2111                + src + " and the target file " + target
2112                + " should have the same blocks sizes: target block size is "
2113                + blockSize + " but the size of source block " + idx + " is "
2114                + srcBlocks[idx].getNumBytes());
2115          }
2116    
2117          si.add(srcInode);
2118        }
2119    
2120        // make sure no two files are the same
2121        if(si.size() < srcs.length+1) { // trg + srcs
2122          // it means at least two files are the same
2123          throw new HadoopIllegalArgumentException(
2124              "concat: at least two of the source files are the same");
2125        }
2126    
2127        if(NameNode.stateChangeLog.isDebugEnabled()) {
2128          NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " + 
2129              Arrays.toString(srcs) + " to " + target);
2130        }
2131    
2132        long timestamp = now();
2133        dir.concat(target, srcs, timestamp);
2134        getEditLog().logConcat(target, srcs, timestamp, logRetryCache);
2135      }
2136      
2137      /**
2138       * stores the modification and access time for this inode. 
2139       * The access time is precise up to an hour. The transaction, if needed, is
2140       * written to the edits log but is not flushed.
2141       */
2142      void setTimes(String src, long mtime, long atime) 
2143          throws IOException, UnresolvedLinkException {
2144        if (!isAccessTimeSupported() && atime != -1) {
2145          throw new IOException("Access time for hdfs is not configured. " +
2146                                " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
2147        }
2148        try {
2149          setTimesInt(src, mtime, atime);
2150        } catch (AccessControlException e) {
2151          logAuditEvent(false, "setTimes", src);
2152          throw e;
2153        }
2154      }
2155    
2156      private void setTimesInt(final String srcArg, long mtime, long atime)
2157        throws IOException, UnresolvedLinkException {
2158        String src = srcArg;
2159        HdfsFileStatus resultingStat = null;
2160        FSPermissionChecker pc = getPermissionChecker();
2161        checkOperation(OperationCategory.WRITE);
2162        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2163        writeLock();
2164        try {
2165          checkOperation(OperationCategory.WRITE);
2166          checkNameNodeSafeMode("Cannot set times " + src);
2167          src = resolvePath(src, pathComponents);
2168    
2169          // Write access is required to set access and modification times
2170          if (isPermissionEnabled) {
2171            checkPathAccess(pc, src, FsAction.WRITE);
2172          }
2173          final INodesInPath iip = dir.getINodesInPath4Write(src);
2174          final INode inode = iip.getLastINode();
2175          if (inode != null) {
2176            boolean changed = dir.setTimes(inode, mtime, atime, true,
2177                    iip.getLatestSnapshotId());
2178            if (changed) {
2179              getEditLog().logTimes(src, mtime, atime);
2180            }
2181            resultingStat = getAuditFileInfo(src, false);
2182          } else {
2183            throw new FileNotFoundException("File/Directory " + src + " does not exist.");
2184          }
2185        } finally {
2186          writeUnlock();
2187        }
2188        logAuditEvent(true, "setTimes", srcArg, null, resultingStat);
2189      }
2190    
2191      /**
2192       * Create a symbolic link.
2193       */
2194      @SuppressWarnings("deprecation")
2195      void createSymlink(String target, String link,
2196          PermissionStatus dirPerms, boolean createParent) 
2197          throws IOException, UnresolvedLinkException {
2198        if (!FileSystem.areSymlinksEnabled()) {
2199          throw new UnsupportedOperationException("Symlinks not supported");
2200        }
2201        if (!DFSUtil.isValidName(link)) {
2202          throw new InvalidPathException("Invalid link name: " + link);
2203        }
2204        if (FSDirectory.isReservedName(target)) {
2205          throw new InvalidPathException("Invalid target name: " + target);
2206        }
2207        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
2208        if (cacheEntry != null && cacheEntry.isSuccess()) {
2209          return; // Return previous response
2210        }
2211        boolean success = false;
2212        try {
2213          createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null);
2214          success = true;
2215        } catch (AccessControlException e) {
2216          logAuditEvent(false, "createSymlink", link, target, null);
2217          throw e;
2218        } finally {
2219          RetryCache.setState(cacheEntry, success);
2220        }
2221      }
2222    
  /**
   * Implementation of {@link #createSymlink}: creates the symlink under the
   * namesystem write lock and records it in the edit log and audit log.
   *
   * @param target path the new link will point to (not resolved here)
   * @param linkArg path of the symlink to create (used verbatim for auditing)
   * @param dirPerms permissions for implicitly created parent directories
   * @param createParent if false, the parent of the link must already exist
   * @param logRetryCache whether the edit-log entry should be marked for
   *                      retry-cache replay
   */
  private void createSymlinkInt(String target, final String linkArg,
      PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) 
      throws IOException, UnresolvedLinkException {
    String link = linkArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
          + target + " link=" + link);
    }
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link);
    writeLock();
    try {
      // Re-check state after acquiring the lock; it may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create symlink " + link);
      // Resolve reserved paths (e.g. /.reserved/...) to their concrete form.
      link = resolvePath(link, pathComponents);
      if (!createParent) {
        verifyParentDir(link);
      }
      // NOTE(review): existence/validity is checked before the permission
      // check below, so an unauthorized caller can learn whether the link
      // path already exists — confirm whether this ordering is intended.
      if (!dir.isValidToCreate(link)) {
        throw new IOException("failed to create link " + link 
            +" either because the filename is invalid or the file exists");
      }
      if (isPermissionEnabled) {
        checkAncestorAccess(pc, link, FsAction.WRITE);
      }
      // validate that we have enough inodes.
      checkFsObjectLimit();

      // add symbolic link to namespace
      addSymlink(link, target, dirPerms, createParent, logRetryCache);
      resultingStat = getAuditFileInfo(link, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the lock, then audit the successful create.
    getEditLog().logSync();
    logAuditEvent(true, "createSymlink", linkArg, target, resultingStat);
  }
2262    
2263      /**
2264       * Set replication for an existing file.
2265       * 
2266       * The NameNode sets new replication and schedules either replication of 
2267       * under-replicated data blocks or removal of the excessive block copies 
2268       * if the blocks are over-replicated.
2269       * 
2270       * @see ClientProtocol#setReplication(String, short)
2271       * @param src file name
2272       * @param replication new replication
2273       * @return true if successful; 
2274       *         false if file does not exist or is a directory
2275       */
2276      boolean setReplication(final String src, final short replication)
2277          throws IOException {
2278        try {
2279          return setReplicationInt(src, replication);
2280        } catch (AccessControlException e) {
2281          logAuditEvent(false, "setReplication", src);
2282          throw e;
2283        }
2284      }
2285    
2286      private boolean setReplicationInt(final String srcArg,
2287          final short replication) throws IOException {
2288        String src = srcArg;
2289        blockManager.verifyReplication(src, replication, null);
2290        final boolean isFile;
2291        FSPermissionChecker pc = getPermissionChecker();
2292        checkOperation(OperationCategory.WRITE);
2293        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2294        waitForLoadingFSImage();
2295        writeLock();
2296        try {
2297          checkOperation(OperationCategory.WRITE);
2298          checkNameNodeSafeMode("Cannot set replication for " + src);
2299          src = resolvePath(src, pathComponents);
2300          if (isPermissionEnabled) {
2301            checkPathAccess(pc, src, FsAction.WRITE);
2302          }
2303    
2304          final short[] blockRepls = new short[2]; // 0: old, 1: new
2305          final Block[] blocks = dir.setReplication(src, replication, blockRepls);
2306          isFile = blocks != null;
2307          if (isFile) {
2308            getEditLog().logSetReplication(src, replication);
2309            blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks);
2310          }
2311        } finally {
2312          writeUnlock();
2313        }
2314    
2315        getEditLog().logSync();
2316        if (isFile) {
2317          logAuditEvent(true, "setReplication", srcArg);
2318        }
2319        return isFile;
2320      }
2321    
2322      /**
2323       * Set the storage policy for a file or a directory.
2324       *
2325       * @param src file/directory path
2326       * @param policyName storage policy name
2327       */
2328      void setStoragePolicy(String src, final String policyName)
2329          throws IOException {
2330        try {
2331          setStoragePolicyInt(src, policyName);
2332        } catch (AccessControlException e) {
2333          logAuditEvent(false, "setStoragePolicy", src);
2334          throw e;
2335        }
2336      }
2337    
2338      private void setStoragePolicyInt(String src, final String policyName)
2339          throws IOException, UnresolvedLinkException, AccessControlException {
2340    
2341        if (!isStoragePolicyEnabled) {
2342          throw new IOException("Failed to set storage policy since "
2343              + DFS_STORAGE_POLICY_ENABLED_KEY + " is set to false.");
2344        }
2345        FSPermissionChecker pc = null;
2346        if (isPermissionEnabled) {
2347          pc = getPermissionChecker();
2348        }
2349    
2350        checkOperation(OperationCategory.WRITE);
2351        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2352        waitForLoadingFSImage();
2353        HdfsFileStatus fileStat;
2354        writeLock();
2355        try {
2356          checkOperation(OperationCategory.WRITE);
2357          checkNameNodeSafeMode("Cannot set storage policy for " + src);
2358    
2359          if (pc != null) {
2360            checkPermission(pc, src, false, null, null, FsAction.WRITE, null,
2361                            false, true);
2362          }
2363    
2364          src = FSDirectory.resolvePath(src, pathComponents, dir);
2365    
2366          // get the corresponding policy and make sure the policy name is valid
2367          BlockStoragePolicy policy = blockManager.getStoragePolicy(policyName);
2368          if (policy == null) {
2369            throw new HadoopIllegalArgumentException(
2370                "Cannot find a block policy with the name " + policyName);
2371          }
2372          dir.setStoragePolicy(src, policy.getId());
2373          getEditLog().logSetStoragePolicy(src, policy.getId());
2374          fileStat = getAuditFileInfo(src, false);
2375        } finally {
2376          writeUnlock();
2377        }
2378    
2379        getEditLog().logSync();
2380        logAuditEvent(true, "setStoragePolicy", src, null, fileStat);
2381      }
2382    
2383      /**
2384       * @return All the existing block storage policies
2385       */
2386      BlockStoragePolicy[] getStoragePolicies() throws IOException {
2387        checkOperation(OperationCategory.READ);
2388        waitForLoadingFSImage();
2389        readLock();
2390        try {
2391          checkOperation(OperationCategory.READ);
2392          return blockManager.getStoragePolicies();
2393        } finally {
2394          readUnlock();
2395        }
2396      }
2397    
2398      long getPreferredBlockSize(String filename) 
2399          throws IOException, UnresolvedLinkException {
2400        FSPermissionChecker pc = getPermissionChecker();
2401        checkOperation(OperationCategory.READ);
2402        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename);
2403        readLock();
2404        try {
2405          checkOperation(OperationCategory.READ);
2406          filename = resolvePath(filename, pathComponents);
2407          if (isPermissionEnabled) {
2408            checkTraverse(pc, filename);
2409          }
2410          return dir.getPreferredBlockSize(filename);
2411        } finally {
2412          readUnlock();
2413        }
2414      }
2415    
2416      /**
2417       * Verify that parent directory of src exists.
2418       */
2419      private void verifyParentDir(String src) throws FileNotFoundException,
2420          ParentNotDirectoryException, UnresolvedLinkException {
2421        assert hasReadLock();
2422        Path parent = new Path(src).getParent();
2423        if (parent != null) {
2424          final INode parentNode = dir.getINode(parent.toString());
2425          if (parentNode == null) {
2426            throw new FileNotFoundException("Parent directory doesn't exist: "
2427                + parent);
2428          } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) {
2429            throw new ParentNotDirectoryException("Parent path is not a directory: "
2430                + parent);
2431          }
2432        }
2433      }
2434    
2435      /**
2436       * If the file is within an encryption zone, select the appropriate 
2437       * CryptoProtocolVersion from the list provided by the client. Since the
2438       * client may be newer, we need to handle unknown versions.
2439       *
2440       * @param zone EncryptionZone of the file
2441       * @param supportedVersions List of supported protocol versions
2442       * @return chosen protocol version
2443       * @throws IOException
2444       */
2445      private CryptoProtocolVersion chooseProtocolVersion(EncryptionZone zone,
2446          CryptoProtocolVersion[] supportedVersions)
2447          throws UnknownCryptoProtocolVersionException, UnresolvedLinkException,
2448            SnapshotAccessControlException {
2449        Preconditions.checkNotNull(zone);
2450        Preconditions.checkNotNull(supportedVersions);
2451        // Right now, we only support a single protocol version,
2452        // so simply look for it in the list of provided options
2453        final CryptoProtocolVersion required = zone.getVersion();
2454    
2455        for (CryptoProtocolVersion c : supportedVersions) {
2456          if (c.equals(CryptoProtocolVersion.UNKNOWN)) {
2457            if (LOG.isDebugEnabled()) {
2458              LOG.debug("Ignoring unknown CryptoProtocolVersion provided by " +
2459                  "client: " + c.getUnknownValue());
2460            }
2461            continue;
2462          }
2463          if (c.equals(required)) {
2464            return c;
2465          }
2466        }
2467        throw new UnknownCryptoProtocolVersionException(
2468            "No crypto protocol versions provided by the client are supported."
2469                + " Client provided: " + Arrays.toString(supportedVersions)
2470                + " NameNode supports: " + Arrays.toString(CryptoProtocolVersion
2471                .values()));
2472      }
2473    
2474      /**
2475       * Invoke KeyProvider APIs to generate an encrypted data encryption key for an
2476       * encryption zone. Should not be called with any locks held.
2477       *
2478       * @param ezKeyName key name of an encryption zone
2479       * @return New EDEK, or null if ezKeyName is null
2480       * @throws IOException
2481       */
2482      private EncryptedKeyVersion generateEncryptedDataEncryptionKey(String
2483          ezKeyName) throws IOException {
2484        if (ezKeyName == null) {
2485          return null;
2486        }
2487        EncryptedKeyVersion edek = null;
2488        try {
2489          edek = provider.generateEncryptedKey(ezKeyName);
2490        } catch (GeneralSecurityException e) {
2491          throw new IOException(e);
2492        }
2493        Preconditions.checkNotNull(edek);
2494        return edek;
2495      }
2496    
2497      /**
2498       * Create a new file entry in the namespace.
2499       * 
2500       * For description of parameters and exceptions thrown see
2501       * {@link ClientProtocol#create}, except it returns valid file status upon
2502       * success
2503       */
2504      HdfsFileStatus startFile(String src, PermissionStatus permissions,
2505          String holder, String clientMachine, EnumSet<CreateFlag> flag,
2506          boolean createParent, short replication, long blockSize, 
2507          CryptoProtocolVersion[] supportedVersions)
2508          throws AccessControlException, SafeModeException,
2509          FileAlreadyExistsException, UnresolvedLinkException,
2510          FileNotFoundException, ParentNotDirectoryException, IOException {
2511        HdfsFileStatus status = null;
2512        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2513            null);
2514        if (cacheEntry != null && cacheEntry.isSuccess()) {
2515          return (HdfsFileStatus) cacheEntry.getPayload();
2516        }
2517        
2518        try {
2519          status = startFileInt(src, permissions, holder, clientMachine, flag,
2520              createParent, replication, blockSize, supportedVersions,
2521              cacheEntry != null);
2522        } catch (AccessControlException e) {
2523          logAuditEvent(false, "create", src);
2524          throw e;
2525        } finally {
2526          RetryCache.setState(cacheEntry, status != null, status);
2527        }
2528        return status;
2529      }
2530    
2531      private HdfsFileStatus startFileInt(final String srcArg,
2532          PermissionStatus permissions, String holder, String clientMachine,
2533          EnumSet<CreateFlag> flag, boolean createParent, short replication,
2534          long blockSize, CryptoProtocolVersion[] supportedVersions,
2535          boolean logRetryCache)
2536          throws AccessControlException, SafeModeException,
2537          FileAlreadyExistsException, UnresolvedLinkException,
2538          FileNotFoundException, ParentNotDirectoryException, IOException {
2539        String src = srcArg;
2540        if (NameNode.stateChangeLog.isDebugEnabled()) {
2541          StringBuilder builder = new StringBuilder();
2542          builder.append("DIR* NameSystem.startFile: src=" + src
2543                  + ", holder=" + holder
2544                  + ", clientMachine=" + clientMachine
2545                  + ", createParent=" + createParent
2546                  + ", replication=" + replication
2547                  + ", createFlag=" + flag.toString()
2548                  + ", blockSize=" + blockSize);
2549          builder.append(", supportedVersions=");
2550          if (supportedVersions != null) {
2551            builder.append(Arrays.toString(supportedVersions));
2552          } else {
2553            builder.append("null");
2554          }
2555          NameNode.stateChangeLog.debug(builder.toString());
2556        }
2557        if (!DFSUtil.isValidName(src)) {
2558          throw new InvalidPathException(src);
2559        }
2560        blockManager.verifyReplication(src, replication, clientMachine);
2561    
2562        boolean skipSync = false;
2563        HdfsFileStatus stat = null;
2564        FSPermissionChecker pc = getPermissionChecker();
2565        checkOperation(OperationCategory.WRITE);
2566        if (blockSize < minBlockSize) {
2567          throw new IOException("Specified block size is less than configured" +
2568              " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
2569              + "): " + blockSize + " < " + minBlockSize);
2570        }
2571        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2572        boolean create = flag.contains(CreateFlag.CREATE);
2573        boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
2574        boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST);
2575    
2576        waitForLoadingFSImage();
2577    
2578        /**
2579         * If the file is in an encryption zone, we optimistically create an
2580         * EDEK for the file by calling out to the configured KeyProvider.
2581         * Since this typically involves doing an RPC, we take the readLock
2582         * initially, then drop it to do the RPC.
2583         * 
2584         * Since the path can flip-flop between being in an encryption zone and not
2585         * in the meantime, we need to recheck the preconditions when we retake the
2586         * lock to do the create. If the preconditions are not met, we throw a
2587         * special RetryStartFileException to ask the DFSClient to try the create
2588         * again later.
2589         */
2590        CryptoProtocolVersion protocolVersion = null;
2591        CipherSuite suite = null;
2592        String ezKeyName = null;
2593        readLock();
2594        try {
2595          src = resolvePath(src, pathComponents);
2596          INodesInPath iip = dir.getINodesInPath4Write(src);
2597          // Nothing to do if the path is not within an EZ
2598          if (dir.isInAnEZ(iip)) {
2599            EncryptionZone zone = dir.getEZForPath(iip);
2600            protocolVersion = chooseProtocolVersion(zone, supportedVersions);
2601            suite = zone.getSuite();
2602            ezKeyName = dir.getKeyName(iip);
2603    
2604            Preconditions.checkNotNull(protocolVersion);
2605            Preconditions.checkNotNull(suite);
2606            Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
2607                "Chose an UNKNOWN CipherSuite!");
2608            Preconditions.checkNotNull(ezKeyName);
2609          }
2610        } finally {
2611          readUnlock();
2612        }
2613    
2614        Preconditions.checkState(
2615            (suite == null && ezKeyName == null) ||
2616                (suite != null && ezKeyName != null),
2617            "Both suite and ezKeyName should both be null or not null");
2618    
2619        // Generate EDEK if necessary while not holding the lock
2620        EncryptedKeyVersion edek =
2621            generateEncryptedDataEncryptionKey(ezKeyName);
2622        EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
2623    
2624        // Proceed with the create, using the computed cipher suite and 
2625        // generated EDEK
2626        BlocksMapUpdateInfo toRemoveBlocks = null;
2627        writeLock();
2628        try {
2629          checkOperation(OperationCategory.WRITE);
2630          checkNameNodeSafeMode("Cannot create file" + src);
2631          src = resolvePath(src, pathComponents);
2632          toRemoveBlocks = startFileInternal(pc, src, permissions, holder, 
2633              clientMachine, create, overwrite, createParent, replication, 
2634              blockSize, isLazyPersist, suite, protocolVersion, edek, logRetryCache);
2635          stat = dir.getFileInfo(src, false,
2636              FSDirectory.isReservedRawName(srcArg), true);
2637        } catch (StandbyException se) {
2638          skipSync = true;
2639          throw se;
2640        } finally {
2641          writeUnlock();
2642          // There might be transactions logged while trying to recover the lease.
2643          // They need to be sync'ed even when an exception was thrown.
2644          if (!skipSync) {
2645            getEditLog().logSync();
2646            if (toRemoveBlocks != null) {
2647              removeBlocks(toRemoveBlocks);
2648              toRemoveBlocks.clear();
2649            }
2650          }
2651        }
2652    
2653        logAuditEvent(true, "create", srcArg, null, stat);
2654        return stat;
2655      }
2656    
2657      /**
2658       * Create a new file or overwrite an existing file<br>
2659       * 
2660       * Once the file is create the client then allocates a new block with the next
2661       * call using {@link ClientProtocol#addBlock}.
2662       * <p>
2663       * For description of parameters and exceptions thrown see
2664       * {@link ClientProtocol#create}
2665       */
2666      private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc, 
2667          String src, PermissionStatus permissions, String holder, 
2668          String clientMachine, boolean create, boolean overwrite, 
2669          boolean createParent, short replication, long blockSize, 
2670          boolean isLazyPersist, CipherSuite suite, CryptoProtocolVersion version,
2671          EncryptedKeyVersion edek, boolean logRetryEntry)
2672          throws FileAlreadyExistsException, AccessControlException,
2673          UnresolvedLinkException, FileNotFoundException,
2674          ParentNotDirectoryException, RetryStartFileException, IOException {
2675        assert hasWriteLock();
2676        // Verify that the destination does not exist as a directory already.
2677        final INodesInPath iip = dir.getINodesInPath4Write(src);
2678        final INode inode = iip.getLastINode();
2679        if (inode != null && inode.isDirectory()) {
2680          throw new FileAlreadyExistsException(src +
2681              " already exists as a directory");
2682        }
2683    
2684        FileEncryptionInfo feInfo = null;
2685        if (dir.isInAnEZ(iip)) {
2686          // The path is now within an EZ, but we're missing encryption parameters
2687          if (suite == null || edek == null) {
2688            throw new RetryStartFileException();
2689          }
2690          // Path is within an EZ and we have provided encryption parameters.
2691          // Make sure that the generated EDEK matches the settings of the EZ.
2692          String ezKeyName = dir.getKeyName(iip);
2693          if (!ezKeyName.equals(edek.getEncryptionKeyName())) {
2694            throw new RetryStartFileException();
2695          }
2696          feInfo = new FileEncryptionInfo(suite, version,
2697              edek.getEncryptedKeyVersion().getMaterial(),
2698              edek.getEncryptedKeyIv(),
2699              ezKeyName, edek.getEncryptionKeyVersionName());
2700          Preconditions.checkNotNull(feInfo);
2701        }
2702    
2703        final INodeFile myFile = INodeFile.valueOf(inode, src, true);
2704        if (isPermissionEnabled) {
2705          if (overwrite && myFile != null) {
2706            checkPathAccess(pc, src, FsAction.WRITE);
2707          }
2708          /*
2709           * To overwrite existing file, need to check 'w' permission 
2710           * of parent (equals to ancestor in this case)
2711           */
2712          checkAncestorAccess(pc, src, FsAction.WRITE);
2713        }
2714    
2715        if (!createParent) {
2716          verifyParentDir(src);
2717        }
2718    
2719        try {
2720          BlocksMapUpdateInfo toRemoveBlocks = null;
2721          if (myFile == null) {
2722            if (!create) {
2723              throw new FileNotFoundException("Can't overwrite non-existent " +
2724                  src + " for client " + clientMachine);
2725            }
2726          } else {
2727            if (overwrite) {
2728              toRemoveBlocks = new BlocksMapUpdateInfo();
2729              List<INode> toRemoveINodes = new ChunkedArrayList<INode>();
2730              long ret = dir.delete(src, toRemoveBlocks, toRemoveINodes, now());
2731              if (ret >= 0) {
2732                incrDeletedFileCount(ret);
2733                removePathAndBlocks(src, null, toRemoveINodes, true);
2734              }
2735            } else {
2736              // If lease soft limit time is expired, recover the lease
2737              recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2738              throw new FileAlreadyExistsException(src + " for client " +
2739                  clientMachine + " already exists");
2740            }
2741          }
2742    
2743          checkFsObjectLimit();
2744          INodeFile newNode = null;
2745    
2746          // Always do an implicit mkdirs for parent directory tree.
2747          Path parent = new Path(src).getParent();
2748          if (parent != null && mkdirsRecursively(parent.toString(),
2749                  permissions, true, now())) {
2750            newNode = dir.addFile(src, permissions, replication, blockSize,
2751                                  holder, clientMachine);
2752          }
2753    
2754          if (newNode == null) {
2755            throw new IOException("Unable to add " + src +  " to namespace");
2756          }
2757          leaseManager.addLease(newNode.getFileUnderConstructionFeature()
2758              .getClientName(), src);
2759    
2760          // Set encryption attributes if necessary
2761          if (feInfo != null) {
2762            dir.setFileEncryptionInfo(src, feInfo);
2763            newNode = dir.getInode(newNode.getId()).asFile();
2764          }
2765    
2766          setNewINodeStoragePolicy(newNode, iip, isLazyPersist);
2767    
2768          // record file record in log, record new generation stamp
2769          getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry);
2770          if (NameNode.stateChangeLog.isDebugEnabled()) {
2771            NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
2772                src + " inode " + newNode.getId() + " " + holder);
2773          }
2774          return toRemoveBlocks;
2775        } catch (IOException ie) {
2776          NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
2777              ie.getMessage());
2778          throw ie;
2779        }
2780      }
2781    
2782      private void setNewINodeStoragePolicy(INodeFile inode,
2783                                            INodesInPath iip,
2784                                            boolean isLazyPersist)
2785          throws IOException {
2786    
2787        if (isLazyPersist) {
2788          BlockStoragePolicy lpPolicy =
2789              blockManager.getStoragePolicy("LAZY_PERSIST");
2790    
2791          // Set LAZY_PERSIST storage policy if the flag was passed to
2792          // CreateFile.
2793          if (lpPolicy == null) {
2794            throw new HadoopIllegalArgumentException(
2795                "The LAZY_PERSIST storage policy has been disabled " +
2796                "by the administrator.");
2797          }
2798          inode.setStoragePolicyID(lpPolicy.getId(),
2799                                     iip.getLatestSnapshotId());
2800        } else {
2801          BlockStoragePolicy effectivePolicy =
2802              blockManager.getStoragePolicy(inode.getStoragePolicyID());
2803    
2804          if (effectivePolicy != null &&
2805              effectivePolicy.isCopyOnCreateFile()) {
2806            // Copy effective policy from ancestor directory to current file.
2807            inode.setStoragePolicyID(effectivePolicy.getId(),
2808                                     iip.getLatestSnapshotId());
2809          }
2810        }
2811      }
2812    
2813      /**
2814       * Append to an existing file for append.
2815       * <p>
2816       * 
2817       * The method returns the last block of the file if this is a partial block,
2818       * which can still be used for writing more data. The client uses the returned
2819       * block locations to form the data pipeline for this block.<br>
2820       * The method returns null if the last block is full. The client then
2821       * allocates a new block with the next call using
2822       * {@link ClientProtocol#addBlock}.
2823       * <p>
2824       * 
2825       * For description of parameters and exceptions thrown see
2826       * {@link ClientProtocol#append(String, String)}
2827       * 
2828       * @return the last block locations if the block is partial or null otherwise
2829       */
2830      private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
2831          String holder, String clientMachine, boolean logRetryCache)
2832          throws AccessControlException, UnresolvedLinkException,
2833          FileNotFoundException, IOException {
2834        assert hasWriteLock();
2835        // Verify that the destination does not exist as a directory already.
2836        final INodesInPath iip = dir.getINodesInPath4Write(src);
2837        final INode inode = iip.getLastINode();
2838        if (inode != null && inode.isDirectory()) {
2839          throw new FileAlreadyExistsException("Cannot append to directory " + src
2840              + "; already exists as a directory.");
2841        }
2842        if (isPermissionEnabled) {
2843          checkPathAccess(pc, src, FsAction.WRITE);
2844        }
2845    
2846        try {
2847          if (inode == null) {
2848            throw new FileNotFoundException("failed to append to non-existent file "
2849              + src + " for client " + clientMachine);
2850          }
2851          INodeFile myFile = INodeFile.valueOf(inode, src, true);
2852          final BlockStoragePolicy lpPolicy =
2853              blockManager.getStoragePolicy("LAZY_PERSIST");
2854    
2855          if (lpPolicy != null &&
2856              lpPolicy.getId() == myFile.getStoragePolicyID()) {
2857            throw new UnsupportedOperationException(
2858                "Cannot append to lazy persist file " + src);
2859          }
2860          // Opening an existing file for write - may need to recover lease.
2861          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2862          
2863          // recoverLeaseInternal may create a new InodeFile via 
2864          // finalizeINodeFileUnderConstruction so we need to refresh 
2865          // the referenced file.  
2866          myFile = INodeFile.valueOf(dir.getINode(src), src, true);
2867          final BlockInfo lastBlock = myFile.getLastBlock();
2868          // Check that the block has at least minimum replication.
2869          if(lastBlock != null && lastBlock.isComplete() &&
2870              !getBlockManager().isSufficientlyReplicated(lastBlock)) {
2871            throw new IOException("append: lastBlock=" + lastBlock +
2872                " of src=" + src + " is not sufficiently replicated yet.");
2873          }
2874          return prepareFileForWrite(src, myFile, holder, clientMachine, true,
2875                  iip.getLatestSnapshotId(), logRetryCache);
2876        } catch (IOException ie) {
2877          NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
2878          throw ie;
2879        }
2880      }
2881      
2882      /**
2883       * Replace current node with a INodeUnderConstruction.
2884       * Recreate in-memory lease record.
2885       * 
2886       * @param src path to the file
2887       * @param file existing file object
2888       * @param leaseHolder identifier of the lease holder on this file
2889       * @param clientMachine identifier of the client machine
2890       * @param writeToEditLog whether to persist this change to the edit log
2891       * @param logRetryCache whether to record RPC ids in editlog for retry cache
2892       *                      rebuilding
2893       * @return the last block locations if the block is partial or null otherwise
2894       * @throws UnresolvedLinkException
2895       * @throws IOException
2896       */
2897      LocatedBlock prepareFileForWrite(String src, INodeFile file,
2898                                       String leaseHolder, String clientMachine,
2899                                       boolean writeToEditLog,
2900                                       int latestSnapshot, boolean logRetryCache)
2901          throws IOException {
2902        file.recordModification(latestSnapshot);
2903        final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine);
2904    
2905        leaseManager.addLease(cons.getFileUnderConstructionFeature()
2906            .getClientName(), src);
2907        
2908        LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
2909        if (ret != null) {
2910          // update the quota: use the preferred block size for UC block
2911          final long diff = file.getPreferredBlockSize() - ret.getBlockSize();
2912          dir.updateSpaceConsumed(src, 0, diff * file.getBlockReplication());
2913        }
2914    
2915        if (writeToEditLog) {
2916          getEditLog().logOpenFile(src, cons, false, logRetryCache);
2917        }
2918        return ret;
2919      }
2920    
2921      /**
2922       * Recover lease;
2923       * Immediately revoke the lease of the current lease holder and start lease
2924       * recovery so that the file can be forced to be closed.
2925       * 
2926       * @param src the path of the file to start lease recovery
2927       * @param holder the lease holder's name
2928       * @param clientMachine the client machine's name
2929       * @return true if the file is already closed
2930       * @throws IOException
2931       */
2932      boolean recoverLease(String src, String holder, String clientMachine)
2933          throws IOException {
2934        if (!DFSUtil.isValidName(src)) {
2935          throw new IOException("Invalid file name: " + src);
2936        }
2937      
2938        boolean skipSync = false;
2939        FSPermissionChecker pc = getPermissionChecker();
2940        checkOperation(OperationCategory.WRITE);
2941        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2942        writeLock();
2943        try {
2944          checkOperation(OperationCategory.WRITE);
2945          checkNameNodeSafeMode("Cannot recover the lease of " + src);
2946          src = resolvePath(src, pathComponents);
2947          final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
2948          if (!inode.isUnderConstruction()) {
2949            return true;
2950          }
2951          if (isPermissionEnabled) {
2952            checkPathAccess(pc, src, FsAction.WRITE);
2953          }
2954      
2955          recoverLeaseInternal(inode, src, holder, clientMachine, true);
2956        } catch (StandbyException se) {
2957          skipSync = true;
2958          throw se;
2959        } finally {
2960          writeUnlock();
2961          // There might be transactions logged while trying to recover the lease.
2962          // They need to be sync'ed even when an exception was thrown.
2963          if (!skipSync) {
2964            getEditLog().logSync();
2965          }
2966        }
2967        return false;
2968      }
2969    
  /**
   * Check whether an under-construction file's lease blocks the caller and,
   * when appropriate, start lease recovery. No-op if the file is null or not
   * under construction.
   *
   * @param fileInode inode of the file being opened/recovered
   * @param src path to the file
   * @param holder the client requesting the operation
   * @param clientMachine identifier of the requesting client's machine
   * @param force if true, revoke the current holder's lease immediately and
   *              start recovery; if false, only recover when the current
   *              holder's soft lease limit has expired
   * @throws AlreadyBeingCreatedException if the same holder is recreating the
   *         file, or another live client is still writing it
   * @throws RecoveryInProgressException if lease recovery is already running
   *         or could not complete immediately
   */
  private void recoverLeaseInternal(INodeFile fileInode, 
      String src, String holder, String clientMachine, boolean force)
      throws IOException {
    assert hasWriteLock();
    if (fileInode != null && fileInode.isUnderConstruction()) {
      //
      // If the file is under construction , then it must be in our
      // leases. Find the appropriate lease record.
      //
      Lease lease = leaseManager.getLease(holder);
      //
      // We found the lease for this file. And surprisingly the original
      // holder is trying to recreate this file. This should never occur.
      //

      if (!force && lease != null) {
        Lease leaseFile = leaseManager.getLeaseByPath(src);
        if (leaseFile != null && leaseFile.equals(lease)) {
          throw new AlreadyBeingCreatedException(
            "failed to create file " + src + " for " + holder +
            " for client " + clientMachine +
            " because current leaseholder is trying to recreate file.");
        }
      }
      //
      // Find the original holder.
      //
      FileUnderConstructionFeature uc = fileInode.getFileUnderConstructionFeature();
      String clientName = uc.getClientName();
      lease = leaseManager.getLease(clientName);
      if (lease == null) {
        throw new AlreadyBeingCreatedException(
          "failed to create file " + src + " for " + holder +
          " for client " + clientMachine +
          " because pendingCreates is non-null but no leases found.");
      }
      if (force) {
        // close now: no need to wait for soft lease expiration and 
        // close only the file src
        LOG.info("recoverLease: " + lease + ", src=" + src +
          " from client " + clientName);
        internalReleaseLease(lease, src, holder);
      } else {
        assert lease.getHolder().equals(clientName) :
          "Current lease holder " + lease.getHolder() +
          " does not match file creator " + clientName;
        //
        // If the original holder has not renewed in the last SOFTLIMIT 
        // period, then start lease recovery.
        //
        if (lease.expiredSoftLimit()) {
          LOG.info("startFile: recover " + lease + ", src=" + src + " client "
              + clientName);
          boolean isClosed = internalReleaseLease(lease, src, null);
          if(!isClosed)
            throw new RecoveryInProgressException(
                "Failed to close file " + src +
                ". Lease recovery is in progress. Try again later.");
        } else {
          // Soft limit not yet expired: the original writer is presumed
          // alive, so refuse the request.
          final BlockInfo lastBlock = fileInode.getLastBlock();
          if (lastBlock != null
              && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
            throw new RecoveryInProgressException("Recovery in progress, file ["
                + src + "], " + "lease owner [" + lease.getHolder() + "]");
          } else {
            throw new AlreadyBeingCreatedException("Failed to create file ["
                + src + "] for [" + holder + "] for client [" + clientMachine
                + "], because this file is already being created by ["
                + clientName + "] on ["
                + uc.getClientMachine() + "]");
          }
        }
      }
    }
  }
3045    
3046      /**
3047       * Append to an existing file in the namespace.
3048       */
3049      LocatedBlock appendFile(String src, String holder, String clientMachine)
3050          throws AccessControlException, SafeModeException,
3051          FileAlreadyExistsException, FileNotFoundException,
3052          ParentNotDirectoryException, IOException {
3053        LocatedBlock lb = null;
3054        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
3055            null);
3056        if (cacheEntry != null && cacheEntry.isSuccess()) {
3057          return (LocatedBlock) cacheEntry.getPayload();
3058        }
3059          
3060        boolean success = false;
3061        try {
3062          lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
3063          success = true;
3064          return lb;
3065        } catch (AccessControlException e) {
3066          logAuditEvent(false, "append", src);
3067          throw e;
3068        } finally {
3069          RetryCache.setState(cacheEntry, success, lb);
3070        }
3071      }
3072    
3073      private LocatedBlock appendFileInt(final String srcArg, String holder,
3074          String clientMachine, boolean logRetryCache)
3075          throws AccessControlException, SafeModeException,
3076          FileAlreadyExistsException, FileNotFoundException,
3077          ParentNotDirectoryException, IOException {
3078        String src = srcArg;
3079        if (NameNode.stateChangeLog.isDebugEnabled()) {
3080          NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
3081              + ", holder=" + holder
3082              + ", clientMachine=" + clientMachine);
3083        }
3084        boolean skipSync = false;
3085        if (!supportAppends) {
3086          throw new UnsupportedOperationException(
3087              "Append is not enabled on this NameNode. Use the " +
3088              DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
3089        }
3090    
3091        LocatedBlock lb = null;
3092        FSPermissionChecker pc = getPermissionChecker();
3093        checkOperation(OperationCategory.WRITE);
3094        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3095        writeLock();
3096        try {
3097          checkOperation(OperationCategory.WRITE);
3098          checkNameNodeSafeMode("Cannot append to file" + src);
3099          src = resolvePath(src, pathComponents);
3100          lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
3101        } catch (StandbyException se) {
3102          skipSync = true;
3103          throw se;
3104        } finally {
3105          writeUnlock();
3106          // There might be transactions logged while trying to recover the lease.
3107          // They need to be sync'ed even when an exception was thrown.
3108          if (!skipSync) {
3109            getEditLog().logSync();
3110          }
3111        }
3112        if (lb != null) {
3113          if (NameNode.stateChangeLog.isDebugEnabled()) {
3114            NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
3115                +src+" for "+holder+" at "+clientMachine
3116                +" block " + lb.getBlock()
3117                +" block size " + lb.getBlock().getNumBytes());
3118          }
3119        }
3120        logAuditEvent(true, "append", srcArg);
3121        return lb;
3122      }
3123    
3124      ExtendedBlock getExtendedBlock(Block blk) {
3125        return new ExtendedBlock(blockPoolId, blk);
3126      }
3127      
3128      void setBlockPoolId(String bpid) {
3129        blockPoolId = bpid;
3130        blockManager.setBlockPoolId(blockPoolId);
3131      }
3132    
3133      /**
3134       * The client would like to obtain an additional block for the indicated
3135       * filename (which is being written-to).  Return an array that consists
3136       * of the block, plus a set of machines.  The first on this list should
3137       * be where the client writes data.  Subsequent items in the list must
3138       * be provided in the connection to the first datanode.
3139       *
3140       * Make sure the previous blocks have been reported by datanodes and
3141       * are replicated.  Will return an empty 2-elt array if we want the
3142       * client to "try again later".
3143       */
3144      LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
3145          ExtendedBlock previous, Set<Node> excludedNodes, 
3146          List<String> favoredNodes)
3147          throws LeaseExpiredException, NotReplicatedYetException,
3148          QuotaExceededException, SafeModeException, UnresolvedLinkException,
3149          IOException {
3150        final long blockSize;
3151        final int replication;
3152        final byte storagePolicyID;
3153        Node clientNode = null;
3154        String clientMachine = null;
3155    
3156        if(NameNode.stateChangeLog.isDebugEnabled()) {
3157          NameNode.stateChangeLog.debug("BLOCK* NameSystem.getAdditionalBlock: "
3158              + src + " inodeId " +  fileId  + " for " + clientName);
3159        }
3160    
3161        // Part I. Analyze the state of the file with respect to the input data.
3162        checkOperation(OperationCategory.READ);
3163        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3164        readLock();
3165        try {
3166          checkOperation(OperationCategory.READ);
3167          src = resolvePath(src, pathComponents);
3168          LocatedBlock[] onRetryBlock = new LocatedBlock[1];
3169          FileState fileState = analyzeFileState(
3170              src, fileId, clientName, previous, onRetryBlock);
3171          final INodeFile pendingFile = fileState.inode;
3172          src = fileState.path;
3173    
3174          if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
3175            // This is a retry. Just return the last block if having locations.
3176            return onRetryBlock[0];
3177          }
3178          if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
3179            throw new IOException("File has reached the limit on maximum number of"
3180                + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
3181                + "): " + pendingFile.getBlocks().length + " >= "
3182                + maxBlocksPerFile);
3183          }
3184          blockSize = pendingFile.getPreferredBlockSize();
3185          clientMachine = pendingFile.getFileUnderConstructionFeature()
3186              .getClientMachine();
3187          clientNode = blockManager.getDatanodeManager().getDatanodeByHost(
3188              clientMachine);
3189          replication = pendingFile.getFileReplication();
3190          storagePolicyID = pendingFile.getStoragePolicyID();
3191        } finally {
3192          readUnlock();
3193        }
3194    
3195        if (clientNode == null) {
3196          clientNode = getClientNode(clientMachine);
3197        }
3198    
3199        // choose targets for the new block to be allocated.
3200        final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget4NewBlock( 
3201            src, replication, clientNode, excludedNodes, blockSize, favoredNodes,
3202            storagePolicyID);
3203    
3204        // Part II.
3205        // Allocate a new block, add it to the INode and the BlocksMap. 
3206        Block newBlock = null;
3207        long offset;
3208        checkOperation(OperationCategory.WRITE);
3209        waitForLoadingFSImage();
3210        writeLock();
3211        try {
3212          checkOperation(OperationCategory.WRITE);
3213          // Run the full analysis again, since things could have changed
3214          // while chooseTarget() was executing.
3215          LocatedBlock[] onRetryBlock = new LocatedBlock[1];
3216          FileState fileState = 
3217              analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
3218          final INodeFile pendingFile = fileState.inode;
3219          src = fileState.path;
3220    
3221          if (onRetryBlock[0] != null) {
3222            if (onRetryBlock[0].getLocations().length > 0) {
3223              // This is a retry. Just return the last block if having locations.
3224              return onRetryBlock[0];
3225            } else {
3226              // add new chosen targets to already allocated block and return
3227              BlockInfo lastBlockInFile = pendingFile.getLastBlock();
3228              ((BlockInfoUnderConstruction) lastBlockInFile)
3229                  .setExpectedLocations(targets);
3230              offset = pendingFile.computeFileSize();
3231              return makeLocatedBlock(lastBlockInFile, targets, offset);
3232            }
3233          }
3234    
3235          // commit the last block and complete it if it has minimum replicas
3236          commitOrCompleteLastBlock(pendingFile,
3237                                    ExtendedBlock.getLocalBlock(previous));
3238    
3239          // allocate new block, record block locations in INode.
3240          newBlock = createNewBlock();
3241          INodesInPath inodesInPath = INodesInPath.fromINode(pendingFile);
3242          saveAllocatedBlock(src, inodesInPath, newBlock, targets);
3243    
3244          persistNewBlock(src, pendingFile);
3245          offset = pendingFile.computeFileSize();
3246        } finally {
3247          writeUnlock();
3248        }
3249        getEditLog().logSync();
3250    
3251        // Return located block
3252        return makeLocatedBlock(newBlock, targets, offset);
3253      }
3254    
3255      /*
3256       * Resolve clientmachine address to get a network location path
3257       */
3258      private Node getClientNode(String clientMachine) {
3259        List<String> hosts = new ArrayList<String>(1);
3260        hosts.add(clientMachine);
3261        List<String> rName = getBlockManager().getDatanodeManager()
3262            .resolveNetworkLocation(hosts);
3263        Node clientNode = null;
3264        if (rName != null) {
3265          // Able to resolve clientMachine mapping.
3266          // Create a temp node to findout the rack local nodes
3267          clientNode = new NodeBase(rName.get(0) + NodeBase.PATH_SEPARATOR_STR
3268              + clientMachine);
3269        }
3270        return clientNode;
3271      }
3272    
3273      static class FileState {
3274        public final INodeFile inode;
3275        public final String path;
3276    
3277        public FileState(INodeFile inode, String fullPath) {
3278          this.inode = inode;
3279          this.path = fullPath;
3280        }
3281      }
3282    
  /**
   * Validate the state of a file under construction before allocating a new
   * block: check safe mode, fs object limits, the lease, and that the
   * client's view of the last block ({@code previous}) matches ours. When the
   * call is recognized as a retry, the already-allocated last block is
   * returned via {@code onRetryBlock[0]}.
   *
   * @param src path of the file; may be re-resolved from the inode id
   * @param fileId inode id, or GRANDFATHER_INODE_ID from older clients
   * @param clientName the lease holder
   * @param previous the client's view of the file's current last block
   * @param onRetryBlock out-parameter: set to the last block when this call
   *                     is a retry, otherwise left null
   * @return the file's inode and its (possibly re-resolved) path
   */
  FileState analyzeFileState(String src,
                                long fileId,
                                String clientName,
                                ExtendedBlock previous,
                                LocatedBlock[] onRetryBlock)
          throws IOException  {
    assert hasReadLock();

    checkBlock(previous);
    onRetryBlock[0] = null;
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot add block to " + src);

    // have we exceeded the configured limit of fs objects.
    checkFsObjectLimit();

    Block previousBlock = ExtendedBlock.getLocalBlock(previous);
    INode inode;
    if (fileId == INodeId.GRANDFATHER_INODE_ID) {
      // Older clients may not have given us an inode ID to work with.
      // In this case, we have to try to resolve the path and hope it
      // hasn't changed or been deleted since the file was opened for write.
      final INodesInPath iip = dir.getINodesInPath4Write(src);
      inode = iip.getLastINode();
    } else {
      // Newer clients pass the inode ID, so we can just get the inode
      // directly.
      inode = dir.getInode(fileId);
      if (inode != null) src = inode.getFullPathName();
    }
    final INodeFile pendingFile = checkLease(src, clientName, inode, fileId);
    BlockInfo lastBlockInFile = pendingFile.getLastBlock();
    if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
      // The block that the client claims is the current last block
      // doesn't match up with what we think is the last block. There are
      // four possibilities:
      // 1) This is the first block allocation of an append() pipeline
      //    which started appending exactly at a block boundary.
      //    In this case, the client isn't passed the previous block,
      //    so it makes the allocateBlock() call with previous=null.
      //    We can distinguish this since the last block of the file
      //    will be exactly a full block.
      // 2) This is a retry from a client that missed the response of a
      //    prior getAdditionalBlock() call, perhaps because of a network
      //    timeout, or because of an HA failover. In that case, we know
      //    by the fact that the client is re-issuing the RPC that it
      //    never began to write to the old block. Hence it is safe to
      //    to return the existing block.
      // 3) This is an entirely bogus request/bug -- we should error out
      //    rather than potentially appending a new block with an empty
      //    one in the middle, etc
      // 4) This is a retry from a client that timed out while
      //    the prior getAdditionalBlock() is still being processed,
      //    currently working on chooseTarget(). 
      //    There are no means to distinguish between the first and 
      //    the second attempts in Part I, because the first one hasn't
      //    changed the namesystem state yet.
      //    We run this analysis again in Part II where case 4 is impossible.

      BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
      if (previous == null &&
          lastBlockInFile != null &&
          lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() &&
          lastBlockInFile.isComplete()) {
        // Case 1
        if (NameNode.stateChangeLog.isDebugEnabled()) {
           NameNode.stateChangeLog.debug(
               "BLOCK* NameSystem.allocateBlock: handling block allocation" +
               " writing to a file with a complete previous block: src=" +
               src + " lastBlock=" + lastBlockInFile);
        }
      } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
        // Bytes in the last block mean the client wrote to it, so this
        // cannot be a safe retry.
        if (lastBlockInFile.getNumBytes() != 0) {
          throw new IOException(
              "Request looked like a retry to allocate block " +
              lastBlockInFile + " but it already contains " +
              lastBlockInFile.getNumBytes() + " bytes");
        }

        // Case 2
        // Return the last block.
        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
            "caught retry for allocation of a new block in " +
            src + ". Returning previously allocated block " + lastBlockInFile);
        long offset = pendingFile.computeFileSize();
        onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
            ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
            offset);
        return new FileState(pendingFile, src);
      } else {
        // Case 3
        throw new IOException("Cannot allocate block in " + src + ": " +
            "passed 'previous' block " + previous + " does not match actual " +
            "last block in file " + lastBlockInFile);
      }
    }

    // Check if the penultimate block is minimally replicated
    if (!checkFileProgress(pendingFile, false)) {
      throw new NotReplicatedYetException("Not replicated yet: " + src);
    }
    return new FileState(pendingFile, src);
  }
3386    
3387      LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
3388                                            long offset) throws IOException {
3389        LocatedBlock lBlk = new LocatedBlock(
3390            getExtendedBlock(blk), locs, offset, false);
3391        getBlockManager().setBlockToken(
3392            lBlk, BlockTokenSecretManager.AccessMode.WRITE);
3393        return lBlk;
3394      }
3395    
  /**
   * Choose additional datanode storages for an existing block's write
   * pipeline, e.g. when the client replaces a failed datanode mid-write.
   * File/lease state is read under the read lock; target selection happens
   * outside any lock.
   *
   * @see ClientProtocol#getAdditionalDatanode
   */
  LocatedBlock getAdditionalDatanode(String src, long fileId,
      final ExtendedBlock blk, final DatanodeInfo[] existings,
      final String[] storageIDs,
      final Set<Node> excludes,
      final int numAdditionalNodes, final String clientName
      ) throws IOException {
    //check if the feature is enabled
    dtpReplaceDatanodeOnFailure.checkEnabled();

    Node clientnode = null;
    String clientMachine;
    final long preferredblocksize;
    final byte storagePolicyID;
    final List<DatanodeStorageInfo> chosen;
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      //check safe mode
      checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
      src = resolvePath(src, pathComponents);

      //check lease
      final INode inode;
      if (fileId == INodeId.GRANDFATHER_INODE_ID) {
        // Older clients may not have given us an inode ID to work with.
        // In this case, we have to try to resolve the path and hope it
        // hasn't changed or been deleted since the file was opened for write.
        inode = dir.getINode(src);
      } else {
        inode = dir.getInode(fileId);
        if (inode != null) src = inode.getFullPathName();
      }
      final INodeFile file = checkLease(src, clientName, inode, fileId);
      clientMachine = file.getFileUnderConstructionFeature().getClientMachine();
      clientnode = blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
      preferredblocksize = file.getPreferredBlockSize();
      storagePolicyID = file.getStoragePolicyID();

      //find datanode storages
      final DatanodeManager dm = blockManager.getDatanodeManager();
      chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs));
    } finally {
      readUnlock();
    }

    if (clientnode == null) {
      clientnode = getClientNode(clientMachine);
    }

    // choose new datanodes.
    // Done outside the lock since placement can be slow.
    final DatanodeStorageInfo[] targets = blockManager.chooseTarget4AdditionalDatanode(
        src, numAdditionalNodes, clientnode, chosen, 
        excludes, preferredblocksize, storagePolicyID);
    final LocatedBlock lb = new LocatedBlock(blk, targets);
    blockManager.setBlockToken(lb, AccessMode.COPY);
    return lb;
  }
3456    
3457      /**
3458       * The client would like to let go of the given block
3459       */
3460      boolean abandonBlock(ExtendedBlock b, long fileId, String src, String holder)
3461          throws LeaseExpiredException, FileNotFoundException,
3462          UnresolvedLinkException, IOException {
3463        if(NameNode.stateChangeLog.isDebugEnabled()) {
3464          NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
3465              + "of file " + src);
3466        }
3467        checkOperation(OperationCategory.WRITE);
3468        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3469        waitForLoadingFSImage();
3470        writeLock();
3471        try {
3472          checkOperation(OperationCategory.WRITE);
3473          checkNameNodeSafeMode("Cannot abandon block " + b + " for file" + src);
3474          src = resolvePath(src, pathComponents);
3475    
3476          final INode inode;
3477          if (fileId == INodeId.GRANDFATHER_INODE_ID) {
3478            // Older clients may not have given us an inode ID to work with.
3479            // In this case, we have to try to resolve the path and hope it
3480            // hasn't changed or been deleted since the file was opened for write.
3481            inode = dir.getINode(src);
3482          } else {
3483            inode = dir.getInode(fileId);
3484            if (inode != null) src = inode.getFullPathName();
3485          }
3486          final INodeFile file = checkLease(src, holder, inode, fileId);
3487    
3488          //
3489          // Remove the block from the pending creates list
3490          //
3491          boolean removed = dir.removeBlock(src, file,
3492              ExtendedBlock.getLocalBlock(b));
3493          if (!removed) {
3494            return true;
3495          }
3496          if(NameNode.stateChangeLog.isDebugEnabled()) {
3497            NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
3498                                          + b + " is removed from pendingCreates");
3499          }
3500          persistBlocks(src, file, false);
3501        } finally {
3502          writeUnlock();
3503        }
3504        getEditLog().logSync();
3505    
3506        return true;
3507      }
3508    
3509      private INodeFile checkLease(String src, String holder, INode inode,
3510                                   long fileId)
3511          throws LeaseExpiredException, FileNotFoundException {
3512        assert hasReadLock();
3513        final String ident = src + " (inode " + fileId + ")";
3514        if (inode == null) {
3515          Lease lease = leaseManager.getLease(holder);
3516          throw new LeaseExpiredException(
3517              "No lease on " + ident + ": File does not exist. "
3518              + (lease != null ? lease.toString()
3519                  : "Holder " + holder + " does not have any open files."));
3520        }
3521        if (!inode.isFile()) {
3522          Lease lease = leaseManager.getLease(holder);
3523          throw new LeaseExpiredException(
3524              "No lease on " + ident + ": INode is not a regular file. "
3525                  + (lease != null ? lease.toString()
3526                  : "Holder " + holder + " does not have any open files."));
3527        }
3528        final INodeFile file = inode.asFile();
3529        if (!file.isUnderConstruction()) {
3530          Lease lease = leaseManager.getLease(holder);
3531          throw new LeaseExpiredException(
3532              "No lease on " + ident + ": File is not open for writing. "
3533              + (lease != null ? lease.toString()
3534                  : "Holder " + holder + " does not have any open files."));
3535        }
3536        // No further modification is allowed on a deleted file.
3537        // A file is considered deleted, if it is not in the inodeMap or is marked
3538        // as deleted in the snapshot feature.
3539        if (isFileDeleted(file)) {
3540          throw new FileNotFoundException(src);
3541        }
3542        String clientName = file.getFileUnderConstructionFeature().getClientName();
3543        if (holder != null && !clientName.equals(holder)) {
3544          throw new LeaseExpiredException("Lease mismatch on " + ident +
3545              " owned by " + clientName + " but is accessed by " + holder);
3546        }
3547        return file;
3548      }
3549     
3550      /**
3551       * Complete in-progress write to the given file.
3552       * @return true if successful, false if the client should continue to retry
3553       *         (e.g if not all blocks have reached minimum replication yet)
3554       * @throws IOException on error (eg lease mismatch, file not open, file deleted)
3555       */
3556      boolean completeFile(final String srcArg, String holder,
3557                           ExtendedBlock last, long fileId)
3558        throws SafeModeException, UnresolvedLinkException, IOException {
3559        String src = srcArg;
3560        if (NameNode.stateChangeLog.isDebugEnabled()) {
3561          NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
3562              src + " for " + holder);
3563        }
3564        checkBlock(last);
3565        boolean success = false;
3566        checkOperation(OperationCategory.WRITE);
3567        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3568        waitForLoadingFSImage();
3569        writeLock();
3570        try {
3571          checkOperation(OperationCategory.WRITE);
3572          checkNameNodeSafeMode("Cannot complete file " + src);
3573          src = resolvePath(src, pathComponents);
3574          success = completeFileInternal(src, holder,
3575            ExtendedBlock.getLocalBlock(last), fileId);
3576        } finally {
3577          writeUnlock();
3578        }
3579        getEditLog().logSync();
3580        if (success) {
3581          NameNode.stateChangeLog.info("DIR* completeFile: " + srcArg
3582              + " is closed by " + holder);
3583        }
3584        return success;
3585      }
3586    
  /**
   * Complete the write to the given file: verify the lease, commit the last
   * block, and finalize the inode once all blocks are complete. Caller must
   * hold the FSN write lock.
   *
   * @param src resolved path (re-derived from fileId for newer clients)
   * @param holder the lease holder closing the file
   * @param last the client's view of the file's last block
   * @param fileId inode ID, or GRANDFATHER_INODE_ID for old clients
   * @return true if the file was closed; false if the client should retry
   */
  private boolean completeFileInternal(String src, 
      String holder, Block last, long fileId) throws SafeModeException,
      UnresolvedLinkException, IOException {
    assert hasWriteLock();
    final INodeFile pendingFile;
    try {
      final INode inode;
      if (fileId == INodeId.GRANDFATHER_INODE_ID) {
        // Older clients may not have given us an inode ID to work with.
        // In this case, we have to try to resolve the path and hope it
        // hasn't changed or been deleted since the file was opened for write.
        final INodesInPath iip = dir.getLastINodeInPath(src);
        inode = iip.getINode(0);
      } else {
        inode = dir.getInode(fileId);
        if (inode != null) src = inode.getFullPathName();
      }
      pendingFile = checkLease(src, holder, inode, fileId);
    } catch (LeaseExpiredException lee) {
      final INode inode = dir.getINode(src);
      if (inode != null
          && inode.isFile()
          && !inode.asFile().isUnderConstruction()) {
        // This could be a retry RPC - i.e the client tried to close
        // the file, but missed the RPC response. Thus, it is trying
        // again to close the file. If the file still exists and
        // the client's view of the last block matches the actual
        // last block, then we'll treat it as a successful close.
        // See HDFS-3031.
        final Block realLastBlock = inode.asFile().getLastBlock();
        if (Block.matchingIdAndGenStamp(last, realLastBlock)) {
          NameNode.stateChangeLog.info("DIR* completeFile: " +
              "request from " + holder + " to complete inode " + fileId +
              "(" + src + ") which is already closed. But, it appears to be " +
              "an RPC retry. Returning success");
          return true;
        }
      }
      throw lee;
    }
    // Check the state of the penultimate block. It should be completed
    // before attempting to complete the last one.
    if (!checkFileProgress(pendingFile, false)) {
      return false;
    }

    // commit the last block and complete it if it has minimum replicas
    commitOrCompleteLastBlock(pendingFile, last);

    if (!checkFileProgress(pendingFile, true)) {
      return false;
    }

    finalizeINodeFileUnderConstruction(src, pendingFile,
        Snapshot.CURRENT_STATE_ID);
    return true;
  }
3644    
3645      /**
3646       * Save allocated block at the given pending filename
3647       * 
3648       * @param src path to the file
3649       * @param inodesInPath representing each of the components of src.
3650       *                     The last INode is the INode for {@code src} file.
3651       * @param newBlock newly allocated block to be save
3652       * @param targets target datanodes where replicas of the new block is placed
3653       * @throws QuotaExceededException If addition of block exceeds space quota
3654       */
3655      BlockInfo saveAllocatedBlock(String src, INodesInPath inodes,
3656          Block newBlock, DatanodeStorageInfo[] targets)
3657              throws IOException {
3658        assert hasWriteLock();
3659        BlockInfo b = dir.addBlock(src, inodes, newBlock, targets);
3660        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". "
3661            + getBlockPoolId() + " " + b);
3662        DatanodeStorageInfo.incrementBlocksScheduled(targets);
3663        return b;
3664      }
3665    
3666      /**
3667       * Create new block with a unique block id and a new generation stamp.
3668       */
3669      Block createNewBlock() throws IOException {
3670        assert hasWriteLock();
3671        Block b = new Block(nextBlockId(), 0, 0);
3672        // Increment the generation stamp for every new block.
3673        b.setGenerationStamp(nextGenerationStamp(false));
3674        return b;
3675      }
3676    
3677      /**
3678       * Check that the indicated file's blocks are present and
3679       * replicated.  If not, return false. If checkall is true, then check
3680       * all blocks, otherwise check only penultimate block.
3681       */
3682      boolean checkFileProgress(INodeFile v, boolean checkall) {
3683        readLock();
3684        try {
3685          if (checkall) {
3686            //
3687            // check all blocks of the file.
3688            //
3689            for (BlockInfo block: v.getBlocks()) {
3690              if (!block.isComplete()) {
3691                LOG.info("BLOCK* checkFileProgress: " + block
3692                    + " has not reached minimal replication "
3693                    + blockManager.minReplication);
3694                return false;
3695              }
3696            }
3697          } else {
3698            //
3699            // check the penultimate block of this file
3700            //
3701            BlockInfo b = v.getPenultimateBlock();
3702            if (b != null && !b.isComplete()) {
3703              LOG.warn("BLOCK* checkFileProgress: " + b
3704                  + " has not reached minimal replication "
3705                  + blockManager.minReplication);
3706              return false;
3707            }
3708          }
3709          return true;
3710        } finally {
3711          readUnlock();
3712        }
3713      }
3714    
3715      ////////////////////////////////////////////////////////////////
3716      // Here's how to handle block-copy failure during client write:
3717      // -- As usual, the client's write should result in a streaming
3718      // backup write to a k-machine sequence.
3719      // -- If one of the backup machines fails, no worries.  Fail silently.
3720      // -- Before client is allowed to close and finalize file, make sure
3721      // that the blocks are backed up.  Namenode may have to issue specific backup
3722      // commands to make up for earlier datanode failures.  Once all copies
3723      // are made, edit namespace and return to client.
3724      ////////////////////////////////////////////////////////////////
3725    
3726      /** 
3727       * Change the indicated filename. 
3728       * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead.
3729       */
3730      @Deprecated
3731      boolean renameTo(String src, String dst) 
3732          throws IOException, UnresolvedLinkException {
3733        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3734        if (cacheEntry != null && cacheEntry.isSuccess()) {
3735          return true; // Return previous response
3736        }
3737        boolean ret = false;
3738        try {
3739          ret = renameToInt(src, dst, cacheEntry != null);
3740        } catch (AccessControlException e) {
3741          logAuditEvent(false, "rename", src, dst, null);
3742          throw e;
3743        } finally {
3744          RetryCache.setState(cacheEntry, ret);
3745        }
3746        return ret;
3747      }
3748    
  /**
   * Perform the deprecated-style rename of {@code srcArg} to {@code dstArg}
   * under the FSN write lock, then sync the edit log and audit-log success.
   *
   * @param logRetryCache whether this op should be recorded against the
   *                      caller's retry-cache entry in the edit log
   * @return true if the rename took effect
   */
  private boolean renameToInt(final String srcArg, final String dstArg,
    boolean logRetryCache)
    throws IOException, UnresolvedLinkException {
    String src = srcArg;
    String dst = dstArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
          " to " + dst);
    }
    if (!DFSUtil.isValidName(dst)) {
      throw new IOException("Invalid name: " + dst);
    }
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
    byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
    boolean status = false;
    HdfsFileStatus resultingStat = null;
    writeLock();
    try {
      // Re-check under the lock; NN state may have changed meanwhile.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      waitForLoadingFSImage();
      src = resolvePath(src, srcComponents);
      dst = resolvePath(dst, dstComponents);
      checkOperation(OperationCategory.WRITE);
      status = renameToInternal(pc, src, dst, logRetryCache);
      if (status) {
        // Capture the post-rename status for the audit log while still
        // holding the lock.
        resultingStat = getAuditFileInfo(dst, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the lock.
    getEditLog().logSync();
    if (status) {
      logAuditEvent(true, "rename", srcArg, dstArg, resultingStat);
    }
    return status;
  }
3788    
3789      /** @deprecated See {@link #renameTo(String, String)} */
3790      @Deprecated
3791      private boolean renameToInternal(FSPermissionChecker pc, String src,
3792          String dst, boolean logRetryCache) throws IOException,
3793          UnresolvedLinkException {
3794        assert hasWriteLock();
3795        if (isPermissionEnabled) {
3796          //We should not be doing this.  This is move() not renameTo().
3797          //but for now,
3798          //NOTE: yes, this is bad!  it's assuming much lower level behavior
3799          //      of rewriting the dst
3800          String actualdst = dir.isDir(dst)?
3801              dst + Path.SEPARATOR + new Path(src).getName(): dst;
3802          // Rename does not operates on link targets
3803          // Do not resolveLink when checking permissions of src and dst
3804          // Check write access to parent of src
3805          checkPermission(pc, src, false, null, FsAction.WRITE, null, null,
3806              false, false);
3807          // Check write access to ancestor of dst
3808          checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
3809              false, false);
3810        }
3811    
3812        long mtime = now();
3813        if (dir.renameTo(src, dst, mtime)) {
3814          getEditLog().logRename(src, dst, mtime, logRetryCache);
3815          return true;
3816        }
3817        return false;
3818      }
3819      
3820    
  /**
   * Rename src to dst, honoring the given {@link Options.Rename} flags.
   * Any blocks unlinked by the rename are collected while holding the lock
   * and removed from the block manager incrementally afterwards.
   */
  void renameTo(final String srcArg, final String dstArg,
      Options.Rename... options) throws IOException, UnresolvedLinkException {
    String src = srcArg;
    String dst = dstArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
          + src + " to " + dst);
    }
    if (!DFSUtil.isValidName(dst)) {
      throw new InvalidPathException("Invalid name: " + dst);
    }
    final FSPermissionChecker pc = getPermissionChecker();
    
    checkOperation(OperationCategory.WRITE);
    // Answer retried RPCs from the retry cache instead of renaming twice.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
    byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
    HdfsFileStatus resultingStat = null;
    boolean success = false;
    writeLock();
    BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
    try {
      // Re-check under the lock; NN state may have changed meanwhile.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      src = resolvePath(src, srcComponents);
      dst = resolvePath(dst, dstComponents);
      renameToInternal(pc, src, dst, cacheEntry != null, 
          collectedBlocks, options);
      resultingStat = getAuditFileInfo(dst, false);
      success = true;
    } finally {
      writeUnlock();
      RetryCache.setState(cacheEntry, success);
    }
    getEditLog().logSync();
    // Delete the collected blocks here, outside the write lock, in small
    // increments so other operations are not starved (see removeBlocks).
    if (!collectedBlocks.getToDeleteList().isEmpty()) {
      removeBlocks(collectedBlocks);
      collectedBlocks.clear();
    }
    if (resultingStat != null) {
      StringBuilder cmd = new StringBuilder("rename options=");
      for (Rename option : options) {
        cmd.append(option.value()).append(" ");
      }
      logAuditEvent(true, cmd.toString(), srcArg, dstArg, resultingStat);
    }
  }
3872    
3873      private void renameToInternal(FSPermissionChecker pc, String src, 
3874          String dst, boolean logRetryCache, BlocksMapUpdateInfo collectedBlocks, 
3875          Options.Rename... options) throws IOException {
3876        assert hasWriteLock();
3877        if (isPermissionEnabled) {
3878          // Rename does not operates on link targets
3879          // Do not resolveLink when checking permissions of src and dst
3880          // Check write access to parent of src
3881          checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false,
3882              false);
3883          // Check write access to ancestor of dst
3884          checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false,
3885              false);
3886        }
3887    
3888        waitForLoadingFSImage();
3889        long mtime = now();
3890        dir.renameTo(src, dst, mtime, collectedBlocks, options);
3891        getEditLog().logRename(src, dst, mtime, logRetryCache, options);
3892      }
3893      
3894      /**
3895       * Remove the indicated file from namespace.
3896       * 
3897       * @see ClientProtocol#delete(String, boolean) for detailed description and 
3898       * description of exceptions
3899       */
3900      boolean delete(String src, boolean recursive)
3901          throws AccessControlException, SafeModeException,
3902          UnresolvedLinkException, IOException {
3903        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3904        if (cacheEntry != null && cacheEntry.isSuccess()) {
3905          return true; // Return previous response
3906        }
3907        boolean ret = false;
3908        try {
3909          ret = deleteInt(src, recursive, cacheEntry != null);
3910        } catch (AccessControlException e) {
3911          logAuditEvent(false, "delete", src);
3912          throw e;
3913        } finally {
3914          RetryCache.setState(cacheEntry, ret);
3915        }
3916        return ret;
3917      }
3918          
3919      private boolean deleteInt(String src, boolean recursive, boolean logRetryCache)
3920          throws AccessControlException, SafeModeException,
3921          UnresolvedLinkException, IOException {
3922        if (NameNode.stateChangeLog.isDebugEnabled()) {
3923          NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
3924        }
3925        boolean status = deleteInternal(src, recursive, true, logRetryCache);
3926        if (status) {
3927          logAuditEvent(true, "delete", src);
3928        }
3929        return status;
3930      }
3931        
3932      private FSPermissionChecker getPermissionChecker()
3933          throws AccessControlException {
3934        try {
3935          return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser());
3936        } catch (IOException ioe) {
3937          throw new AccessControlException(ioe);
3938        }
3939      }
3940      
3941      /**
3942       * Remove a file/directory from the namespace.
3943       * <p>
3944       * For large directories, deletion is incremental. The blocks under
3945       * the directory are collected and deleted a small number at a time holding
3946       * the {@link FSNamesystem} lock.
3947       * <p>
3948       * For small directory or file the deletion is done in one shot.
3949       * 
3950       * @see ClientProtocol#delete(String, boolean) for description of exceptions
3951       */
3952      private boolean deleteInternal(String src, boolean recursive,
3953          boolean enforcePermission, boolean logRetryCache)
3954          throws AccessControlException, SafeModeException, UnresolvedLinkException,
3955                 IOException {
3956        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
3957        List<INode> removedINodes = new ChunkedArrayList<INode>();
3958        FSPermissionChecker pc = getPermissionChecker();
3959        checkOperation(OperationCategory.WRITE);
3960        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3961        boolean ret = false;
3962    
3963        waitForLoadingFSImage();
3964        writeLock();
3965        try {
3966          checkOperation(OperationCategory.WRITE);
3967          checkNameNodeSafeMode("Cannot delete " + src);
3968          src = resolvePath(src, pathComponents);
3969          if (!recursive && dir.isNonEmptyDirectory(src)) {
3970            throw new PathIsNotEmptyDirectoryException(src + " is non empty");
3971          }
3972          if (enforcePermission && isPermissionEnabled) {
3973            checkPermission(pc, src, false, null, FsAction.WRITE, null,
3974                FsAction.ALL, true, false);
3975          }
3976    
3977          long mtime = now();
3978          // Unlink the target directory from directory tree
3979          long filesRemoved = dir.delete(src, collectedBlocks, removedINodes,
3980                  mtime);
3981          if (filesRemoved < 0) {
3982            return false;
3983          }
3984          getEditLog().logDelete(src, mtime, logRetryCache);
3985          incrDeletedFileCount(filesRemoved);
3986          // Blocks/INodes will be handled later
3987          removePathAndBlocks(src, null, removedINodes, true);
3988          ret = true;
3989        } finally {
3990          writeUnlock();
3991        }
3992        getEditLog().logSync(); 
3993        removeBlocks(collectedBlocks); // Incremental deletion of blocks
3994        collectedBlocks.clear();
3995    
3996        if (NameNode.stateChangeLog.isDebugEnabled()) {
3997          NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
3998            + src +" is removed");
3999        }
4000        return ret;
4001      }
4002    
4003      /**
4004       * From the given list, incrementally remove the blocks from blockManager
4005       * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
4006       * ensure that other waiters on the lock can get in. See HDFS-2938
4007       * 
4008       * @param blocks
4009       *          An instance of {@link BlocksMapUpdateInfo} which contains a list
4010       *          of blocks that need to be removed from blocksMap
4011       */
4012      void removeBlocks(BlocksMapUpdateInfo blocks) {
4013        List<Block> toDeleteList = blocks.getToDeleteList();
4014        Iterator<Block> iter = toDeleteList.iterator();
4015        while (iter.hasNext()) {
4016          writeLock();
4017          try {
4018            for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
4019              blockManager.removeBlock(iter.next());
4020            }
4021          } finally {
4022            writeUnlock();
4023          }
4024        }
4025      }
4026      
4027      /**
4028       * Remove leases, inodes and blocks related to a given path
4029       * @param src The given path
4030       * @param blocks Containing the list of blocks to be deleted from blocksMap
4031       * @param removedINodes Containing the list of inodes to be removed from 
4032       *                      inodesMap
4033       * @param acquireINodeMapLock Whether to acquire the lock for inode removal
4034       */
4035      void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
4036          List<INode> removedINodes, final boolean acquireINodeMapLock) {
4037        assert hasWriteLock();
4038        leaseManager.removeLeaseWithPrefixPath(src);
4039        // remove inodes from inodesMap
4040        if (removedINodes != null) {
4041          if (acquireINodeMapLock) {
4042            dir.writeLock();
4043          }
4044          try {
4045            dir.removeFromInodeMap(removedINodes);
4046          } finally {
4047            if (acquireINodeMapLock) {
4048              dir.writeUnlock();
4049            }
4050          }
4051          removedINodes.clear();
4052        }
4053        if (blocks == null) {
4054          return;
4055        }
4056        
4057        removeBlocksAndUpdateSafemodeTotal(blocks);
4058      }
4059    
4060      /**
4061       * Removes the blocks from blocksmap and updates the safemode blocks total
4062       * 
4063       * @param blocks
4064       *          An instance of {@link BlocksMapUpdateInfo} which contains a list
4065       *          of blocks that need to be removed from blocksMap
4066       */
4067      void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
4068        assert hasWriteLock();
4069        // In the case that we are a Standby tailing edits from the
4070        // active while in safe-mode, we need to track the total number
4071        // of blocks and safe blocks in the system.
4072        boolean trackBlockCounts = isSafeModeTrackingBlocks();
4073        int numRemovedComplete = 0, numRemovedSafe = 0;
4074    
4075        for (Block b : blocks.getToDeleteList()) {
4076          if (trackBlockCounts) {
4077            BlockInfo bi = getStoredBlock(b);
4078            if (bi.isComplete()) {
4079              numRemovedComplete++;
4080              if (bi.numNodes() >= blockManager.minReplication) {
4081                numRemovedSafe++;
4082              }
4083            }
4084          }
4085          blockManager.removeBlock(b);
4086        }
4087        if (trackBlockCounts) {
4088          if (LOG.isDebugEnabled()) {
4089            LOG.debug("Adjusting safe-mode totals for deletion."
4090                + "decreasing safeBlocks by " + numRemovedSafe
4091                + ", totalBlocks by " + numRemovedComplete);
4092          }
4093          adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
4094        }
4095      }
4096    
4097      /**
4098       * @see SafeModeInfo#shouldIncrementallyTrackBlocks
4099       */
4100      private boolean isSafeModeTrackingBlocks() {
4101        if (!haEnabled) {
4102          // Never track blocks incrementally in non-HA code.
4103          return false;
4104        }
4105        SafeModeInfo sm = this.safeMode;
4106        return sm != null && sm.shouldIncrementallyTrackBlocks();
4107      }
4108    
4109      /**
4110       * Get the file info for a specific file.
4111       *
4112       * @param srcArg The string representation of the path to the file
4113       * @param resolveLink whether to throw UnresolvedLinkException 
4114       *        if src refers to a symlink
4115       *
4116       * @throws AccessControlException if access is denied
4117       * @throws UnresolvedLinkException if a symlink is encountered.
4118       *
4119       * @return object containing information regarding the file
4120       *         or null if file not found
4121       * @throws StandbyException 
4122       */
4123      HdfsFileStatus getFileInfo(final String srcArg, boolean resolveLink)
4124        throws AccessControlException, UnresolvedLinkException,
4125               StandbyException, IOException {
4126        String src = srcArg;
4127        if (!DFSUtil.isValidName(src)) {
4128          throw new InvalidPathException("Invalid file name: " + src);
4129        }
4130        HdfsFileStatus stat = null;
4131        FSPermissionChecker pc = getPermissionChecker();
4132        checkOperation(OperationCategory.READ);
4133        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4134        readLock();
4135        try {
4136          checkOperation(OperationCategory.READ);
4137          src = resolvePath(src, pathComponents);
4138          boolean isSuperUser = true;
4139          if (isPermissionEnabled) {
4140            checkPermission(pc, src, false, null, null, null, null, false,
4141                resolveLink);
4142            isSuperUser = pc.isSuperUser();
4143          }
4144          stat = dir.getFileInfo(src, resolveLink,
4145              FSDirectory.isReservedRawName(srcArg), isSuperUser);
4146        } catch (AccessControlException e) {
4147          logAuditEvent(false, "getfileinfo", srcArg);
4148          throw e;
4149        } finally {
4150          readUnlock();
4151        }
4152        logAuditEvent(true, "getfileinfo", srcArg);
4153        return stat;
4154      }
4155      
4156      /**
4157       * Returns true if the file is closed
4158       */
4159      boolean isFileClosed(final String srcArg)
4160          throws AccessControlException, UnresolvedLinkException,
4161          StandbyException, IOException {
4162        String src = srcArg;
4163        FSPermissionChecker pc = getPermissionChecker();  
4164        checkOperation(OperationCategory.READ);
4165        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4166        readLock();
4167        try {
4168          src = resolvePath(src, pathComponents);
4169          checkOperation(OperationCategory.READ);
4170          if (isPermissionEnabled) {
4171            checkTraverse(pc, src);
4172          }
4173          return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction();
4174        } catch (AccessControlException e) {
4175          if (isAuditEnabled() && isExternalInvocation()) {
4176            logAuditEvent(false, "isFileClosed", srcArg);
4177          }
4178          throw e;
4179        } finally {
4180          readUnlock();
4181        }
4182      }
4183    
4184      /**
4185       * Create all the necessary directories
4186       */
4187      boolean mkdirs(String src, PermissionStatus permissions,
4188          boolean createParent) throws IOException, UnresolvedLinkException {
4189        boolean ret = false;
4190        try {
4191          ret = mkdirsInt(src, permissions, createParent);
4192        } catch (AccessControlException e) {
4193          logAuditEvent(false, "mkdirs", src);
4194          throw e;
4195        }
4196        return ret;
4197      }
4198    
4199      private boolean mkdirsInt(final String srcArg, PermissionStatus permissions,
4200          boolean createParent) throws IOException, UnresolvedLinkException {
4201        String src = srcArg;
4202        if(NameNode.stateChangeLog.isDebugEnabled()) {
4203          NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
4204        }
4205        if (!DFSUtil.isValidName(src)) {
4206          throw new InvalidPathException(src);
4207        }
4208        FSPermissionChecker pc = getPermissionChecker();
4209        checkOperation(OperationCategory.WRITE);
4210        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4211        HdfsFileStatus resultingStat = null;
4212        boolean status = false;
4213        writeLock();
4214        try {
4215          checkOperation(OperationCategory.WRITE);   
4216          checkNameNodeSafeMode("Cannot create directory " + src);
4217          src = resolvePath(src, pathComponents);
4218          status = mkdirsInternal(pc, src, permissions, createParent);
4219          if (status) {
4220            resultingStat = getAuditFileInfo(src, false);
4221          }
4222        } finally {
4223          writeUnlock();
4224        }
4225        getEditLog().logSync();
4226        if (status) {
4227          logAuditEvent(true, "mkdirs", srcArg, null, resultingStat);
4228        }
4229        return status;
4230      }
4231        
4232      /**
4233       * Create all the necessary directories
4234       */
4235      private boolean mkdirsInternal(FSPermissionChecker pc, String src,
4236          PermissionStatus permissions, boolean createParent) 
4237          throws IOException, UnresolvedLinkException {
4238        assert hasWriteLock();
4239        if (isPermissionEnabled) {
4240          checkTraverse(pc, src);
4241        }
4242        if (dir.isDirMutable(src)) {
4243          // all the users of mkdirs() are used to expect 'true' even if
4244          // a new directory is not created.
4245          return true;
4246        }
4247        if (isPermissionEnabled) {
4248          checkAncestorAccess(pc, src, FsAction.WRITE);
4249        }
4250        if (!createParent) {
4251          verifyParentDir(src);
4252        }
4253    
4254        // validate that we have enough inodes. This is, at best, a 
4255        // heuristic because the mkdirs() operation might need to 
4256        // create multiple inodes.
4257        checkFsObjectLimit();
4258    
4259        if (!mkdirsRecursively(src, permissions, false, now())) {
4260          throw new IOException("Failed to create directory: " + src);
4261        }
4262        return true;
4263      }
4264    
4265      /**
4266       * Create a directory
4267       * If ancestor directories do not exist, automatically create them.
4268    
4269       * @param src string representation of the path to the directory
4270       * @param permissions the permission of the directory
4271       * @param inheritPermission if the permission of the directory should inherit
4272       *                          from its parent or not. u+wx is implicitly added to
4273       *                          the automatically created directories, and to the
4274       *                          given directory if inheritPermission is true
4275       * @param now creation time
4276       * @return true if the operation succeeds false otherwise
4277       * @throws QuotaExceededException if directory creation violates
4278       *                                any quota limit
4279       * @throws UnresolvedLinkException if a symlink is encountered in src.
4280       * @throws SnapshotAccessControlException if path is in RO snapshot
4281       */
4282      private boolean mkdirsRecursively(String src, PermissionStatus permissions,
4283                     boolean inheritPermission, long now)
4284              throws FileAlreadyExistsException, QuotaExceededException,
4285                     UnresolvedLinkException, SnapshotAccessControlException,
4286                     AclException {
4287        src = FSDirectory.normalizePath(src);
4288        String[] names = INode.getPathNames(src);
4289        byte[][] components = INode.getPathComponents(names);
4290        final int lastInodeIndex = components.length - 1;
4291    
4292        dir.writeLock();
4293        try {
4294          INodesInPath iip = dir.getExistingPathINodes(components);
4295          if (iip.isSnapshot()) {
4296            throw new SnapshotAccessControlException(
4297                    "Modification on RO snapshot is disallowed");
4298          }
4299          INode[] inodes = iip.getINodes();
4300    
4301          // find the index of the first null in inodes[]
4302          StringBuilder pathbuilder = new StringBuilder();
4303          int i = 1;
4304          for(; i < inodes.length && inodes[i] != null; i++) {
4305            pathbuilder.append(Path.SEPARATOR).append(names[i]);
4306            if (!inodes[i].isDirectory()) {
4307              throw new FileAlreadyExistsException(
4308                      "Parent path is not a directory: "
4309                      + pathbuilder + " "+inodes[i].getLocalName());
4310            }
4311          }
4312    
4313          // default to creating parent dirs with the given perms
4314          PermissionStatus parentPermissions = permissions;
4315    
4316          // if not inheriting and it's the last inode, there's no use in
4317          // computing perms that won't be used
4318          if (inheritPermission || (i < lastInodeIndex)) {
4319            // if inheriting (ie. creating a file or symlink), use the parent dir,
4320            // else the supplied permissions
4321            // NOTE: the permissions of the auto-created directories violate posix
4322            FsPermission parentFsPerm = inheritPermission
4323                    ? inodes[i-1].getFsPermission() : permissions.getPermission();
4324    
4325            // ensure that the permissions allow user write+execute
4326            if (!parentFsPerm.getUserAction().implies(FsAction.WRITE_EXECUTE)) {
4327              parentFsPerm = new FsPermission(
4328                      parentFsPerm.getUserAction().or(FsAction.WRITE_EXECUTE),
4329                      parentFsPerm.getGroupAction(),
4330                      parentFsPerm.getOtherAction()
4331              );
4332            }
4333    
4334            if (!parentPermissions.getPermission().equals(parentFsPerm)) {
4335              parentPermissions = new PermissionStatus(
4336                      parentPermissions.getUserName(),
4337                      parentPermissions.getGroupName(),
4338                      parentFsPerm
4339              );
4340              // when inheriting, use same perms for entire path
4341              if (inheritPermission) permissions = parentPermissions;
4342            }
4343          }
4344    
4345          // create directories beginning from the first null index
4346          for(; i < inodes.length; i++) {
4347            pathbuilder.append(Path.SEPARATOR).append(names[i]);
4348            dir.unprotectedMkdir(allocateNewInodeId(), iip, i, components[i],
4349                    (i < lastInodeIndex) ? parentPermissions : permissions, null,
4350                    now);
4351            if (inodes[i] == null) {
4352              return false;
4353            }
4354            // Directory creation also count towards FilesCreated
4355            // to match count of FilesDeleted metric.
4356            NameNode.getNameNodeMetrics().incrFilesCreated();
4357    
4358            final String cur = pathbuilder.toString();
4359            getEditLog().logMkDir(cur, inodes[i]);
4360            if(NameNode.stateChangeLog.isDebugEnabled()) {
4361              NameNode.stateChangeLog.debug(
4362                      "mkdirs: created directory " + cur);
4363            }
4364          }
4365        } finally {
4366          dir.writeUnlock();
4367        }
4368        return true;
4369      }
4370    
4371      /**
4372       * Get the content summary for a specific file/dir.
4373       *
4374       * @param srcArg The string representation of the path to the file
4375       *
4376       * @throws AccessControlException if access is denied
4377       * @throws UnresolvedLinkException if a symlink is encountered.
4378       * @throws FileNotFoundException if no file exists
4379       * @throws StandbyException
4380       * @throws IOException for issues with writing to the audit log
4381       *
4382       * @return object containing information regarding the file
4383       *         or null if file not found
4384       */
4385      ContentSummary getContentSummary(final String srcArg) throws IOException {
4386        String src = srcArg;
4387        FSPermissionChecker pc = getPermissionChecker();
4388        checkOperation(OperationCategory.READ);
4389        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4390        readLock();
4391        boolean success = true;
4392        try {
4393          checkOperation(OperationCategory.READ);
4394          src = resolvePath(src, pathComponents);
4395          if (isPermissionEnabled) {
4396            checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
4397          }
4398          return dir.getContentSummary(src);
4399    
4400        } catch (AccessControlException ace) {
4401          success = false;
4402          throw ace;
4403        } finally {
4404          readUnlock();
4405          logAuditEvent(success, "contentSummary", srcArg);
4406        }
4407      }
4408    
4409      /**
4410       * Set the namespace quota and diskspace quota for a directory.
4411       * See {@link ClientProtocol#setQuota(String, long, long)} for the 
4412       * contract.
4413       * 
4414       * Note: This does not support ".inodes" relative path.
4415       */
4416      void setQuota(String path, long nsQuota, long dsQuota)
4417          throws IOException, UnresolvedLinkException {
4418        checkSuperuserPrivilege();
4419        checkOperation(OperationCategory.WRITE);
4420        writeLock();
4421        try {
4422          checkOperation(OperationCategory.WRITE);
4423          checkNameNodeSafeMode("Cannot set quota on " + path);
4424          INodeDirectory changed = dir.setQuota(path, nsQuota, dsQuota);
4425          if (changed != null) {
4426            final Quota.Counts q = changed.getQuotaCounts();
4427            getEditLog().logSetQuota(path,
4428                    q.get(Quota.NAMESPACE), q.get(Quota.DISKSPACE));
4429          }
4430        } finally {
4431          writeUnlock();
4432        }
4433        getEditLog().logSync();
4434      }
4435    
  /** Persist all metadata about this file.
   * @param src The string representation of the path
   * @param fileId The inode ID that we're fsyncing.  Older clients will pass
   *               INodeId.GRANDFATHER_INODE_ID here.
   * @param clientName The string representation of the client
   * @param lastBlockLength The length of the last block 
   *                        under construction reported from client.
   * @throws IOException if path does not exist
   */
  void fsync(String src, long fileId, String clientName, long lastBlockLength)
      throws IOException, UnresolvedLinkException {
    NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);

    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot fsync file " + src);
      src = resolvePath(src, pathComponents);
      final INode inode;
      if (fileId == INodeId.GRANDFATHER_INODE_ID) {
        // Older clients may not have given us an inode ID to work with.
        // In this case, we have to try to resolve the path and hope it
        // hasn't changed or been deleted since the file was opened for write.
        inode = dir.getINode(src);
      } else {
        // Prefer the inode ID: it is stable across renames, so re-derive the
        // current path from the inode rather than trusting the client's path.
        inode = dir.getInode(fileId);
        if (inode != null) src = inode.getFullPathName();
      }
      // Verifies that clientName still holds the lease on this file.
      final INodeFile pendingFile = checkLease(src, clientName, inode, fileId);
      if (lastBlockLength > 0) {
        pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
            pendingFile, lastBlockLength);
      }
      persistBlocks(src, pendingFile, false);
    } finally {
      writeUnlock();
    }
    // Flush the edit-log entry outside the lock.
    getEditLog().logSync();
  }
4478    
4479      /**
4480       * Move a file that is being written to be immutable.
4481       * @param src The filename
4482       * @param lease The lease for the client creating the file
4483       * @param recoveryLeaseHolder reassign lease to this holder if the last block
4484       *        needs recovery; keep current holder if null.
4485       * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
4486       *         replication;<br>
4487       *         RecoveryInProgressException if lease recovery is in progress.<br>
4488       *         IOException in case of an error.
4489       * @return true  if file has been successfully finalized and closed or 
4490       *         false if block recovery has been initiated. Since the lease owner
4491       *         has been changed and logged, caller should call logSync().
4492       */
  boolean internalReleaseLease(Lease lease, String src, 
      String recoveryLeaseHolder) throws AlreadyBeingCreatedException, 
      IOException, UnresolvedLinkException {
    LOG.info("Recovering " + lease + ", src=" + src);
    assert !isInSafeMode();
    assert hasWriteLock();

    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile = iip.getINode(0).asFile();
    int nrBlocks = pendingFile.numBlocks();
    BlockInfo[] blocks = pendingFile.getBlocks();

    // Count the leading run of COMPLETE blocks; on exit curBlock points at
    // the first non-COMPLETE block, if any.
    int nrCompleteBlocks;
    BlockInfo curBlock = null;
    for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
      curBlock = blocks[nrCompleteBlocks];
      if(!curBlock.isComplete())
        break;
      assert blockManager.checkMinReplication(curBlock) :
              "A COMPLETE block is not minimally replicated in " + src;
    }

    // If there are no incomplete blocks associated with this file,
    // then reap lease immediately and close the file.
    if(nrCompleteBlocks == nrBlocks) {
      finalizeINodeFileUnderConstruction(src, pendingFile,
          iip.getLatestSnapshotId());
      NameNode.stateChangeLog.warn("BLOCK*"
        + " internalReleaseLease: All existing blocks are COMPLETE,"
        + " lease removed, file closed.");
      return true;  // closed!
    }

    // Only the last and the penultimate blocks may be in non COMPLETE state.
    // If the penultimate block is not COMPLETE, then it must be COMMITTED.
    // Anything else means the file's block list is inconsistent.
    if(nrCompleteBlocks < nrBlocks - 2 ||
       nrCompleteBlocks == nrBlocks - 2 &&
         curBlock != null &&
         curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
      final String message = "DIR* NameSystem.internalReleaseLease: "
        + "attempt to release a create lock on "
        + src + " but file is already closed.";
      NameNode.stateChangeLog.warn(message);
      throw new IOException(message);
    }

    // The last block is not COMPLETE, and
    // that the penultimate block if exists is either COMPLETE or COMMITTED
    final BlockInfo lastBlock = pendingFile.getLastBlock();
    BlockUCState lastBlockState = lastBlock.getBlockUCState();
    BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();

    // If penultimate block doesn't exist then its minReplication is met
    boolean penultimateBlockMinReplication = penultimateBlock == null ? true :
        blockManager.checkMinReplication(penultimateBlock);

    // Decide what to do based on the state of the last block.
    switch(lastBlockState) {
    case COMPLETE:
      assert false : "Already checked that the last block is incomplete";
      break;
    case COMMITTED:
      // Close file if committed blocks are minimally replicated
      if(penultimateBlockMinReplication &&
          blockManager.checkMinReplication(lastBlock)) {
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK*"
          + " internalReleaseLease: Committed blocks are minimally replicated,"
          + " lease removed, file closed.");
        return true;  // closed!
      }
      // Cannot close file right now, since some blocks 
      // are not yet minimally replicated.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      String message = "DIR* NameSystem.internalReleaseLease: " +
          "Failed to release lease for file " + src +
          ". Committed blocks are waiting to be minimally replicated." +
          " Try again later.";
      NameNode.stateChangeLog.warn(message);
      throw new AlreadyBeingCreatedException(message);
    case UNDER_CONSTRUCTION:
    case UNDER_RECOVERY:
      final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
      // setup the last block locations from the blockManager if not known
      if (uc.getNumExpectedLocations() == 0) {
        uc.setExpectedLocations(blockManager.getStorages(lastBlock));
      }

      if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
        // There is no datanode reported to this block.
        // may be client have crashed before writing data to pipeline.
        // This blocks doesn't need any recovery.
        // We can remove this block and close the file.
        pendingFile.removeLastBlock(lastBlock);
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
            + "Removed empty last block and closed file.");
        return true;
      }
      // start recovery of the last block for this file
      long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
      // Reassign the lease to recoveryLeaseHolder (if non-null) so the
      // recovering holder owns it; this logs an un-synced edit -- the
      // caller is responsible for calling logSync() (see method javadoc).
      lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
      uc.initializeBlockRecovery(blockRecoveryId);
      leaseManager.renewLease(lease);
      // Cannot close file right now, since the last block requires recovery.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      NameNode.stateChangeLog.warn(
                "DIR* NameSystem.internalReleaseLease: " +
                "File " + src + " has not been closed." +
               " Lease recovery is in progress. " +
                "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
      break;
    }
    return false;
  }
4611    
4612      private Lease reassignLease(Lease lease, String src, String newHolder,
4613          INodeFile pendingFile) {
4614        assert hasWriteLock();
4615        if(newHolder == null)
4616          return lease;
4617        // The following transaction is not synced. Make sure it's sync'ed later.
4618        logReassignLease(lease.getHolder(), src, newHolder);
4619        return reassignLeaseInternal(lease, src, newHolder, pendingFile);
4620      }
4621      
4622      Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
4623          INodeFile pendingFile) {
4624        assert hasWriteLock();
4625        pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
4626        return leaseManager.reassignLease(lease, src, newHolder);
4627      }
4628    
4629      private void commitOrCompleteLastBlock(final INodeFile fileINode,
4630          final Block commitBlock) throws IOException {
4631        assert hasWriteLock();
4632        Preconditions.checkArgument(fileINode.isUnderConstruction());
4633        if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
4634          return;
4635        }
4636    
4637        // Adjust disk space consumption if required
4638        final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();    
4639        if (diff > 0) {
4640          try {
4641            String path = fileINode.getFullPathName();
4642            dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
4643          } catch (IOException e) {
4644            LOG.warn("Unexpected exception while updating disk space.", e);
4645          }
4646        }
4647      }
4648    
  /**
   * Convert an under-construction file into a complete (immutable) file:
   * release its lease, record the change against the latest snapshot,
   * drop the under-construction feature, then close the file and trigger
   * a replication check. Caller must hold the write lock.
   */
  private void finalizeINodeFileUnderConstruction(String src,
      INodeFile pendingFile, int latestSnapshot) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();

    // The file must actually be under construction to be finalized.
    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
    Preconditions.checkArgument(uc != null);
    leaseManager.removeLease(uc.getClientName(), src);
    
    pendingFile.recordModification(latestSnapshot);

    // The file is no longer pending.
    // Create permanent INode, update blocks. No need to replace the inode here
    // since we just remove the uc feature from pendingFile
    final INodeFile newFile = pendingFile.toCompleteFile(now());

    waitForLoadingFSImage();
    // close file and persist block allocations for this file
    closeFile(src, newFile);

    blockManager.checkReplication(newFile);
  }
4671    
4672      @VisibleForTesting
4673      BlockInfo getStoredBlock(Block block) {
4674        return blockManager.getStoredBlock(block);
4675      }
4676      
4677      @Override
4678      public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
4679        assert hasReadLock();
4680        final BlockCollection bc = blockUC.getBlockCollection();
4681        if (bc == null || !(bc instanceof INodeFile)
4682            || !bc.isUnderConstruction()) {
4683          return false;
4684        }
4685    
4686        INodeFile inodeUC = (INodeFile) bc;
4687        String fullName = inodeUC.getName();
4688        try {
4689          if (fullName != null && fullName.startsWith(Path.SEPARATOR)
4690              && dir.getINode(fullName) == inodeUC) {
4691            // If file exists in normal path then no need to look in snapshot
4692            return false;
4693          }
4694        } catch (UnresolvedLinkException e) {
4695          LOG.error("Error while resolving the link : " + fullName, e);
4696          return false;
4697        }
4698        /*
4699         * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
4700         * bc is not in the current fsdirectory tree, bc must represent a snapshot
4701         * file. 
4702         * 2. if fullName is not an absolute path, bc cannot be existent in the 
4703         * current fsdirectory tree. 
4704         * 3. if bc is not the current node associated with fullName, bc must be a
4705         * snapshot inode.
4706         */
4707        return true;
4708      }
4709    
  /**
   * Datanode-reported completion of block recovery: update (or delete) the
   * recovered last block of an under-construction file and, when requested,
   * close the file. See the inline HDFS-6825 note for the delayed-delete
   * corner cases handled here.
   */
  void commitBlockSynchronization(ExtendedBlock lastblock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages)
      throws IOException, UnresolvedLinkException {
    LOG.info("commitBlockSynchronization(lastblock=" + lastblock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    String src = "";
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.
  
      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfo storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(lastblock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + lastblock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + lastblock + ") not found");
        }
      }
      //
      // The implementation of delete operation (see @deleteInternal method)
      // first removes the file paths from namespace, and delays the removal
      // of blocks to later time for better performance. When
      // commitBlockSynchronization (this method) is called in between, the
      // blockCollection of storedBlock could have been assigned to null by
      // the delete operation, throw IOException here instead of NPE; if the
      // file path is already removed from namespace by the delete operation,
      // throw FileNotFoundException here, so not to proceed to the end of
      // this method to add a CloseOp to the edit log for an already deleted
      // file (See HDFS-6825).
      //
      BlockCollection blockCollection = storedBlock.getBlockCollection();
      if (blockCollection == null) {
        throw new IOException("The blockCollection of " + storedBlock
            + " is null, likely because the file owning this block was"
            + " deleted and the block removal is delayed");
      }
      INodeFile iFile = ((INode)blockCollection).asFile();
      if (isFileDeleted(iFile)) {
        throw new FileNotFoundException("File not found: "
            + iFile.getFullPathName() + ", likely due to delayed block"
            + " removal");
      }
      // A retry after the file was already finalized: nothing to do.
      if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + lastblock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // The reported generation stamp must match the recovery id issued by
      // internalReleaseLease; otherwise this is a stale recovery attempt.
      long recoveryId =
        ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + lastblock); 
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlockFromMap(storedBlock);
        }
      }
      else {
        // update last block
        storedBlock.setGenerationStamp(newgenerationstamp);
        storedBlock.setNumBytes(newlength);

        // find the DatanodeDescriptor objects
        // There should be no locations in the blockManager till now because the
        // file is underConstruction
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode; drop targets that are no longer
            // registered with the DatanodeManager
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            DatanodeStorageInfo storageInfo =
                trimmedTargets.get(i).getStorageInfo(trimmedStorages.get(i));
            if (storageInfo != null) {
              storageInfo.addBlock(storedBlock);
            }
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        iFile.setLastBlock(storedBlock, trimmedStorageInfos);
      }

      if (closeFile) {
        src = closeFileCommitBlocks(iFile, storedBlock);
      } else {
        // If this commit does not want to close the file, persist blocks
        src = iFile.getFullPathName();
        persistBlocks(src, iFile, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(newblock=" + lastblock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
    }
  }
4862    
4863      /**
4864       * @param pendingFile open file that needs to be closed
4865       * @param storedBlock last block
4866       * @return Path of the file that was closed.
4867       * @throws IOException on error
4868       */
4869      @VisibleForTesting
4870      String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock)
4871          throws IOException {
4872        String src = pendingFile.getFullPathName();
4873    
4874        // commit the last block and complete it if it has minimum replicas
4875        commitOrCompleteLastBlock(pendingFile, storedBlock);
4876    
4877        //remove lease, close file
4878        finalizeINodeFileUnderConstruction(src, pendingFile,
4879            Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID));
4880    
4881        return src;
4882      }
4883    
4884      /**
4885       * Renew the lease(s) held by the given client
4886       */
4887      void renewLease(String holder) throws IOException {
4888        checkOperation(OperationCategory.WRITE);
4889        readLock();
4890        try {
4891          checkOperation(OperationCategory.WRITE);
4892          checkNameNodeSafeMode("Cannot renew lease for " + holder);
4893          leaseManager.renewLease(holder);
4894        } finally {
4895          readUnlock();
4896        }
4897      }
4898    
4899      /**
4900       * Get a partial listing of the indicated directory
4901       *
4902       * @param src the directory name
4903       * @param startAfter the name to start after
4904       * @param needLocation if blockLocations need to be returned
4905       * @return a partial listing starting after startAfter
4906       * 
4907       * @throws AccessControlException if access is denied
4908       * @throws UnresolvedLinkException if symbolic link is encountered
4909       * @throws IOException if other I/O error occurred
4910       */
4911      DirectoryListing getListing(String src, byte[] startAfter,
4912          boolean needLocation) 
4913          throws AccessControlException, UnresolvedLinkException, IOException {
4914        try {
4915          return getListingInt(src, startAfter, needLocation);
4916        } catch (AccessControlException e) {
4917          logAuditEvent(false, "listStatus", src);
4918          throw e;
4919        }
4920      }
4921    
4922      private DirectoryListing getListingInt(final String srcArg, byte[] startAfter,
4923          boolean needLocation)
4924        throws AccessControlException, UnresolvedLinkException, IOException {
4925        String src = srcArg;
4926        DirectoryListing dl;
4927        FSPermissionChecker pc = getPermissionChecker();
4928        checkOperation(OperationCategory.READ);
4929        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4930        String startAfterString = new String(startAfter);
4931        readLock();
4932        try {
4933          checkOperation(OperationCategory.READ);
4934          src = resolvePath(src, pathComponents);
4935    
4936          // Get file name when startAfter is an INodePath
4937          if (FSDirectory.isReservedName(startAfterString)) {
4938            byte[][] startAfterComponents = FSDirectory
4939                .getPathComponentsForReservedPath(startAfterString);
4940            try {
4941              String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir);
4942              byte[][] regularPath = INode.getPathComponents(tmp);
4943              startAfter = regularPath[regularPath.length - 1];
4944            } catch (IOException e) {
4945              // Possibly the inode is deleted
4946              throw new DirectoryListingStartAfterNotFoundException(
4947                  "Can't find startAfter " + startAfterString);
4948            }
4949          }
4950    
4951          boolean isSuperUser = true;
4952          if (isPermissionEnabled) {
4953            if (dir.isDir(src)) {
4954              checkPathAccess(pc, src, FsAction.READ_EXECUTE);
4955            } else {
4956              checkTraverse(pc, src);
4957            }
4958            isSuperUser = pc.isSuperUser();
4959          }
4960          logAuditEvent(true, "listStatus", srcArg);
4961          dl = dir.getListing(src, startAfter, needLocation, isSuperUser);
4962        } finally {
4963          readUnlock();
4964        }
4965        return dl;
4966      }
4967    
4968      /////////////////////////////////////////////////////////
4969      //
4970      // These methods are called by datanodes
4971      //
4972      /////////////////////////////////////////////////////////
4973      /**
4974       * Register Datanode.
4975       * <p>
4976       * The purpose of registration is to identify whether the new datanode
4977       * serves a new data storage, and will report new data block copies,
4978       * which the namenode was not aware of; or the datanode is a replacement
4979       * node for the data storage that was previously served by a different
4980       * or the same (in terms of host:port) datanode.
4981       * The data storages are distinguished by their storageIDs. When a new
4982       * data storage is reported the namenode issues a new unique storageID.
4983       * <p>
4984       * Finally, the namenode returns its namespaceID as the registrationID
4985       * for the datanodes. 
4986       * namespaceID is a persistent attribute of the name space.
4987       * The registrationID is checked every time the datanode is communicating
4988       * with the namenode. 
4989       * Datanodes with inappropriate registrationID are rejected.
4990       * If the namenode stops, and then restarts it can restore its 
4991       * namespaceID and will continue serving the datanodes that has previously
4992       * registered with the namenode without restarting the whole cluster.
4993       * 
4994       * @see org.apache.hadoop.hdfs.server.datanode.DataNode
4995       */
4996      void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
4997        writeLock();
4998        try {
4999          getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
5000          checkSafeMode();
5001        } finally {
5002          writeUnlock();
5003        }
5004      }
5005      
5006      /**
5007       * Get registrationID for datanodes based on the namespaceID.
5008       * 
5009       * @see #registerDatanode(DatanodeRegistration)
5010       * @return registration ID
5011       */
5012      String getRegistrationID() {
5013        return Storage.getRegistrationID(getFSImage().getStorage());
5014      }
5015    
5016      /**
5017       * The given node has reported in.  This method should:
5018       * 1) Record the heartbeat, so the datanode isn't timed out
5019       * 2) Adjust usage stats for future block allocation
5020       * 
5021       * If a substantial amount of time passed since the last datanode 
5022       * heartbeat then request an immediate block report.  
5023       * 
5024       * @return an array of datanode commands 
5025       * @throws IOException
5026       */
5027      HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
5028          StorageReport[] reports, long cacheCapacity, long cacheUsed,
5029          int xceiverCount, int xmitsInProgress, int failedVolumes)
5030            throws IOException {
5031        readLock();
5032        try {
5033          //get datanode commands
5034          final int maxTransfer = blockManager.getMaxReplicationStreams()
5035              - xmitsInProgress;
5036          DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
5037              nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
5038              xceiverCount, maxTransfer, failedVolumes);
5039          
5040          //create ha status
5041          final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
5042              haContext.getState().getServiceState(),
5043              getFSImage().getLastAppliedOrWrittenTxId());
5044    
5045          return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
5046        } finally {
5047          readUnlock();
5048        }
5049      }
5050    
5051      /**
5052       * Returns whether or not there were available resources at the last check of
5053       * resources.
5054       *
5055       * @return true if there were sufficient resources available, false otherwise.
5056       */
5057      boolean nameNodeHasResourcesAvailable() {
5058        return hasResourcesAvailable;
5059      }
5060    
5061      /**
5062       * Perform resource checks and cache the results.
5063       */
5064      void checkAvailableResources() {
5065        Preconditions.checkState(nnResourceChecker != null,
5066            "nnResourceChecker not initialized");
5067        hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
5068      }
5069    
5070      /**
5071       * Persist the block list for the inode.
5072       * @param path
5073       * @param file
5074       * @param logRetryCache
5075       */
5076      private void persistBlocks(String path, INodeFile file,
5077                                 boolean logRetryCache) {
5078        assert hasWriteLock();
5079        Preconditions.checkArgument(file.isUnderConstruction());
5080        getEditLog().logUpdateBlocks(path, file, logRetryCache);
5081        if(NameNode.stateChangeLog.isDebugEnabled()) {
5082          NameNode.stateChangeLog.debug("persistBlocks: " + path
5083                  + " with " + file.getBlocks().length + " blocks is persisted to" +
5084                  " the file system");
5085        }
5086      }
5087    
  /** Bump the files-deleted metric by the given count. */
  void incrDeletedFileCount(long count) {
    NameNode.getNameNodeMetrics().incrFilesDeleted(count);
  }
5091    
5092      /**
5093       * Close file.
5094       * @param path
5095       * @param file
5096       */
5097      private void closeFile(String path, INodeFile file) {
5098        assert hasWriteLock();
5099        waitForLoadingFSImage();
5100        // file is closed
5101        getEditLog().logCloseFile(path, file);
5102        if (NameNode.stateChangeLog.isDebugEnabled()) {
5103          NameNode.stateChangeLog.debug("closeFile: "
5104                  +path+" with "+ file.getBlocks().length
5105                  +" blocks is persisted to the file system");
5106        }
5107      }
5108    
5109      /**
5110       * Add the given symbolic link to the fs. Record it in the edits log.
5111       */
5112      private INodeSymlink addSymlink(String path, String target,
5113                                      PermissionStatus dirPerms,
5114                                      boolean createParent, boolean logRetryCache)
5115          throws UnresolvedLinkException, FileAlreadyExistsException,
5116          QuotaExceededException, SnapshotAccessControlException, AclException {
5117        waitForLoadingFSImage();
5118    
5119        final long modTime = now();
5120        if (createParent) {
5121          final String parent = new Path(path).getParent().toString();
5122          if (!mkdirsRecursively(parent, dirPerms, true, modTime)) {
5123            return null;
5124          }
5125        }
5126        final String userName = dirPerms.getUserName();
5127        long id = allocateNewInodeId();
5128        INodeSymlink newNode = dir.addSymlink(id, path, target, modTime, modTime,
5129                new PermissionStatus(userName, null, FsPermission.getDefault()));
5130        if (newNode == null) {
5131          NameNode.stateChangeLog.info("addSymlink: failed to add " + path);
5132          return null;
5133        }
5134        getEditLog().logSymlink(path, target, modTime, modTime, newNode,
5135            logRetryCache);
5136    
5137        if(NameNode.stateChangeLog.isDebugEnabled()) {
5138          NameNode.stateChangeLog.debug("addSymlink: " + path + " is added");
5139        }
5140        return newNode;
5141      }
5142    
5143      /**
5144       * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if
5145       * there are found to be insufficient resources available, causes the NN to
5146       * enter safe mode. If resources are later found to have returned to
5147       * acceptable levels, this daemon will cause the NN to exit safe mode.
5148       */
5149      class NameNodeResourceMonitor implements Runnable  {
5150        boolean shouldNNRmRun = true;
5151        @Override
5152        public void run () {
5153          try {
5154            while (fsRunning && shouldNNRmRun) {
5155              checkAvailableResources();
5156              if(!nameNodeHasResourcesAvailable()) {
5157                String lowResourcesMsg = "NameNode low on available disk space. ";
5158                if (!isInSafeMode()) {
5159                  FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode.");
5160                } else {
5161                  FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode.");
5162                }
5163                enterSafeMode(true);
5164              }
5165              try {
5166                Thread.sleep(resourceRecheckInterval);
5167              } catch (InterruptedException ie) {
5168                // Deliberately ignore
5169              }
5170            }
5171          } catch (Exception e) {
5172            FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
5173          }
5174        }
5175    
5176        public void stopMonitor() {
5177          shouldNNRmRun = false;
5178        }
5179     }
5180    
5181      class NameNodeEditLogRoller implements Runnable {
5182    
5183        private boolean shouldRun = true;
5184        private final long rollThreshold;
5185        private final long sleepIntervalMs;
5186    
5187        public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
5188            this.rollThreshold = rollThreshold;
5189            this.sleepIntervalMs = sleepIntervalMs;
5190        }
5191    
5192        @Override
5193        public void run() {
5194          while (fsRunning && shouldRun) {
5195            try {
5196              FSEditLog editLog = getFSImage().getEditLog();
5197              long numEdits =
5198                  editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
5199              if (numEdits > rollThreshold) {
5200                FSNamesystem.LOG.info("NameNode rolling its own edit log because"
5201                    + " number of edits in open segment exceeds threshold of "
5202                    + rollThreshold);
5203                rollEditLog();
5204              }
5205              Thread.sleep(sleepIntervalMs);
5206            } catch (InterruptedException e) {
5207              FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
5208                  + " was interrupted, exiting");
5209              break;
5210            } catch (Exception e) {
5211              FSNamesystem.LOG.error("Swallowing exception in "
5212                  + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
5213            }
5214          }
5215        }
5216    
5217        public void stop() {
5218          shouldRun = false;
5219        }
5220      }
5221    
5222      /**
5223       * Daemon to periodically scan the namespace for lazyPersist files
5224       * with missing blocks and unlink them.
5225       */
5226      class LazyPersistFileScrubber implements Runnable {
5227        private volatile boolean shouldRun = true;
5228        final int scrubIntervalSec;
5229        public LazyPersistFileScrubber(final int scrubIntervalSec) {
5230          this.scrubIntervalSec = scrubIntervalSec;
5231        }
5232    
5233        /**
5234         * Periodically go over the list of lazyPersist files with missing
5235         * blocks and unlink them from the namespace.
5236         */
5237        private void clearCorruptLazyPersistFiles()
5238            throws SafeModeException, AccessControlException,
5239            UnresolvedLinkException, IOException {
5240    
5241          BlockStoragePolicy lpPolicy = blockManager.getStoragePolicy("LAZY_PERSIST");
5242    
5243          List<BlockCollection> filesToDelete = new ArrayList<BlockCollection>();
5244    
5245          writeLock();
5246    
5247          try {
5248            final Iterator<Block> it = blockManager.getCorruptReplicaBlockIterator();
5249    
5250            while (it.hasNext()) {
5251              Block b = it.next();
5252              BlockInfo blockInfo = blockManager.getStoredBlock(b);
5253              if (blockInfo.getBlockCollection().getStoragePolicyID() == lpPolicy.getId()) {
5254                filesToDelete.add(blockInfo.getBlockCollection());
5255              }
5256            }
5257    
5258            for (BlockCollection bc : filesToDelete) {
5259              LOG.warn("Removing lazyPersist file " + bc.getName() + " with no replicas.");
5260              deleteInternal(bc.getName(), false, false, false);
5261            }
5262          } finally {
5263            writeUnlock();
5264          }
5265        }
5266    
5267        @Override
5268        public void run() {
5269          while (fsRunning && shouldRun) {
5270            try {
5271              clearCorruptLazyPersistFiles();
5272              Thread.sleep(scrubIntervalSec * 1000);
5273            } catch (InterruptedException e) {
5274              FSNamesystem.LOG.info(
5275                  "LazyPersistFileScrubber was interrupted, exiting");
5276              break;
5277            } catch (Exception e) {
5278              FSNamesystem.LOG.error(
5279                  "Ignoring exception in LazyPersistFileScrubber:", e);
5280            }
5281          }
5282        }
5283    
5284        public void stop() {
5285          shouldRun = false;
5286        }
5287      }
5288    
  /** @return the FSImage backing this namesystem. */
  public FSImage getFSImage() {
    return fsImage;
  }

  /** @return the edit log of the backing FSImage. */
  public FSEditLog getEditLog() {
    return getFSImage().getEditLog();
  }
5296    
5297      private void checkBlock(ExtendedBlock block) throws IOException {
5298        if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) {
5299          throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId()
5300              + " - expected " + blockPoolId);
5301        }
5302      }
5303    
  /** @return number of missing blocks, read without taking the FSN lock. */
  @Metric({"MissingBlocks", "Number of missing blocks"})
  public long getMissingBlocksCount() {
    // not locking
    return blockManager.getMissingBlocksCount();
  }

  /** @return number of datanode heartbeats that have expired. */
  @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
  public int getExpiredHeartbeats() {
    return datanodeStatistics.getExpiredHeartbeats();
  }

  /** @return transactions written since the most recent checkpoint. */
  @Metric({"TransactionsSinceLastCheckpoint",
      "Number of transactions since last checkpoint"})
  public long getTransactionsSinceLastCheckpoint() {
    return getEditLog().getLastWrittenTxId() -
        getFSImage().getStorage().getMostRecentCheckpointTxId();
  }
5321      
5322      @Metric({"TransactionsSinceLastLogRoll",
5323          "Number of transactions since last edit log roll"})
5324      public long getTransactionsSinceLastLogRoll() {
5325        if (isInStandbyState() || !getEditLog().isSegmentOpen()) {
5326          return 0;
5327        } else {
5328          return getEditLog().getLastWrittenTxId() -
5329            getEditLog().getCurSegmentTxId() + 1;
5330        }
5331      }
5332      
  /** @return the id of the last transaction written to the edit log. */
  @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
  public long getLastWrittenTransactionId() {
    return getEditLog().getLastWrittenTxId();
  }

  /** @return wall-clock time (ms since epoch) of the last checkpoint. */
  @Metric({"LastCheckpointTime",
      "Time in milliseconds since the epoch of the last checkpoint"})
  public long getLastCheckpointTime() {
    return getFSImage().getStorage().getMostRecentCheckpointTime();
  }
5343    
  /** @see ClientProtocol#getStats() */
  long[] getStats() {
    // Start from the datanode capacity stats, then fill in the block-level
    // slots that only the block manager knows about.
    final long[] stats = datanodeStatistics.getStats();
    stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
    stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
    stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
    return stats;
  }
5352    
  // Capacity/load metrics below are unlocked reads delegated to
  // datanodeStatistics.
  @Override // FSNamesystemMBean
  @Metric({"CapacityTotal",
      "Total raw capacity of data nodes in bytes"})
  public long getCapacityTotal() {
    return datanodeStatistics.getCapacityTotal();
  }

  @Metric({"CapacityTotalGB",
      "Total raw capacity of data nodes in GB"})
  public float getCapacityTotalGB() {
    return DFSUtil.roundBytesToGB(getCapacityTotal());
  }

  @Override // FSNamesystemMBean
  @Metric({"CapacityUsed",
      "Total used capacity across all data nodes in bytes"})
  public long getCapacityUsed() {
    return datanodeStatistics.getCapacityUsed();
  }

  @Metric({"CapacityUsedGB",
      "Total used capacity across all data nodes in GB"})
  public float getCapacityUsedGB() {
    return DFSUtil.roundBytesToGB(getCapacityUsed());
  }

  @Override // FSNamesystemMBean
  @Metric({"CapacityRemaining", "Remaining capacity in bytes"})
  public long getCapacityRemaining() {
    return datanodeStatistics.getCapacityRemaining();
  }

  @Metric({"CapacityRemainingGB", "Remaining capacity in GB"})
  public float getCapacityRemainingGB() {
    return DFSUtil.roundBytesToGB(getCapacityRemaining());
  }

  @Metric({"CapacityUsedNonDFS",
      "Total space used by data nodes for non DFS purposes in bytes"})
  public long getCapacityUsedNonDFS() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }

  /**
   * Total number of connections.
   */
  @Override // FSNamesystemMBean
  @Metric
  public int getTotalLoad() {
    return datanodeStatistics.getXceiverCount();
  }
5404      
  /** @return number of snapshottable directories, from the snapshot manager. */
  @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
  public int getNumSnapshottableDirs() {
    return this.snapshotManager.getNumSnapshottableDirs();
  }

  /** @return total number of snapshots, from the snapshot manager. */
  @Metric({ "Snapshots", "The number of snapshots" })
  public int getNumSnapshots() {
    return this.snapshotManager.getNumSnapshots();
  }
5414    
5415      @Override
5416      public String getSnapshotStats() {
5417        Map<String, Object> info = new HashMap<String, Object>();
5418        info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
5419        info.put("Snapshots", this.getNumSnapshots());
5420        return JSON.toString(info);
5421      }
5422    
  /** @return the number of datanodes matching the given report type. */
  int getNumberOfDatanodes(DatanodeReportType type) {
    readLock();
    try {
      return getBlockManager().getDatanodeManager().getDatanodeListForReport(
          type).size();
    } finally {
      readUnlock();
    }
  }
5432    
5433      DatanodeInfo[] datanodeReport(final DatanodeReportType type
5434          ) throws AccessControlException, StandbyException {
5435        checkSuperuserPrivilege();
5436        checkOperation(OperationCategory.UNCHECKED);
5437        readLock();
5438        try {
5439          checkOperation(OperationCategory.UNCHECKED);
5440          final DatanodeManager dm = getBlockManager().getDatanodeManager();      
5441          final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);
5442    
5443          DatanodeInfo[] arr = new DatanodeInfo[results.size()];
5444          for (int i=0; i<arr.length; i++) {
5445            arr[i] = new DatanodeInfo(results.get(i));
5446          }
5447          return arr;
5448        } finally {
5449          readUnlock();
5450        }
5451      }
5452    
5453      DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type
5454          ) throws AccessControlException, StandbyException {
5455        checkSuperuserPrivilege();
5456        checkOperation(OperationCategory.UNCHECKED);
5457        readLock();
5458        try {
5459          checkOperation(OperationCategory.UNCHECKED);
5460          final DatanodeManager dm = getBlockManager().getDatanodeManager();      
5461          final List<DatanodeDescriptor> datanodes = dm.getDatanodeListForReport(type);
5462    
5463          DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes.size()];
5464          for (int i = 0; i < reports.length; i++) {
5465            final DatanodeDescriptor d = datanodes.get(i);
5466            reports[i] = new DatanodeStorageReport(new DatanodeInfo(d),
5467                d.getStorageReports());
5468          }
5469          return reports;
5470        } finally {
5471          readUnlock();
5472        }
5473      }
5474    
5475      /**
5476       * Save namespace image.
5477       * This will save current namespace into fsimage file and empty edits file.
5478       * Requires superuser privilege and safe mode.
5479       * 
5480       * @throws AccessControlException if superuser privilege is violated.
5481       * @throws IOException if 
5482       */
5483      void saveNamespace() throws AccessControlException, IOException {
5484        checkOperation(OperationCategory.UNCHECKED);
5485        checkSuperuserPrivilege();
5486        
5487        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
5488        if (cacheEntry != null && cacheEntry.isSuccess()) {
5489          return; // Return previous response
5490        }
5491        boolean success = false;
5492        readLock();
5493        try {
5494          checkOperation(OperationCategory.UNCHECKED);
5495    
5496          if (!isInSafeMode()) {
5497            throw new IOException("Safe mode should be turned ON "
5498                + "in order to create namespace image.");
5499          }
5500          getFSImage().saveNamespace(this);
5501          success = true;
5502        } finally {
5503          readUnlock();
5504          RetryCache.setState(cacheEntry, success);
5505        }
5506        LOG.info("New namespace image has been created");
5507      }
5508      
5509      /**
5510       * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again.
5511       * Requires superuser privilege.
5512       * 
5513       * @throws AccessControlException if superuser privilege is violated.
5514       */
5515      boolean restoreFailedStorage(String arg) throws AccessControlException,
5516          StandbyException {
5517        checkSuperuserPrivilege();
5518        checkOperation(OperationCategory.UNCHECKED);
5519        writeLock();
5520        try {
5521          checkOperation(OperationCategory.UNCHECKED);
5522          
5523          // if it is disabled - enable it and vice versa.
5524          if(arg.equals("check"))
5525            return getFSImage().getStorage().getRestoreFailedStorage();
5526          
5527          boolean val = arg.equals("true");  // false if not
5528          getFSImage().getStorage().setRestoreFailedStorage(val);
5529          
5530          return val;
5531        } finally {
5532          writeUnlock();
5533        }
5534      }
5535    
  /** @return the time this namesystem was started, as a Date. */
  Date getStartTime() {
    return new Date(startTime);
  }

  /** Finalize a previously started upgrade. Requires superuser privilege. */
  void finalizeUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
    } finally {
      writeUnlock();
    }
  }

  /** Re-read the datanode host lists. Requires superuser privilege. */
  void refreshNodes() throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    // A freshly constructed HdfsConfiguration picks up on-disk config edits.
    getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration());
  }

  /** Push a new balancer bandwidth to datanodes. Requires superuser. */
  void setBalancerBandwidth(long bandwidth) throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth);
  }
5563    
5564      /**
5565       * Persist the new block (the last block of the given file).
5566       * @param path
5567       * @param file
5568       */
5569      private void persistNewBlock(String path, INodeFile file) {
5570        Preconditions.checkArgument(file.isUnderConstruction());
5571        getEditLog().logAddBlock(path, file);
5572        if (NameNode.stateChangeLog.isDebugEnabled()) {
5573          NameNode.stateChangeLog.debug("persistNewBlock: "
5574                  + path + " with new block " + file.getLastBlock().toString()
5575                  + ", current total block count is " + file.getBlocks().length);
5576        }
5577      }
5578    
5579      /**
5580       * SafeModeInfo contains information related to the safe mode.
5581       * <p>
5582       * An instance of {@link SafeModeInfo} is created when the name node
5583       * enters safe mode.
5584       * <p>
5585       * During name node startup {@link SafeModeInfo} counts the number of
5586       * <em>safe blocks</em>, those that have at least the minimal number of
5587       * replicas, and calculates the ratio of safe blocks to the total number
5588       * of blocks in the system, which is the size of blocks in
5589       * {@link FSNamesystem#blockManager}. When the ratio reaches the
5590       * {@link #threshold} it starts the SafeModeMonitor daemon in order
5591       * to monitor whether the safe mode {@link #extension} is passed.
5592       * Then it leaves safe mode and destroys itself.
5593       * <p>
5594       * If safe mode is turned on manually then the number of safe blocks is
5595       * not tracked because the name node is not intended to leave safe mode
5596       * automatically in the case.
5597       *
5598       * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
5599       */
5600      public class SafeModeInfo {
5601        // configuration fields
5602        /** Safe mode threshold condition %.*/
5603        private final double threshold;
5604        /** Safe mode minimum number of datanodes alive */
5605        private final int datanodeThreshold;
5606        /**
5607         * Safe mode extension after the threshold.
5608         * Make it volatile so that getSafeModeTip can read the latest value
5609         * without taking a lock.
5610         */
5611        private volatile int extension;
5612        /** Min replication required by safe mode. */
5613        private final int safeReplication;
5614        /** threshold for populating needed replication queues */
5615        private final double replQueueThreshold;
5616        // internal fields
5617        /** Time when threshold was reached.
5618         * <br> -1 safe mode is off
5619         * <br> 0 safe mode is on, and threshold is not reached yet
5620         * <br> >0 safe mode is on, but we are in extension period 
5621         */
5622        private long reached = -1;  
5623        /** Total number of blocks. */
5624        int blockTotal; 
5625        /** Number of safe blocks. */
5626        int blockSafe;
5627        /** Number of blocks needed to satisfy safe mode threshold condition */
5628        private int blockThreshold;
5629        /** Number of blocks needed before populating replication queues */
5630        private int blockReplQueueThreshold;
5631        /** time of the last status printout */
5632        private long lastStatusReport = 0;
5633        /**
5634         * Was safemode entered automatically because available resources were low.
5635         * Make it volatile so that getSafeModeTip can read the latest value
5636         * without taking a lock.
5637         */
5638        private volatile boolean resourcesLow = false;
5639        /** Should safemode adjust its block totals as blocks come in */
5640        private boolean shouldIncrementallyTrackBlocks = false;
5641        /** counter for tracking startup progress of reported blocks */
5642        private Counter awaitingReportedBlocksCounter;
5643        
5644        /**
5645         * Creates SafeModeInfo when the name node enters
5646         * automatic safe mode at startup.
5647         *  
5648         * @param conf configuration
5649         */
5650        private SafeModeInfo(Configuration conf) {
5651          this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
5652              DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
5653          if(threshold > 1.0) {
5654            LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
5655          }
5656          this.datanodeThreshold = conf.getInt(
5657            DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
5658            DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
5659          this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
5660          this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY, 
5661                                             DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
5662          
5663          LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
5664          LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
5665          LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + "     = " + extension);
5666    
5667          // default to safe mode threshold (i.e., don't populate queues before leaving safe mode)
5668          this.replQueueThreshold = 
5669            conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
5670                          (float) threshold);
5671          this.blockTotal = 0; 
5672          this.blockSafe = 0;
5673        }
5674    
5675        /**
5676         * In the HA case, the StandbyNode can be in safemode while the namespace
5677         * is modified by the edit log tailer. In this case, the number of total
5678         * blocks changes as edits are processed (eg blocks are added and deleted).
5679         * However, we don't want to do the incremental tracking during the
5680         * startup-time loading process -- only once the initial total has been
5681         * set after the image has been loaded.
5682         */
5683        private boolean shouldIncrementallyTrackBlocks() {
5684          return shouldIncrementallyTrackBlocks;
5685        }
5686    
5687        /**
5688         * Creates SafeModeInfo when safe mode is entered manually, or because
5689         * available resources are low.
5690         *
5691         * The {@link #threshold} is set to 1.5 so that it could never be reached.
5692         * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
5693         * 
5694         * @see SafeModeInfo
5695         */
5696        private SafeModeInfo(boolean resourcesLow) {
5697          this.threshold = 1.5f;  // this threshold can never be reached
5698          this.datanodeThreshold = Integer.MAX_VALUE;
5699          this.extension = Integer.MAX_VALUE;
5700          this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
5701          this.replQueueThreshold = 1.5f; // can never be reached
5702          this.blockTotal = -1;
5703          this.blockSafe = -1;
5704          this.resourcesLow = resourcesLow;
5705          enter();
5706          reportStatus("STATE* Safe mode is ON.", true);
5707        }
5708          
5709        /**
5710         * Check if safe mode is on.
5711         * @return true if in safe mode
5712         */
5713        private synchronized boolean isOn() {
5714          doConsistencyCheck();
5715          return this.reached >= 0;
5716        }
5717          
5718        /**
5719         * Enter safe mode.
5720         */
5721        private void enter() {
5722          this.reached = 0;
5723        }
5724          
5725        /**
5726         * Leave safe mode.
5727         * <p>
5728         * Check for invalid, under- & over-replicated blocks in the end of startup.
5729         */
5730        private synchronized void leave() {
5731          // if not done yet, initialize replication queues.
5732          // In the standby, do not populate repl queues
5733          if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
5734            initializeReplQueues();
5735          }
5736          long timeInSafemode = now() - startTime;
5737          NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 
5738                                        + timeInSafemode/1000 + " secs");
5739          NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
5740    
5741          //Log the following only once (when transitioning from ON -> OFF)
5742          if (reached >= 0) {
5743            NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 
5744          }
5745          reached = -1;
5746          safeMode = null;
5747          final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
5748          NameNode.stateChangeLog.info("STATE* Network topology has "
5749              + nt.getNumOfRacks() + " racks and "
5750              + nt.getNumOfLeaves() + " datanodes");
5751          NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
5752              + blockManager.numOfUnderReplicatedBlocks() + " blocks");
5753    
5754          startSecretManagerIfNecessary();
5755    
5756          // If startup has not yet completed, end safemode phase.
5757          StartupProgress prog = NameNode.getStartupProgress();
5758          if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
5759            prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
5760            prog.endPhase(Phase.SAFEMODE);
5761          }
5762        }
5763    
5764        /**
5765         * Check whether we have reached the threshold for 
5766         * initializing replication queues.
5767         */
5768        private synchronized boolean canInitializeReplQueues() {
5769          return shouldPopulateReplQueues()
5770              && blockSafe >= blockReplQueueThreshold;
5771        }
5772          
5773        /** 
5774         * Safe mode can be turned off iff 
5775         * the threshold is reached and 
5776         * the extension time have passed.
5777         * @return true if can leave or false otherwise.
5778         */
5779        private synchronized boolean canLeave() {
5780          if (reached == 0) {
5781            return false;
5782          }
5783    
5784          if (now() - reached < extension) {
5785            reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
5786            return false;
5787          }
5788    
5789          if (needEnter()) {
5790            reportStatus("STATE* Safe mode ON, thresholds not met.", false);
5791            return false;
5792          }
5793    
5794          return true;
5795        }
5796          
5797        /** 
5798         * There is no need to enter safe mode 
5799         * if DFS is empty or {@link #threshold} == 0
5800         */
5801        private boolean needEnter() {
5802          return (threshold != 0 && blockSafe < blockThreshold) ||
5803            (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
5804            (!nameNodeHasResourcesAvailable());
5805        }
5806          
5807        /**
5808         * Check and trigger safe mode if needed. 
5809         */
5810        private void checkMode() {
5811          // Have to have write-lock since leaving safemode initializes
5812          // repl queues, which requires write lock
5813          assert hasWriteLock();
5814          if (inTransitionToActive()) {
5815            return;
5816          }
5817          // if smmthread is already running, the block threshold must have been 
5818          // reached before, there is no need to enter the safe mode again
5819          if (smmthread == null && needEnter()) {
5820            enter();
5821            // check if we are ready to initialize replication queues
5822            if (canInitializeReplQueues() && !isPopulatingReplQueues()
5823                && !haEnabled) {
5824              initializeReplQueues();
5825            }
5826            reportStatus("STATE* Safe mode ON.", false);
5827            return;
5828          }
5829          // the threshold is reached or was reached before
5830          if (!isOn() ||                           // safe mode is off
5831              extension <= 0 || threshold <= 0) {  // don't need to wait
5832            this.leave(); // leave safe mode
5833            return;
5834          }
5835          if (reached > 0) {  // threshold has already been reached before
5836            reportStatus("STATE* Safe mode ON.", false);
5837            return;
5838          }
5839          // start monitor
5840          reached = now();
5841          if (smmthread == null) {
5842            smmthread = new Daemon(new SafeModeMonitor());
5843            smmthread.start();
5844            reportStatus("STATE* Safe mode extension entered.", true);
5845          }
5846    
5847          // check if we are ready to initialize replication queues
5848          if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
5849            initializeReplQueues();
5850          }
5851        }
5852          
5853        /**
5854         * Set total number of blocks.
5855         */
5856        private synchronized void setBlockTotal(int total) {
5857          this.blockTotal = total;
5858          this.blockThreshold = (int) (blockTotal * threshold);
5859          this.blockReplQueueThreshold = 
5860            (int) (blockTotal * replQueueThreshold);
5861          if (haEnabled) {
5862            // After we initialize the block count, any further namespace
5863            // modifications done while in safe mode need to keep track
5864            // of the number of total blocks in the system.
5865            this.shouldIncrementallyTrackBlocks = true;
5866          }
5867          if(blockSafe < 0)
5868            this.blockSafe = 0;
5869          checkMode();
5870        }
5871          
5872        /**
5873         * Increment number of safe blocks if current block has 
5874         * reached minimal replication.
5875         * @param replication current replication 
5876         */
5877        private synchronized void incrementSafeBlockCount(short replication) {
5878          if (replication == safeReplication) {
5879            this.blockSafe++;
5880    
5881            // Report startup progress only if we haven't completed startup yet.
5882            StartupProgress prog = NameNode.getStartupProgress();
5883            if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
5884              if (this.awaitingReportedBlocksCounter == null) {
5885                this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
5886                  STEP_AWAITING_REPORTED_BLOCKS);
5887              }
5888              this.awaitingReportedBlocksCounter.increment();
5889            }
5890    
5891            checkMode();
5892          }
5893        }
5894          
5895        /**
5896         * Decrement number of safe blocks if current block has 
5897         * fallen below minimal replication.
5898         * @param replication current replication 
5899         */
5900        private synchronized void decrementSafeBlockCount(short replication) {
5901          if (replication == safeReplication-1) {
5902            this.blockSafe--;
5903            //blockSafe is set to -1 in manual / low resources safemode
5904            assert blockSafe >= 0 || isManual() || areResourcesLow();
5905            checkMode();
5906          }
5907        }
5908    
5909        /**
5910         * Check if safe mode was entered manually
5911         */
5912        private boolean isManual() {
5913          return extension == Integer.MAX_VALUE;
5914        }
5915    
5916        /**
5917         * Set manual safe mode.
5918         */
5919        private synchronized void setManual() {
5920          extension = Integer.MAX_VALUE;
5921        }
5922    
5923        /**
5924         * Check if safe mode was entered due to resources being low.
5925         */
5926        private boolean areResourcesLow() {
5927          return resourcesLow;
5928        }
5929    
5930        /**
5931         * Set that resources are low for this instance of safe mode.
5932         */
5933        private void setResourcesLow() {
5934          resourcesLow = true;
5935        }
5936    
5937        /**
5938         * A tip on how safe mode is to be turned off: manually or automatically.
5939         */
5940        String getTurnOffTip() {
5941          if(!isOn()) {
5942            return "Safe mode is OFF.";
5943          }
5944    
5945          //Manual OR low-resource safemode. (Admin intervention required)
5946          String adminMsg = "It was turned on manually. ";
5947          if (areResourcesLow()) {
5948            adminMsg = "Resources are low on NN. Please add or free up more "
5949              + "resources then turn off safe mode manually. NOTE:  If you turn off"
5950              + " safe mode before adding resources, "
5951              + "the NN will immediately return to safe mode. ";
5952          }
5953          if (isManual() || areResourcesLow()) {
5954            return adminMsg
5955              + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
5956          }
5957    
5958          boolean thresholdsMet = true;
5959          int numLive = getNumLiveDataNodes();
5960          String msg = "";
5961          if (blockSafe < blockThreshold) {
5962            msg += String.format(
5963              "The reported blocks %d needs additional %d"
5964              + " blocks to reach the threshold %.4f of total blocks %d.%n",
5965              blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
5966            thresholdsMet = false;
5967          } else {
5968            msg += String.format("The reported blocks %d has reached the threshold"
5969                + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
5970          }
5971          if (numLive < datanodeThreshold) {
5972            msg += String.format(
5973              "The number of live datanodes %d needs an additional %d live "
5974              + "datanodes to reach the minimum number %d.%n",
5975              numLive, (datanodeThreshold - numLive), datanodeThreshold);
5976            thresholdsMet = false;
5977          } else {
5978            msg += String.format("The number of live datanodes %d has reached "
5979                + "the minimum number %d. ",
5980                numLive, datanodeThreshold);
5981          }
5982          msg += (reached > 0) ? "In safe mode extension. " : "";
5983          msg += "Safe mode will be turned off automatically ";
5984    
5985          if (!thresholdsMet) {
5986            msg += "once the thresholds have been reached.";
5987          } else if (reached + extension - now() > 0) {
5988            msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
5989          } else {
5990            msg += "soon.";
5991          }
5992    
5993          return msg;
5994        }
5995    
5996        /**
5997         * Print status every 20 seconds.
5998         */
5999        private void reportStatus(String msg, boolean rightNow) {
6000          long curTime = now();
6001          if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
6002            return;
6003          NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
6004          lastStatusReport = curTime;
6005        }
6006    
6007        @Override
6008        public String toString() {
6009          String resText = "Current safe blocks = " 
6010            + blockSafe 
6011            + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
6012            + ". Minimal replication = " + safeReplication + ".";
6013          if (reached > 0) 
6014            resText += " Threshold was reached " + new Date(reached) + ".";
6015          return resText;
6016        }
6017          
6018        /**
6019         * Checks consistency of the class state.
6020         * This is costly so only runs if asserts are enabled.
6021         */
6022        private void doConsistencyCheck() {
6023          boolean assertsOn = false;
6024          assert assertsOn = true; // set to true if asserts are on
6025          if (!assertsOn) return;
6026          
6027          if (blockTotal == -1 && blockSafe == -1) {
6028            return; // manual safe mode
6029          }
6030          int activeBlocks = blockManager.getActiveBlockCount();
6031          if ((blockTotal != activeBlocks) &&
6032              !(blockSafe >= 0 && blockSafe <= blockTotal)) {
6033            throw new AssertionError(
6034                " SafeMode: Inconsistent filesystem state: "
6035            + "SafeMode data: blockTotal=" + blockTotal
6036            + " blockSafe=" + blockSafe + "; "
6037            + "BlockManager data: active="  + activeBlocks);
6038          }
6039        }
6040    
6041        private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
6042          if (!shouldIncrementallyTrackBlocks) {
6043            return;
6044          }
6045          assert haEnabled;
6046          
6047          if (LOG.isDebugEnabled()) {
6048            LOG.debug("Adjusting block totals from " +
6049                blockSafe + "/" + blockTotal + " to " +
6050                (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
6051          }
6052          assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
6053            blockSafe + " by " + deltaSafe + ": would be negative";
6054          assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
6055            blockTotal + " by " + deltaTotal + ": would be negative";
6056          
6057          blockSafe += deltaSafe;
6058          setBlockTotal(blockTotal + deltaTotal);
6059        }
6060      }
6061        
6062      /**
6063       * Periodically check whether it is time to leave safe mode.
6064       * This thread starts when the threshold level is reached.
6065       *
6066       */
6067      class SafeModeMonitor implements Runnable {
6068        /** interval in msec for checking safe mode: {@value} */
6069        private static final long recheckInterval = 1000;
6070          
6071        /**
6072         */
6073        @Override
6074        public void run() {
6075          while (fsRunning) {
6076            writeLock();
6077            try {
6078              if (safeMode == null) { // Not in safe mode.
6079                break;
6080              }
6081              if (safeMode.canLeave()) {
6082                // Leave safe mode.
6083                safeMode.leave();
6084                smmthread = null;
6085                break;
6086              }
6087            } finally {
6088              writeUnlock();
6089            }
6090    
6091            try {
6092              Thread.sleep(recheckInterval);
6093            } catch (InterruptedException ie) {
6094              // Ignored
6095            }
6096          }
6097          if (!fsRunning) {
6098            LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
6099          }
6100        }
6101      }
6102        
6103      boolean setSafeMode(SafeModeAction action) throws IOException {
6104        if (action != SafeModeAction.SAFEMODE_GET) {
6105          checkSuperuserPrivilege();
6106          switch(action) {
6107          case SAFEMODE_LEAVE: // leave safe mode
6108            leaveSafeMode();
6109            break;
6110          case SAFEMODE_ENTER: // enter safe mode
6111            enterSafeMode(false);
6112            break;
6113          default:
6114            LOG.error("Unexpected safe mode action");
6115          }
6116        }
6117        return isInSafeMode();
6118      }
6119    
6120      @Override
6121      public void checkSafeMode() {
6122        // safeMode is volatile, and may be set to null at any time
6123        SafeModeInfo safeMode = this.safeMode;
6124        if (safeMode != null) {
6125          safeMode.checkMode();
6126        }
6127      }
6128    
6129      @Override
6130      public boolean isInSafeMode() {
6131        // safeMode is volatile, and may be set to null at any time
6132        SafeModeInfo safeMode = this.safeMode;
6133        if (safeMode == null)
6134          return false;
6135        return safeMode.isOn();
6136      }
6137    
6138      @Override
6139      public boolean isInStartupSafeMode() {
6140        // safeMode is volatile, and may be set to null at any time
6141        SafeModeInfo safeMode = this.safeMode;
6142        if (safeMode == null)
6143          return false;
6144        // If the NN is in safemode, and not due to manual / low resources, we
6145        // assume it must be because of startup. If the NN had low resources during
6146        // startup, we assume it came out of startup safemode and it is now in low
6147        // resources safemode
6148        return !safeMode.isManual() && !safeMode.areResourcesLow()
6149          && safeMode.isOn();
6150      }
6151    
6152      /**
6153       * Check if replication queues are to be populated
6154       * @return true when node is HAState.Active and not in the very first safemode
6155       */
6156      @Override
6157      public boolean isPopulatingReplQueues() {
6158        if (!shouldPopulateReplQueues()) {
6159          return false;
6160        }
6161        return initializedReplQueues;
6162      }
6163    
6164      private boolean shouldPopulateReplQueues() {
6165        if(haContext == null || haContext.getState() == null)
6166          return false;
6167        return haContext.getState().shouldPopulateReplQueues();
6168      }
6169    
6170      @Override
6171      public void incrementSafeBlockCount(int replication) {
6172        // safeMode is volatile, and may be set to null at any time
6173        SafeModeInfo safeMode = this.safeMode;
6174        if (safeMode == null)
6175          return;
6176        safeMode.incrementSafeBlockCount((short)replication);
6177      }
6178    
6179      @Override
6180      public void decrementSafeBlockCount(Block b) {
6181        // safeMode is volatile, and may be set to null at any time
6182        SafeModeInfo safeMode = this.safeMode;
6183        if (safeMode == null) // mostly true
6184          return;
6185        BlockInfo storedBlock = getStoredBlock(b);
6186        if (storedBlock.isComplete()) {
6187          safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas());
6188        }
6189      }
6190      
6191      /**
6192       * Adjust the total number of blocks safe and expected during safe mode.
6193       * If safe mode is not currently on, this is a no-op.
6194       * @param deltaSafe the change in number of safe blocks
6195       * @param deltaTotal the change i nnumber of total blocks expected
6196       */
6197      @Override
6198      public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) {
6199        // safeMode is volatile, and may be set to null at any time
6200        SafeModeInfo safeMode = this.safeMode;
6201        if (safeMode == null)
6202          return;
6203        safeMode.adjustBlockTotals(deltaSafe, deltaTotal);
6204      }
6205    
6206      /**
6207       * Set the total number of blocks in the system. 
6208       */
6209      public void setBlockTotal() {
6210        // safeMode is volatile, and may be set to null at any time
6211        SafeModeInfo safeMode = this.safeMode;
6212        if (safeMode == null)
6213          return;
6214        safeMode.setBlockTotal((int)getCompleteBlocksTotal());
6215      }
6216    
6217      /**
6218       * Get the total number of blocks in the system. 
6219       */
6220      @Override // FSNamesystemMBean
6221      @Metric
6222      public long getBlocksTotal() {
6223        return blockManager.getTotalBlocks();
6224      }
6225    
6226      /**
6227       * Get the total number of COMPLETE blocks in the system.
6228       * For safe mode only complete blocks are counted.
6229       */
6230      private long getCompleteBlocksTotal() {
6231        // Calculate number of blocks under construction
6232        long numUCBlocks = 0;
6233        readLock();
6234        try {
6235          for (Lease lease : leaseManager.getSortedLeases()) {
6236            for (String path : lease.getPaths()) {
6237              final INodeFile cons;
6238              try {
6239                cons = dir.getINode(path).asFile();
6240                Preconditions.checkState(cons.isUnderConstruction());
6241              } catch (UnresolvedLinkException e) {
6242                throw new AssertionError("Lease files should reside on this FS");
6243              }
6244              BlockInfo[] blocks = cons.getBlocks();
6245              if(blocks == null)
6246                continue;
6247              for(BlockInfo b : blocks) {
6248                if(!b.isComplete())
6249                  numUCBlocks++;
6250              }
6251            }
6252          }
6253          LOG.info("Number of blocks under construction: " + numUCBlocks);
6254          return getBlocksTotal() - numUCBlocks;
6255        } finally {
6256          readUnlock();
6257        }
6258      }
6259    
6260      /**
6261       * Enter safe mode. If resourcesLow is false, then we assume it is manual
6262       * @throws IOException
6263       */
6264      void enterSafeMode(boolean resourcesLow) throws IOException {
6265        writeLock();
6266        try {
6267          // Stop the secret manager, since rolling the master key would
6268          // try to write to the edit log
6269          stopSecretManager();
6270    
6271          // Ensure that any concurrent operations have been fully synced
6272          // before entering safe mode. This ensures that the FSImage
6273          // is entirely stable on disk as soon as we're in safe mode.
6274          boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
6275          // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
6276          // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode
6277          if (isEditlogOpenForWrite) {
6278            getEditLog().logSyncAll();
6279          }
6280          if (!isInSafeMode()) {
6281            safeMode = new SafeModeInfo(resourcesLow);
6282            return;
6283          }
6284          if (resourcesLow) {
6285            safeMode.setResourcesLow();
6286          } else {
6287            safeMode.setManual();
6288          }
6289          if (isEditlogOpenForWrite) {
6290            getEditLog().logSyncAll();
6291          }
6292          NameNode.stateChangeLog.info("STATE* Safe mode is ON"
6293              + safeMode.getTurnOffTip());
6294        } finally {
6295          writeUnlock();
6296        }
6297      }
6298    
6299      /**
6300       * Leave safe mode.
6301       */
6302      void leaveSafeMode() {
6303        writeLock();
6304        try {
6305          if (!isInSafeMode()) {
6306            NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 
6307            return;
6308          }
6309          safeMode.leave();
6310        } finally {
6311          writeUnlock();
6312        }
6313      }
6314        
6315      String getSafeModeTip() {
6316        // There is no need to take readLock.
6317        // Don't use isInSafeMode as this.safeMode might be set to null.
6318        // after isInSafeMode returns.
6319        boolean inSafeMode;
6320        SafeModeInfo safeMode = this.safeMode;
6321        if (safeMode == null) {
6322          inSafeMode = false;
6323        } else {
6324          inSafeMode = safeMode.isOn();
6325        }
6326    
6327        if (!inSafeMode) {
6328          return "";
6329        } else {
6330          return safeMode.getTurnOffTip();
6331        }
6332      }
6333    
  /**
   * Roll the edit log: finalize the current segment and start a new one.
   * Superuser-only; disallowed in safe mode.
   *
   * @return the checkpoint signature of the rolled image/edits state
   * @throws IOException if not superuser, in safe mode, or the roll fails
   */
  CheckpointSignature rollEditLog() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.JOURNAL);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.JOURNAL);
      checkNameNodeSafeMode("Log not rolled");
      if (Server.isRpcInvocation()) {
        LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
      }
      return getFSImage().rollEditLog();
    } finally {
      writeUnlock();
    }
  }
6349    
  /**
   * Start a checkpoint on behalf of a backup node.
   * Uses the retry cache so a retried RPC returns the original command
   * instead of starting a second checkpoint.
   *
   * @param backupNode registration of the backup node requesting the checkpoint
   * @param activeNamenode registration of this (active) namenode
   * @return the command the backup node should execute
   * @throws IOException if in safe mode or the checkpoint cannot start
   */
  NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
      NamenodeRegistration activeNamenode) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (NamenodeCommand) cacheEntry.getPayload(); // retried request
    }
    writeLock();
    NamenodeCommand cmd = null;
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.CHECKPOINT);
      checkNameNodeSafeMode("Checkpoint not started");
      
      LOG.info("Start checkpoint for " + backupNode.getAddress());
      cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
      getEditLog().logSync();
      return cmd;
    } finally {
      writeUnlock();
      // Record success (cmd != null) and the payload for future retries.
      RetryCache.setState(cacheEntry, cmd != null, cmd);
    }
  }
6373    
  /**
   * Apply an incremental block report (received/deleted blocks) from a
   * datanode, delegating to the block manager under the write lock.
   *
   * @param nodeID the reporting datanode
   * @param srdb the received/deleted blocks for one storage
   * @throws IOException if the report cannot be processed
   */
  public void processIncrementalBlockReport(final DatanodeID nodeID,
      final StorageReceivedDeletedBlocks srdb)
      throws IOException {
    writeLock();
    try {
      blockManager.processIncrementalBlockReport(nodeID, srdb);
    } finally {
      writeUnlock();
    }
  }
6384      
  /**
   * Complete a checkpoint previously started via startCheckpoint.
   *
   * @param registration registration of the checkpointing node
   * @param sig signature identifying the checkpoint being finished
   * @throws IOException if checkpointing is disallowed or in safe mode
   */
  void endCheckpoint(NamenodeRegistration registration,
                            CheckpointSignature sig) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    // NOTE: only the read lock is taken here (unlike startCheckpoint) —
    // ending a checkpoint does not modify the namespace itself.
    readLock();
    try {
      checkOperation(OperationCategory.CHECKPOINT);

      checkNameNodeSafeMode("Checkpoint not ended");
      LOG.info("End checkpoint for " + registration.getAddress());
      getFSImage().endCheckpoint(sig);
      success = true;
    } finally {
      readUnlock();
      RetryCache.setState(cacheEntry, success);
    }
  }
6406    
  /**
   * Build a {@link PermissionStatus} owned by the filesystem owner and the
   * configured supergroup, with the given permission bits.
   */
  PermissionStatus createFsOwnerPermissions(FsPermission permission) {
    return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission);
  }
6410    
  /** Verify that the caller owns {@code path} (owner check only). */
  private void checkOwner(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, true, null, null, null, null);
  }
6415    
  /** Verify that the caller has {@code access} on the path's own inode. */
  private void checkPathAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, access, null);
  }
6421    
6422      private void checkUnreadableBySuperuser(FSPermissionChecker pc,
6423          INode inode, int snapshotId)
6424          throws IOException {
6425        for (XAttr xattr : dir.getXAttrs(inode, snapshotId)) {
6426          if (XAttrHelper.getPrefixName(xattr).
6427              equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) {
6428            if (pc.isSuperUser()) {
6429              throw new AccessControlException("Access is denied for " +
6430                  pc.getUser() + " since the superuser is not allowed to " +
6431                  "perform this operation.");
6432            }
6433          }
6434        }
6435      }
6436    
  /** Verify that the caller has {@code access} on the path's parent dir. */
  private void checkParentAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, access, null, null);
  }
6442    
  /** Verify that the caller has {@code access} on the last existing ancestor. */
  private void checkAncestorAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, access, null, null, null);
  }
6448    
  /** Verify that the caller may traverse (execute) every path component. */
  private void checkTraverse(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, null, null);
  }
6453    
6454      /**
6455       * This is a wrapper for FSDirectory.resolvePath(). If the path passed
6456       * is prefixed with /.reserved/raw, then it checks to ensure that the caller
6457       * has super user privs.
6458       *
6459       * @param path The path to resolve.
6460       * @param pathComponents path components corresponding to the path
6461       * @return if the path indicates an inode, return path after replacing up to
6462       *         <inodeid> with the corresponding path of the inode, else the path
6463       *         in {@code src} as is. If the path refers to a path in the "raw"
6464       *         directory, return the non-raw pathname.
6465       * @throws FileNotFoundException
6466       * @throws AccessControlException
6467       */
6468      private String resolvePath(String path, byte[][] pathComponents)
6469          throws FileNotFoundException, AccessControlException {
6470        if (FSDirectory.isReservedRawName(path)) {
6471          checkSuperuserPrivilege();
6472        }
6473        return FSDirectory.resolvePath(path, pathComponents, dir);
6474      }
6475    
6476      @Override
6477      public void checkSuperuserPrivilege()
6478          throws AccessControlException {
6479        if (isPermissionEnabled) {
6480          FSPermissionChecker pc = getPermissionChecker();
6481          pc.checkSuperuserPrivilege();
6482        }
6483      }
6484    
6485      /**
6486       * Check whether current user have permissions to access the path. For more
6487       * details of the parameters, see
6488       * {@link FSPermissionChecker#checkPermission}.
6489       */
6490      private void checkPermission(FSPermissionChecker pc,
6491          String path, boolean doCheckOwner, FsAction ancestorAccess,
6492          FsAction parentAccess, FsAction access, FsAction subAccess)
6493          throws AccessControlException, UnresolvedLinkException {
6494            checkPermission(pc, path, doCheckOwner, ancestorAccess,
6495                parentAccess, access, subAccess, false, true);
6496      }
6497    
6498      /**
6499       * Check whether current user have permissions to access the path. For more
6500       * details of the parameters, see
6501       * {@link FSPermissionChecker#checkPermission}.
6502       */
6503      private void checkPermission(FSPermissionChecker pc,
6504          String path, boolean doCheckOwner, FsAction ancestorAccess,
6505          FsAction parentAccess, FsAction access, FsAction subAccess,
6506          boolean ignoreEmptyDir, boolean resolveLink)
6507          throws AccessControlException, UnresolvedLinkException {
6508        if (!pc.isSuperUser()) {
6509          waitForLoadingFSImage();
6510          readLock();
6511          try {
6512            pc.checkPermission(path, dir, doCheckOwner, ancestorAccess,
6513                parentAccess, access, subAccess, ignoreEmptyDir, resolveLink);
6514          } finally {
6515            readUnlock();
6516          }
6517        }
6518      }
6519      
6520      /**
6521       * Check to see if we have exceeded the limit on the number
6522       * of inodes.
6523       */
6524      void checkFsObjectLimit() throws IOException {
6525        if (maxFsObjects != 0 &&
6526            maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
6527          throw new IOException("Exceeded the configured number of objects " +
6528                                 maxFsObjects + " in the filesystem.");
6529        }
6530      }
6531    
6532      /**
6533       * Get the total number of objects in the system. 
6534       */
6535      @Override // FSNamesystemMBean
6536      public long getMaxObjects() {
6537        return maxFsObjects;
6538      }
6539    
  /** @return total number of inodes (files and directories). */
  @Override // FSNamesystemMBean
  @Metric
  public long getFilesTotal() {
    // There is no need to take fSNamesystem's lock as
    // FSDirectory has its own lock.
    return this.dir.totalInodes();
  }
6547    
  /** @return number of blocks with replication currently in flight. */
  @Override // FSNamesystemMBean
  @Metric
  public long getPendingReplicationBlocks() {
    return blockManager.getPendingReplicationBlocksCount();
  }
6553    
  /** @return number of blocks below their target replication factor. */
  @Override // FSNamesystemMBean
  @Metric
  public long getUnderReplicatedBlocks() {
    return blockManager.getUnderReplicatedBlocksCount();
  }
6559    
  /** Returns number of blocks with corrupt replicas. */
  @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
  public long getCorruptReplicaBlocks() {
    return blockManager.getCorruptReplicaBlocksCount();
  }
6565    
  /** @return number of blocks scheduled for replication. */
  @Override // FSNamesystemMBean
  @Metric
  public long getScheduledReplicationBlocks() {
    return blockManager.getScheduledReplicationBlocksCount();
  }
6571    
  /** @return number of blocks queued for deletion on datanodes. */
  @Override
  @Metric
  public long getPendingDeletionBlocks() {
    return blockManager.getPendingDeletionBlocksCount();
  }
6577    
  /** @return time (ms) at which block deletion begins after startup delay. */
  @Override
  public long getBlockDeletionStartTime() {
    return startTime + blockManager.getStartupDelayBlockDeletionInMs();
  }
6582    
  /** @return number of over-replicated (excess) block replicas. */
  @Metric
  public long getExcessBlocks() {
    return blockManager.getExcessBlocksCount();
  }
6587      
  // HA-only metric: mis-replicated blocks whose processing is postponed.
  @Metric
  public long getPostponedMisreplicatedBlocks() {
    return blockManager.getPostponedMisreplicatedBlocksCount();
  }
6593    
  // HA-only metric: queued datanode messages awaiting processing.
  @Metric
  public int getPendingDataNodeMessageCount() {
    return blockManager.getPendingDataNodeMessageCount();
  }
6599      
  // HA-only metric: current HA state (e.g. active/standby) as a string.
  @Metric
  public String getHAState() {
    return haContext.getState().toString();
  }
6605    
6606      // HA-only metric
6607      @Metric
6608      public long getMillisSinceLastLoadedEdits() {
6609        if (isInStandbyState() && editLogTailer != null) {
6610          return now() - editLogTailer.getLastLoadTimestamp();
6611        } else {
6612          return 0;
6613        }
6614      }
6615      
  /** @return capacity of the block manager's blocks map. */
  @Metric
  public int getBlockCapacity() {
    return blockManager.getCapacity();
  }
6620    
6621      @Override // FSNamesystemMBean
6622      public String getFSState() {
6623        return isInSafeMode() ? "safeMode" : "Operational";
6624      }
6625      
  /** JMX name of the registered FSNamesystemState MBean; null if unset. */
  private ObjectName mbeanName;
  /** JMX name of the registered NameNode MXBean; null if unset. */
  private ObjectName mxbeanName;
6628    
6629      /**
6630       * Register the FSNamesystem MBean using the name
6631       *        "hadoop:service=NameNode,name=FSNamesystemState"
6632       */
6633      private void registerMBean() {
6634        // We can only implement one MXBean interface, so we keep the old one.
6635        try {
6636          StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
6637          mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
6638        } catch (NotCompliantMBeanException e) {
6639          throw new RuntimeException("Bad MBean setup", e);
6640        }
6641    
6642        LOG.info("Registered FSNamesystemState MBean");
6643      }
6644    
6645      /**
6646       * shutdown FSNamesystem
6647       */
6648      void shutdown() {
6649        if (snapshotManager != null) {
6650          snapshotManager.shutdown();
6651        }
6652        if (mbeanName != null) {
6653          MBeans.unregister(mbeanName);
6654          mbeanName = null;
6655        }
6656        if (mxbeanName != null) {
6657          MBeans.unregister(mxbeanName);
6658          mxbeanName = null;
6659        }
6660        if (dir != null) {
6661          dir.shutdown();
6662        }
6663        if (blockManager != null) {
6664          blockManager.shutdown();
6665        }
6666      }
6667    
  /** @return number of live datanodes. */
  @Override // FSNamesystemMBean
  public int getNumLiveDataNodes() {
    return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
  }
6672    
  /** @return number of dead datanodes. */
  @Override // FSNamesystemMBean
  public int getNumDeadDataNodes() {
    return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
  }
6677      
6678      @Override // FSNamesystemMBean
6679      public int getNumDecomLiveDataNodes() {
6680        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6681        getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
6682        int liveDecommissioned = 0;
6683        for (DatanodeDescriptor node : live) {
6684          liveDecommissioned += node.isDecommissioned() ? 1 : 0;
6685        }
6686        return liveDecommissioned;
6687      }
6688    
6689      @Override // FSNamesystemMBean
6690      public int getNumDecomDeadDataNodes() {
6691        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
6692        getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
6693        int deadDecommissioned = 0;
6694        for (DatanodeDescriptor node : dead) {
6695          deadDecommissioned += node.isDecommissioned() ? 1 : 0;
6696        }
6697        return deadDecommissioned;
6698      }
6699    
  /** @return number of datanodes currently decommissioning. */
  @Override // FSNamesystemMBean
  public int getNumDecommissioningDataNodes() {
    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
        .size();
  }
6705    
  /** @return number of datanodes marked stale from delayed heartbeats. */
  @Override // FSNamesystemMBean
  @Metric({"StaleDataNodes", 
    "Number of datanodes marked stale due to delayed heartbeat"})
  public int getNumStaleDataNodes() {
    return getBlockManager().getDatanodeManager().getNumStaleNodes();
  }
6712    
6713      /**
6714       * Storages are marked as "content stale" after NN restart or fails over and
6715       * before NN receives the first Heartbeat followed by the first Blockreport.
6716       */
6717      @Override // FSNamesystemMBean
6718      public int getNumStaleStorages() {
6719        return getBlockManager().getDatanodeManager().getNumStaleStorages();
6720      }
6721    
6722      /**
6723       * Sets the current generation stamp for legacy blocks
6724       */
6725      void setGenerationStampV1(long stamp) {
6726        generationStampV1.setCurrentValue(stamp);
6727      }
6728    
6729      /**
6730       * Gets the current generation stamp for legacy blocks
6731       */
6732      long getGenerationStampV1() {
6733        return generationStampV1.getCurrentValue();
6734      }
6735    
6736      /**
6737       * Gets the current generation stamp for this filesystem
6738       */
6739      void setGenerationStampV2(long stamp) {
6740        generationStampV2.setCurrentValue(stamp);
6741      }
6742    
6743      /**
6744       * Gets the current generation stamp for this filesystem
6745       */
6746      long getGenerationStampV2() {
6747        return generationStampV2.getCurrentValue();
6748      }
6749    
6750      /**
6751       * Upgrades the generation stamp for the filesystem
6752       * by reserving a sufficient range for all existing blocks.
6753       * Should be invoked only during the first upgrade to
6754       * sequential block IDs.
6755       */
6756      long upgradeGenerationStampToV2() {
6757        Preconditions.checkState(generationStampV2.getCurrentValue() ==
6758            GenerationStamp.LAST_RESERVED_STAMP);
6759    
6760        generationStampV2.skipTo(
6761            generationStampV1.getCurrentValue() +
6762            HdfsConstants.RESERVED_GENERATION_STAMPS_V1);
6763    
6764        generationStampV1Limit = generationStampV2.getCurrentValue();
6765        return generationStampV2.getCurrentValue();
6766      }
6767    
6768      /**
6769       * Sets the generation stamp that delineates random and sequentially
6770       * allocated block IDs.
6771       * @param stamp set generation stamp limit to this value
6772       */
6773      void setGenerationStampV1Limit(long stamp) {
6774        Preconditions.checkState(generationStampV1Limit ==
6775                                 GenerationStamp.GRANDFATHER_GENERATION_STAMP);
6776        generationStampV1Limit = stamp;
6777      }
6778    
6779      /**
6780       * Gets the value of the generation stamp that delineates sequential
6781       * and random block IDs.
6782       */
6783      long getGenerationStampAtblockIdSwitch() {
6784        return generationStampV1Limit;
6785      }
6786    
  /** Exposed for tests only. */
  @VisibleForTesting
  SequentialBlockIdGenerator getBlockIdGenerator() {
    return blockIdGenerator;
  }
6791    
6792      /**
6793       * Sets the maximum allocated block ID for this filesystem. This is
6794       * the basis for allocating new block IDs.
6795       */
6796      void setLastAllocatedBlockId(long blockId) {
6797        blockIdGenerator.skipTo(blockId);
6798      }
6799    
6800      /**
6801       * Gets the maximum sequentially allocated block ID for this filesystem
6802       */
6803      long getLastAllocatedBlockId() {
6804        return blockIdGenerator.getCurrentValue();
6805      }
6806    
6807      /**
6808       * Increments, logs and then returns the stamp
6809       */
6810      long nextGenerationStamp(boolean legacyBlock)
6811          throws IOException, SafeModeException {
6812        assert hasWriteLock();
6813        checkNameNodeSafeMode("Cannot get next generation stamp");
6814    
6815        long gs;
6816        if (legacyBlock) {
6817          gs = getNextGenerationStampV1();
6818          getEditLog().logGenerationStampV1(gs);
6819        } else {
6820          gs = getNextGenerationStampV2();
6821          getEditLog().logGenerationStampV2(gs);
6822        }
6823    
6824        // NB: callers sync the log
6825        return gs;
6826      }
6827    
6828      @VisibleForTesting
6829      long getNextGenerationStampV1() throws IOException {
6830        long genStampV1 = generationStampV1.nextValue();
6831    
6832        if (genStampV1 >= generationStampV1Limit) {
6833          // We ran out of generation stamps for legacy blocks. In practice, it
6834          // is extremely unlikely as we reserved 1T v1 generation stamps. The
6835          // result is that we can no longer append to the legacy blocks that
6836          // were created before the upgrade to sequential block IDs.
6837          throw new OutOfV1GenerationStampsException();
6838        }
6839    
6840        return genStampV1;
6841      }
6842    
  /** Draw the next V2 (sequential-era) generation stamp. */
  @VisibleForTesting
  long getNextGenerationStampV2() {
    return generationStampV2.nextValue();
  }
6847    
  /** @return the stamp separating legacy from sequential block IDs. */
  long getGenerationStampV1Limit() {
    return generationStampV1Limit;
  }
6851    
6852      /**
6853       * Determine whether the block ID was randomly generated (legacy) or
6854       * sequentially generated. The generation stamp value is used to
6855       * make the distinction.
6856       * @return true if the block ID was randomly generated, false otherwise.
6857       */
6858      boolean isLegacyBlock(Block block) {
6859        return block.getGenerationStamp() < getGenerationStampV1Limit();
6860      }
6861    
6862      /**
6863       * Increments, logs and then returns the block ID
6864       */
6865      private long nextBlockId() throws IOException {
6866        assert hasWriteLock();
6867        checkNameNodeSafeMode("Cannot get next block ID");
6868        final long blockId = blockIdGenerator.nextValue();
6869        getEditLog().logAllocateBlockId(blockId);
6870        // NB: callers sync the log
6871        return blockId;
6872      }
6873    
6874      private boolean isFileDeleted(INodeFile file) {
6875        // Not in the inodeMap or in the snapshot but marked deleted.
6876        if (dir.getInode(file.getId()) == null) {
6877          return true;
6878        }
6879    
6880        // look at the path hierarchy to see if one parent is deleted by recursive
6881        // deletion
6882        INode tmpChild = file;
6883        INodeDirectory tmpParent = file.getParent();
6884        while (true) {
6885          if (tmpParent == null ||
6886              tmpParent.searchChildren(tmpChild.getLocalNameBytes()) < 0) {
6887            return true;
6888          }
6889          if (tmpParent.isRoot()) {
6890            break;
6891          }
6892          tmpChild = tmpParent;
6893          tmpParent = tmpParent.getParent();
6894        }
6895    
6896        if (file.isWithSnapshot() &&
6897            file.getFileWithSnapshotFeature().isCurrentFileDeleted()) {
6898          return true;
6899        }
6900        return false;
6901      }
6902    
6903      private INodeFile checkUCBlock(ExtendedBlock block,
6904          String clientName) throws IOException {
6905        assert hasWriteLock();
6906        checkNameNodeSafeMode("Cannot get a new generation stamp and an "
6907            + "access token for block " + block);
6908        
6909        // check stored block state
6910        BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
6911        if (storedBlock == null || 
6912            storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
6913            throw new IOException(block + 
6914                " does not exist or is not under Construction" + storedBlock);
6915        }
6916        
6917        // check file inode
6918        final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
6919        if (file == null || !file.isUnderConstruction() || isFileDeleted(file)) {
6920          throw new IOException("The file " + storedBlock + 
6921              " belonged to does not exist or it is not under construction.");
6922        }
6923        
6924        // check lease
6925        if (clientName == null
6926            || !clientName.equals(file.getFileUnderConstructionFeature()
6927                .getClientName())) {
6928          throw new LeaseExpiredException("Lease mismatch: " + block + 
6929              " is accessed by a non lease holder " + clientName); 
6930        }
6931    
6932        return file;
6933      }
6934      
6935      /**
6936       * Client is reporting some bad block locations.
6937       */
6938      void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
6939        checkOperation(OperationCategory.WRITE);
6940        NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
6941        writeLock();
6942        try {
6943          checkOperation(OperationCategory.WRITE);
6944          for (int i = 0; i < blocks.length; i++) {
6945            ExtendedBlock blk = blocks[i].getBlock();
6946            DatanodeInfo[] nodes = blocks[i].getLocations();
6947            String[] storageIDs = blocks[i].getStorageIDs();
6948            for (int j = 0; j < nodes.length; j++) {
6949              blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
6950                  storageIDs == null ? null: storageIDs[j], 
6951                  "client machine reported it");
6952            }
6953          }
6954        } finally {
6955          writeUnlock();
6956        }
6957      }
6958    
6959      /**
6960       * Get a new generation stamp together with an access token for 
6961       * a block under construction
6962       * 
6963       * This method is called for recovering a failed pipeline or setting up
6964       * a pipeline to append to a block.
6965       * 
6966       * @param block a block
6967       * @param clientName the name of a client
6968       * @return a located block with a new generation stamp and an access token
6969       * @throws IOException if any error occurs
6970       */
6971      LocatedBlock updateBlockForPipeline(ExtendedBlock block, 
6972          String clientName) throws IOException {
6973        LocatedBlock locatedBlock;
6974        checkOperation(OperationCategory.WRITE);
6975        writeLock();
6976        try {
6977          checkOperation(OperationCategory.WRITE);
6978    
6979          // check vadility of parameters
6980          checkUCBlock(block, clientName);
6981      
6982          // get a new generation stamp and an access token
6983          block.setGenerationStamp(
6984              nextGenerationStamp(isLegacyBlock(block.getLocalBlock())));
6985          locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
6986          blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
6987        } finally {
6988          writeUnlock();
6989        }
6990        // Ensure we record the new generation stamp
6991        getEditLog().logSync();
6992        return locatedBlock;
6993      }
6994      
6995      /**
6996       * Update a pipeline for a block under construction
6997       * 
6998       * @param clientName the name of the client
6999       * @param oldBlock and old block
7000       * @param newBlock a new block with a new generation stamp and length
7001       * @param newNodes datanodes in the pipeline
7002       * @throws IOException if any error occurs
7003       */
7004      void updatePipeline(String clientName, ExtendedBlock oldBlock, 
7005          ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
7006          throws IOException {
7007        checkOperation(OperationCategory.WRITE);
7008        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7009        if (cacheEntry != null && cacheEntry.isSuccess()) {
7010          return; // Return previous response
7011        }
7012        LOG.info("updatePipeline(block=" + oldBlock
7013                 + ", newGenerationStamp=" + newBlock.getGenerationStamp()
7014                 + ", newLength=" + newBlock.getNumBytes()
7015                 + ", newNodes=" + Arrays.asList(newNodes)
7016                 + ", clientName=" + clientName
7017                 + ")");
7018        waitForLoadingFSImage();
7019        writeLock();
7020        boolean success = false;
7021        try {
7022          checkOperation(OperationCategory.WRITE);
7023          checkNameNodeSafeMode("Pipeline not updated");
7024          assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
7025            + oldBlock + " has different block identifier";
7026          updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
7027              newStorageIDs, cacheEntry != null);
7028          success = true;
7029        } finally {
7030          writeUnlock();
7031          RetryCache.setState(cacheEntry, success);
7032        }
7033        getEditLog().logSync();
7034        LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock);
7035      }
7036    
7037      /**
7038       * @see #updatePipeline(String,  ExtendedBlock, ExtendedBlock, DatanodeID[], String[])
7039       */
7040      private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, 
7041          ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
7042          boolean logRetryCache)
7043          throws IOException {
7044        assert hasWriteLock();
7045        // check the vadility of the block and lease holder name
7046        final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
7047        final BlockInfoUnderConstruction blockinfo
7048            = (BlockInfoUnderConstruction)pendingFile.getLastBlock();
7049    
7050        // check new GS & length: this is not expected
7051        if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
7052            newBlock.getNumBytes() < blockinfo.getNumBytes()) {
7053          String msg = "Update " + oldBlock + " (len = " + 
7054            blockinfo.getNumBytes() + ") to an older state: " + newBlock + 
7055            " (len = " + newBlock.getNumBytes() +")";
7056          LOG.warn(msg);
7057          throw new IOException(msg);
7058        }
7059    
7060        // Update old block with the new generation stamp and new length
7061        blockinfo.setNumBytes(newBlock.getNumBytes());
7062        blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());
7063    
7064        // find the DatanodeDescriptor objects
7065        final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
7066            .getDatanodeStorageInfos(newNodes, newStorageIDs);
7067        blockinfo.setExpectedLocations(storages);
7068    
7069        String src = pendingFile.getFullPathName();
7070        persistBlocks(src, pendingFile, logRetryCache);
7071      }
7072    
  /**
   * rename was successful. If any part of the renamed subtree had
   * files that were being written to, update with new filename.
   * Caller must hold the write lock.
   */
  void unprotectedChangeLease(String src, String dst) {
    assert hasWriteLock();
    leaseManager.changeLease(src, dst);
  }
7079    
7080      /**
7081       * Serializes leases.
7082       */
7083      void saveFilesUnderConstruction(DataOutputStream out,
7084          Map<Long, INodeFile> snapshotUCMap) throws IOException {
7085        // This is run by an inferior thread of saveNamespace, which holds a read
7086        // lock on our behalf. If we took the read lock here, we could block
7087        // for fairness if a writer is waiting on the lock.
7088        synchronized (leaseManager) {
7089          Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction();
7090          for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
7091            // TODO: for HDFS-5428, because of rename operations, some
7092            // under-construction files that are
7093            // in the current fs directory can also be captured in the
7094            // snapshotUCMap. We should remove them from the snapshotUCMap.
7095            snapshotUCMap.remove(entry.getValue().getId());
7096          }
7097    
7098          out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size
7099          for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
7100            FSImageSerialization.writeINodeUnderConstruction(
7101                out, entry.getValue(), entry.getKey());
7102          }
7103          for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) {
7104            // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>"
7105            // as their paths
7106            StringBuilder b = new StringBuilder();
7107            b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX)
7108                .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING)
7109                .append(Path.SEPARATOR).append(entry.getValue().getId());
7110            FSImageSerialization.writeINodeUnderConstruction(
7111                out, entry.getValue(), b.toString());
7112          }
7113        }
7114      }
7115    
7116      /**
7117       * @return all the under-construction files in the lease map
7118       */
7119      Map<String, INodeFile> getFilesUnderConstruction() {
7120        synchronized (leaseManager) {
7121          return leaseManager.getINodesUnderConstruction();
7122        }
7123      }
7124    
7125      /**
7126       * Register a Backup name-node, verifying that it belongs
7127       * to the correct namespace, and adding it to the set of
7128       * active journals if necessary.
7129       * 
7130       * @param bnReg registration of the new BackupNode
7131       * @param nnReg registration of this NameNode
7132       * @throws IOException if the namespace IDs do not match
7133       */
7134      void registerBackupNode(NamenodeRegistration bnReg,
7135          NamenodeRegistration nnReg) throws IOException {
7136        writeLock();
7137        try {
7138          if(getFSImage().getStorage().getNamespaceID() 
7139             != bnReg.getNamespaceID())
7140            throw new IOException("Incompatible namespaceIDs: "
7141                + " Namenode namespaceID = "
7142                + getFSImage().getStorage().getNamespaceID() + "; "
7143                + bnReg.getRole() +
7144                " node namespaceID = " + bnReg.getNamespaceID());
7145          if (bnReg.getRole() == NamenodeRole.BACKUP) {
7146            getFSImage().getEditLog().registerBackupNode(
7147                bnReg, nnReg);
7148          }
7149        } finally {
7150          writeUnlock();
7151        }
7152      }
7153    
7154      /**
7155       * Release (unregister) backup node.
7156       * <p>
7157       * Find and remove the backup stream corresponding to the node.
7158       * @throws IOException
7159       */
7160      void releaseBackupNode(NamenodeRegistration registration)
7161        throws IOException {
7162        checkOperation(OperationCategory.WRITE);
7163        writeLock();
7164        try {
7165          checkOperation(OperationCategory.WRITE);
7166          if(getFSImage().getStorage().getNamespaceID()
7167             != registration.getNamespaceID())
7168            throw new IOException("Incompatible namespaceIDs: "
7169                + " Namenode namespaceID = "
7170                + getFSImage().getStorage().getNamespaceID() + "; "
7171                + registration.getRole() +
7172                " node namespaceID = " + registration.getNamespaceID());
7173          getEditLog().releaseBackupStream(registration);
7174        } finally {
7175          writeUnlock();
7176        }
7177      }
7178    
  /**
   * Immutable pairing of a file path with one of its corrupt blocks,
   * as produced by {@link #listCorruptFileBlocks}.
   */
  static class CorruptFileBlockInfo {
    // Full path of the file owning the corrupt block.
    final String path;
    // The corrupt block itself.
    final Block block;
    
    public CorruptFileBlockInfo(String p, Block b) {
      path = p;
      block = b;
    }
    
    @Override
    public String toString() {
      // Rendered as "<block name>\t<file path>".
      return block.getBlockName() + "\t" + path;
    }
  }
7193      /**
7194       * @param path Restrict corrupt files to this portion of namespace.
7195       * @param cookieTab Support for continuation; cookieTab  tells where
7196       *                  to start from
7197       * @return a list in which each entry describes a corrupt file/block
7198       * @throws IOException
7199       */
7200      Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
7201      String[] cookieTab) throws IOException {
7202        checkSuperuserPrivilege();
7203        checkOperation(OperationCategory.READ);
7204    
7205        int count = 0;
7206        ArrayList<CorruptFileBlockInfo> corruptFiles =
7207            new ArrayList<CorruptFileBlockInfo>();
7208        if (cookieTab == null) {
7209          cookieTab = new String[] { null };
7210        }
7211    
7212        // Do a quick check if there are any corrupt files without taking the lock
7213        if (blockManager.getMissingBlocksCount() == 0) {
7214          if (cookieTab[0] == null) {
7215            cookieTab[0] = String.valueOf(getIntCookie(cookieTab[0]));
7216          }
7217          LOG.info("there are no corrupt file blocks.");
7218          return corruptFiles;
7219        }
7220    
7221        readLock();
7222        try {
7223          checkOperation(OperationCategory.READ);
7224          if (!isPopulatingReplQueues()) {
7225            throw new IOException("Cannot run listCorruptFileBlocks because " +
7226                                  "replication queues have not been initialized.");
7227          }
7228          // print a limited # of corrupt files per call
7229    
7230          final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();
7231    
7232          int skip = getIntCookie(cookieTab[0]);
7233          for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
7234            blkIterator.next();
7235          }
7236    
7237          while (blkIterator.hasNext()) {
7238            Block blk = blkIterator.next();
7239            final INode inode = (INode)blockManager.getBlockCollection(blk);
7240            skip++;
7241            if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
7242              String src = FSDirectory.getFullPathName(inode);
7243              if (src.startsWith(path)){
7244                corruptFiles.add(new CorruptFileBlockInfo(src, blk));
7245                count++;
7246                if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
7247                  break;
7248              }
7249            }
7250          }
7251          cookieTab[0] = String.valueOf(skip);
7252          LOG.info("list corrupt file blocks returned: " + count);
7253          return corruptFiles;
7254        } finally {
7255          readUnlock();
7256        }
7257      }
7258    
7259      /**
7260       * Convert string cookie to integer.
7261       */
7262      private static int getIntCookie(String cookie){
7263        int c;
7264        if(cookie == null){
7265          c = 0;
7266        } else {
7267          try{
7268            c = Integer.parseInt(cookie);
7269          }catch (NumberFormatException e) {
7270            c = 0;
7271          }
7272        }
7273        c = Math.max(0, c);
7274        return c;
7275      }
7276    
7277      /**
7278       * Create delegation token secret manager
7279       */
7280      private DelegationTokenSecretManager createDelegationTokenSecretManager(
7281          Configuration conf) {
7282        return new DelegationTokenSecretManager(conf.getLong(
7283            DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
7284            DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
7285            conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
7286                DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
7287            conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
7288                DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
7289            DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
7290            conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
7291                DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
7292            this);
7293      }
7294    
7295      /**
7296       * Returns the DelegationTokenSecretManager instance in the namesystem.
7297       * @return delegation token secret manager object
7298       */
7299      DelegationTokenSecretManager getDelegationTokenSecretManager() {
7300        return dtSecretManager;
7301      }
7302    
7303      /**
7304       * @param renewer Renewer information
7305       * @return delegation toek
7306       * @throws IOException on error
7307       */
7308      Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
7309          throws IOException {
7310        Token<DelegationTokenIdentifier> token;
7311        checkOperation(OperationCategory.WRITE);
7312        writeLock();
7313        try {
7314          checkOperation(OperationCategory.WRITE);
7315          checkNameNodeSafeMode("Cannot issue delegation token");
7316          if (!isAllowedDelegationTokenOp()) {
7317            throw new IOException(
7318              "Delegation Token can be issued only with kerberos or web authentication");
7319          }
7320          if (dtSecretManager == null || !dtSecretManager.isRunning()) {
7321            LOG.warn("trying to get DT with no secret manager running");
7322            return null;
7323          }
7324    
7325          UserGroupInformation ugi = getRemoteUser();
7326          String user = ugi.getUserName();
7327          Text owner = new Text(user);
7328          Text realUser = null;
7329          if (ugi.getRealUser() != null) {
7330            realUser = new Text(ugi.getRealUser().getUserName());
7331          }
7332          DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
7333            renewer, realUser);
7334          token = new Token<DelegationTokenIdentifier>(
7335            dtId, dtSecretManager);
7336          long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
7337          getEditLog().logGetDelegationToken(dtId, expiryTime);
7338        } finally {
7339          writeUnlock();
7340        }
7341        getEditLog().logSync();
7342        return token;
7343      }
7344    
7345      /**
7346       * 
7347       * @param token token to renew
7348       * @return new expiryTime of the token
7349       * @throws InvalidToken if {@code token} is invalid
7350       * @throws IOException on other errors
7351       */
7352      long renewDelegationToken(Token<DelegationTokenIdentifier> token)
7353          throws InvalidToken, IOException {
7354        long expiryTime;
7355        checkOperation(OperationCategory.WRITE);
7356        writeLock();
7357        try {
7358          checkOperation(OperationCategory.WRITE);
7359    
7360          checkNameNodeSafeMode("Cannot renew delegation token");
7361          if (!isAllowedDelegationTokenOp()) {
7362            throw new IOException(
7363                "Delegation Token can be renewed only with kerberos or web authentication");
7364          }
7365          String renewer = getRemoteUser().getShortUserName();
7366          expiryTime = dtSecretManager.renewToken(token, renewer);
7367          DelegationTokenIdentifier id = new DelegationTokenIdentifier();
7368          ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
7369          DataInputStream in = new DataInputStream(buf);
7370          id.readFields(in);
7371          getEditLog().logRenewDelegationToken(id, expiryTime);
7372        } finally {
7373          writeUnlock();
7374        }
7375        getEditLog().logSync();
7376        return expiryTime;
7377      }
7378    
7379      /**
7380       * 
7381       * @param token token to cancel
7382       * @throws IOException on error
7383       */
7384      void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
7385          throws IOException {
7386        checkOperation(OperationCategory.WRITE);
7387        writeLock();
7388        try {
7389          checkOperation(OperationCategory.WRITE);
7390    
7391          checkNameNodeSafeMode("Cannot cancel delegation token");
7392          String canceller = getRemoteUser().getUserName();
7393          DelegationTokenIdentifier id = dtSecretManager
7394            .cancelToken(token, canceller);
7395          getEditLog().logCancelDelegationToken(id);
7396        } finally {
7397          writeUnlock();
7398        }
7399        getEditLog().logSync();
7400      }
7401    
7402      /**
7403       * @param out save state of the secret manager
7404       * @param sdPath String storage directory path
7405       */
7406      void saveSecretManagerStateCompat(DataOutputStream out, String sdPath)
7407          throws IOException {
7408        dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
7409      }
7410    
  /** Snapshot the secret manager state for serialization into an fsimage. */
  SecretManagerState saveSecretManagerState() {
    return dtSecretManager.saveSecretManagerState();
  }
7414    
7415      /**
7416       * @param in load the state of secret manager from input stream
7417       */
7418      void loadSecretManagerStateCompat(DataInput in) throws IOException {
7419        dtSecretManager.loadSecretManagerStateCompat(in);
7420      }
7421    
  /**
   * Load secret manager state (section header, delegation keys and
   * persisted tokens) from a protobuf-based fsimage section.
   */
  void loadSecretManagerState(SecretManagerSection s,
      List<SecretManagerSection.DelegationKey> keys,
      List<SecretManagerSection.PersistToken> tokens) throws IOException {
    dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
  }
7427    
7428      /**
7429       * Log the updateMasterKey operation to edit logs
7430       * 
7431       * @param key new delegation key.
7432       */
7433      public void logUpdateMasterKey(DelegationKey key) {
7434        
7435        assert !isInSafeMode() :
7436          "this should never be called while in safemode, since we stop " +
7437          "the DT manager before entering safemode!";
7438        // No need to hold FSN lock since we don't access any internal
7439        // structures, and this is stopped before the FSN shuts itself
7440        // down, etc.
7441        getEditLog().logUpdateMasterKey(key);
7442        getEditLog().logSync();
7443      }
7444      
7445      /**
7446       * Log the cancellation of expired tokens to edit logs
7447       * 
7448       * @param id token identifier to cancel
7449       */
7450      public void logExpireDelegationToken(DelegationTokenIdentifier id) {
7451        assert !isInSafeMode() :
7452          "this should never be called while in safemode, since we stop " +
7453          "the DT manager before entering safemode!";
7454        // No need to hold FSN lock since we don't access any internal
7455        // structures, and this is stopped before the FSN shuts itself
7456        // down, etc.
7457        getEditLog().logCancelDelegationToken(id);
7458      }  
7459      
  /**
   * Log a lease reassignment from {@code leaseHolder} to {@code newHolder}
   * for file {@code src}.  Caller must hold the FSN write lock; no logSync
   * is performed here.
   */
  private void logReassignLease(String leaseHolder, String src,
      String newHolder) {
    assert hasWriteLock();
    getEditLog().logReassignLease(leaseHolder, src, newHolder);
  }
7465      
7466      /**
7467       * 
7468       * @return true if delegation token operation is allowed
7469       */
7470      private boolean isAllowedDelegationTokenOp() throws IOException {
7471        AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
7472        if (UserGroupInformation.isSecurityEnabled()
7473            && (authMethod != AuthenticationMethod.KERBEROS)
7474            && (authMethod != AuthenticationMethod.KERBEROS_SSL)
7475            && (authMethod != AuthenticationMethod.CERTIFICATE)) {
7476          return false;
7477        }
7478        return true;
7479      }
7480      
7481      /**
7482       * Returns authentication method used to establish the connection
7483       * @return AuthenticationMethod used to establish connection
7484       * @throws IOException
7485       */
7486      private AuthenticationMethod getConnectionAuthenticationMethod()
7487          throws IOException {
7488        UserGroupInformation ugi = getRemoteUser();
7489        AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
7490        if (authMethod == AuthenticationMethod.PROXY) {
7491          authMethod = ugi.getRealUser().getAuthenticationMethod();
7492        }
7493        return authMethod;
7494      }
7495      
7496      /**
7497       * Client invoked methods are invoked over RPC and will be in 
7498       * RPC call context even if the client exits.
7499       */
7500      private boolean isExternalInvocation() {
7501        return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
7502      }
7503    
7504      private static InetAddress getRemoteIp() {
7505        InetAddress ip = Server.getRemoteIp();
7506        if (ip != null) {
7507          return ip;
7508        }
7509        return NamenodeWebHdfsMethods.getRemoteIp();
7510      }
7511      
  // optimize ugi lookup for RPC operations to avoid a trip through
  // UGI.getCurrentUser which is synch'ed
  /** Delegates to {@link NameNode#getRemoteUser()}. */
  private static UserGroupInformation getRemoteUser() throws IOException {
    return NameNode.getRemoteUser();
  }
7517      
7518      /**
7519       * Log fsck event in the audit log 
7520       */
7521      void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
7522        if (isAuditEnabled()) {
7523          logAuditEvent(true, getRemoteUser(),
7524                        remoteAddress,
7525                        "fsck", src, null, null);
7526        }
7527      }
7528      /**
7529       * Register NameNodeMXBean
7530       */
7531      private void registerMXBean() {
7532        mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
7533      }
7534    
7535      /**
7536       * Class representing Namenode information for JMX interfaces
7537       */
7538      @Override // NameNodeMXBean
7539      public String getVersion() {
7540        return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
7541      }
7542    
  /** @return used DFS capacity; delegates to {@code getCapacityUsed()}. */
  @Override // NameNodeMXBean
  public long getUsed() {
    return this.getCapacityUsed();
  }
7547    
  /** @return remaining DFS capacity; delegates to {@code getCapacityRemaining()}. */
  @Override // NameNodeMXBean
  public long getFree() {
    return this.getCapacityRemaining();
  }
7552    
  /** @return total DFS capacity; delegates to {@code getCapacityTotal()}. */
  @Override // NameNodeMXBean
  public long getTotal() {
    return this.getCapacityTotal();
  }
7557    
7558      @Override // NameNodeMXBean
7559      public String getSafemode() {
7560        if (!this.isInSafeMode())
7561          return "";
7562        return "Safe mode is ON. " + this.getSafeModeTip();
7563      }
7564    
  /** @return whether the fsimage reports the upgrade as finalized. */
  @Override // NameNodeMXBean
  public boolean isUpgradeFinalized() {
    return this.getFSImage().isUpgradeFinalized();
  }
7569    
  /** @return non-DFS used capacity, from the datanode statistics. */
  @Override // NameNodeMXBean
  public long getNonDfsUsedSpace() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }
7574    
  /** @return percentage of capacity used, from the datanode statistics. */
  @Override // NameNodeMXBean
  public float getPercentUsed() {
    return datanodeStatistics.getCapacityUsedPercent();
  }
7579    
  /** @return space used by this block pool, from the datanode statistics. */
  @Override // NameNodeMXBean
  public long getBlockPoolUsedSpace() {
    return datanodeStatistics.getBlockPoolUsed();
  }
7584    
  /** @return percentage of capacity used by this block pool. */
  @Override // NameNodeMXBean
  public float getPercentBlockPoolUsed() {
    return datanodeStatistics.getPercentBlockPoolUsed();
  }
7589    
  /** @return percentage of capacity remaining, from the datanode statistics. */
  @Override // NameNodeMXBean
  public float getPercentRemaining() {
    return datanodeStatistics.getCapacityRemainingPercent();
  }
7594    
  /** @return total cache capacity across datanodes. */
  @Override // NameNodeMXBean
  public long getCacheCapacity() {
    return datanodeStatistics.getCacheCapacity();
  }
7599    
  /** @return cache space currently used across datanodes. */
  @Override // NameNodeMXBean
  public long getCacheUsed() {
    return datanodeStatistics.getCacheUsed();
  }
7604    
  /** @return total number of blocks; delegates to {@code getBlocksTotal()}. */
  @Override // NameNodeMXBean
  public long getTotalBlocks() {
    return getBlocksTotal();
  }
7609    
  /** @return total number of files; also exported as a metric. */
  @Override // NameNodeMXBean
  @Metric
  public long getTotalFiles() {
    return getFilesTotal();
  }
7615    
  /** @return number of missing blocks; delegates to {@code getMissingBlocksCount()}. */
  @Override // NameNodeMXBean
  public long getNumberOfMissingBlocks() {
    return getMissingBlocksCount();
  }
7620      
  /** @return current JVM thread count, via the platform ThreadMXBean. */
  @Override // NameNodeMXBean
  public int getThreads() {
    return ManagementFactory.getThreadMXBean().getThreadCount();
  }
7625    
7626      /**
7627       * Returned information is a JSON representation of map with host name as the
7628       * key and value is a map of live node attribute keys to its values
7629       */
7630      @Override // NameNodeMXBean
7631      public String getLiveNodes() {
7632        final Map<String, Map<String,Object>> info = 
7633          new HashMap<String, Map<String,Object>>();
7634        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
7635        blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
7636        for (DatanodeDescriptor node : live) {
7637          Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
7638              .put("infoAddr", node.getInfoAddr())
7639              .put("infoSecureAddr", node.getInfoSecureAddr())
7640              .put("xferaddr", node.getXferAddr())
7641              .put("lastContact", getLastContact(node))
7642              .put("usedSpace", getDfsUsed(node))
7643              .put("adminState", node.getAdminState().toString())
7644              .put("nonDfsUsedSpace", node.getNonDfsUsed())
7645              .put("capacity", node.getCapacity())
7646              .put("numBlocks", node.numBlocks())
7647              .put("version", node.getSoftwareVersion())
7648              .put("used", node.getDfsUsed())
7649              .put("remaining", node.getRemaining())
7650              .put("blockScheduled", node.getBlocksScheduled())
7651              .put("blockPoolUsed", node.getBlockPoolUsed())
7652              .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
7653              .put("volfails", node.getVolumeFailures())
7654              .build();
7655    
7656          info.put(node.getHostName(), innerinfo);
7657        }
7658        return JSON.toString(info);
7659      }
7660    
7661      /**
7662       * Returned information is a JSON representation of map with host name as the
7663       * key and value is a map of dead node attribute keys to its values
7664       */
7665      @Override // NameNodeMXBean
7666      public String getDeadNodes() {
7667        final Map<String, Map<String, Object>> info = 
7668          new HashMap<String, Map<String, Object>>();
7669        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
7670        blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
7671        for (DatanodeDescriptor node : dead) {
7672          Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
7673              .put("lastContact", getLastContact(node))
7674              .put("decommissioned", node.isDecommissioned())
7675              .put("xferaddr", node.getXferAddr())
7676              .build();
7677          info.put(node.getHostName(), innerinfo);
7678        }
7679        return JSON.toString(info);
7680      }
7681    
7682      /**
7683       * Returned information is a JSON representation of map with host name as the
7684       * key and value is a map of decommissioning node attribute keys to its
7685       * values
7686       */
7687      @Override // NameNodeMXBean
7688      public String getDecomNodes() {
7689        final Map<String, Map<String, Object>> info = 
7690          new HashMap<String, Map<String, Object>>();
7691        final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
7692            ).getDecommissioningNodes();
7693        for (DatanodeDescriptor node : decomNodeList) {
7694          Map<String, Object> innerinfo = ImmutableMap
7695              .<String, Object> builder()
7696              .put("xferaddr", node.getXferAddr())
7697              .put("underReplicatedBlocks",
7698                  node.decommissioningStatus.getUnderReplicatedBlocks())
7699              .put("decommissionOnlyReplicas",
7700                  node.decommissioningStatus.getDecommissionOnlyReplicas())
7701              .put("underReplicateInOpenFiles",
7702                  node.decommissioningStatus.getUnderReplicatedInOpenFiles())
7703              .build();
7704          info.put(node.getHostName(), innerinfo);
7705        }
7706        return JSON.toString(info);
7707      }
7708    
  /** Seconds elapsed since the given node's last update. */
  private long getLastContact(DatanodeDescriptor alivenode) {
    return (Time.now() - alivenode.getLastUpdate())/1000;
  }
7712    
  /** DFS space used by the given node. */
  private long getDfsUsed(DatanodeDescriptor alivenode) {
    return alivenode.getDfsUsed();
  }
7716    
  /** @return the cluster ID from the fsimage storage. */
  @Override  // NameNodeMXBean
  public String getClusterId() {
    return getFSImage().getStorage().getClusterID();
  }
7721      
  /** @return the ID of this namesystem's block pool. */
  @Override  // NameNodeMXBean
  public String getBlockPoolId() {
    return blockPoolId;
  }
7726      
7727      @Override  // NameNodeMXBean
7728      public String getNameDirStatuses() {
7729        Map<String, Map<File, StorageDirType>> statusMap =
7730          new HashMap<String, Map<File, StorageDirType>>();
7731        
7732        Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
7733        for (Iterator<StorageDirectory> it
7734            = getFSImage().getStorage().dirIterator(); it.hasNext();) {
7735          StorageDirectory st = it.next();
7736          activeDirs.put(st.getRoot(), st.getStorageDirType());
7737        }
7738        statusMap.put("active", activeDirs);
7739        
7740        List<Storage.StorageDirectory> removedStorageDirs
7741            = getFSImage().getStorage().getRemovedStorageDirs();
7742        Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
7743        for (StorageDirectory st : removedStorageDirs) {
7744          failedDirs.put(st.getRoot(), st.getStorageDirType());
7745        }
7746        statusMap.put("failed", failedDirs);
7747        
7748        return JSON.toString(statusMap);
7749      }
7750    
7751      @Override // NameNodeMXBean
7752      public String getNodeUsage() {
7753        float median = 0;
7754        float max = 0;
7755        float min = 0;
7756        float dev = 0;
7757    
7758        final Map<String, Map<String,Object>> info =
7759            new HashMap<String, Map<String,Object>>();
7760        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
7761        blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
7762    
7763        if (live.size() > 0) {
7764          float totalDfsUsed = 0;
7765          float[] usages = new float[live.size()];
7766          int i = 0;
7767          for (DatanodeDescriptor dn : live) {
7768            usages[i++] = dn.getDfsUsedPercent();
7769            totalDfsUsed += dn.getDfsUsedPercent();
7770          }
7771          totalDfsUsed /= live.size();
7772          Arrays.sort(usages);
7773          median = usages[usages.length / 2];
7774          max = usages[usages.length - 1];
7775          min = usages[0];
7776    
7777          for (i = 0; i < usages.length; i++) {
7778            dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
7779          }
7780          dev = (float) Math.sqrt(dev / usages.length);
7781        }
7782    
7783        final Map<String, Object> innerInfo = new HashMap<String, Object>();
7784        innerInfo.put("min", StringUtils.format("%.2f%%", min));
7785        innerInfo.put("median", StringUtils.format("%.2f%%", median));
7786        innerInfo.put("max", StringUtils.format("%.2f%%", max));
7787        innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
7788        info.put("nodeUsage", innerInfo);
7789    
7790        return JSON.toString(info);
7791      }
7792    
  /**
   * Per-journal status as a JSON list.  Each entry reports whether the
   * journal is required, whether it is disabled, its manager, and a
   * human-readable description of its current stream state.
   */
  @Override  // NameNodeMXBean
  public String getNameJournalStatus() {
    List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
    FSEditLog log = getFSImage().getEditLog();
    if (log != null) {
      boolean openForWrite = log.isOpenForWrite();
      for (JournalAndStream jas : log.getJournals()) {
        final Map<String, String> jasMap = new HashMap<String, String>();
        String manager = jas.getManager().toString();

        jasMap.put("required", String.valueOf(jas.isRequired()));
        jasMap.put("disabled", String.valueOf(jas.isDisabled()));
        jasMap.put("manager", manager);

        if (jas.isDisabled()) {
          jasMap.put("stream", "Failed");
        } else if (openForWrite) {
          EditLogOutputStream elos = jas.getCurrentStream();
          if (elos != null) {
            jasMap.put("stream", elos.generateReport());
          } else {
            jasMap.put("stream", "not currently writing");
          }
        } else {
          jasMap.put("stream", "open for read");
        }
        jasList.add(jasMap);
      }
    }
    return JSON.toString(jasList);
  }
7824    
  /**
   * Transaction-id info as JSON: the last applied/written txid and the
   * txid of the most recent checkpoint.
   */
  @Override // NameNodeMXBean
  public String getJournalTransactionInfo() {
    Map<String, String> txnIdMap = new HashMap<String, String>();
    txnIdMap.put("LastAppliedOrWrittenTxId",
        Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
    txnIdMap.put("MostRecentCheckpointTxId",
        Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
    return JSON.toString(txnIdMap);
  }
7834      
  /** @return the namenode start time as a string. */
  @Override  // NameNodeMXBean
  public String getNNStarted() {
    return getStartTime().toString();
  }
7839    
  /** @return build date, user and branch of this software version. */
  @Override  // NameNodeMXBean
  public String getCompileInfo() {
    return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
        " from " + VersionInfo.getBranch();
  }
7845    
  /** @return the block manager. */
  public BlockManager getBlockManager() {
    return blockManager;
  }
  /** @return the FSDirectory. */
  public FSDirectory getFSDirectory() {
    return dir;
  }
  /** Set the FSDirectory.  For test use only. */
  @VisibleForTesting
  public void setFSDirectory(FSDirectory dir) {
    this.dir = dir;
  }
  /** @return the cache manager. */
  public CacheManager getCacheManager() {
    return cacheManager;
  }
7863    
7864      @Override  // NameNodeMXBean
7865      public String getCorruptFiles() {
7866        List<String> list = new ArrayList<String>();
7867        Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
7868        try {
7869          corruptFileBlocks = listCorruptFileBlocks("/", null);
7870          int corruptFileCount = corruptFileBlocks.size();
7871          if (corruptFileCount != 0) {
7872            for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
7873              list.add(c.toString());
7874            }
7875          }
7876        } catch (IOException e) {
7877          LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
7878        }
7879        return JSON.toString(list);
7880      }
7881    
  /** @return number of distinct datanode software versions in the cluster. */
  @Override  //NameNodeMXBean
  public int getDistinctVersionCount() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
      .size();
  }
7887    
  /** @return map of datanode software version to the count of nodes running it. */
  @Override  //NameNodeMXBean
  public Map<String, Integer> getDistinctVersions() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
  }
7892    
  /** @return this namenode's software version string. */
  @Override  //NameNodeMXBean
  public String getSoftwareVersion() {
    return VersionInfo.getVersion();
  }
7897    
7898      /**
7899       * Verifies that the given identifier and password are valid and match.
7900       * @param identifier Token identifier.
7901       * @param password Password in the token.
7902       */
7903      public synchronized void verifyToken(DelegationTokenIdentifier identifier,
7904          byte[] password) throws InvalidToken, RetriableException {
7905        try {
7906          getDelegationTokenSecretManager().verifyToken(identifier, password);
7907        } catch (InvalidToken it) {
7908          if (inTransitionToActive()) {
7909            throw new RetriableException(it);
7910          }
7911          throw it;
7912        }
7913      }
7914      
7915      @Override
7916      public boolean isGenStampInFuture(Block block) {
7917        if (isLegacyBlock(block)) {
7918          return block.getGenerationStamp() > getGenerationStampV1();
7919        } else {
7920          return block.getGenerationStamp() > getGenerationStampV2();
7921        }
7922      }
7923    
  /** @return the edit log tailer.  For test use only. */
  @VisibleForTesting
  public EditLogTailer getEditLogTailer() {
    return editLogTailer;
  }
7928      
  /** Replace the edit log tailer.  For test use only. */
  @VisibleForTesting
  public void setEditLogTailerForTests(EditLogTailer tailer) {
    this.editLogTailer = tailer;
  }
7933      
  /** Replace the coarse FSN read/write lock.  For test use only. */
  @VisibleForTesting
  void setFsLockForTests(ReentrantReadWriteLock lock) {
    this.fsLock.coarseLock = lock;
  }
7938      
  /** @return the coarse FSN read/write lock.  For test use only. */
  @VisibleForTesting
  public ReentrantReadWriteLock getFsLockForTests() {
    return fsLock.coarseLock;
  }
7943      
  /** @return the long-read lock.  For test use only. */
  @VisibleForTesting
  public ReentrantLock getLongReadLockForTests() {
    return fsLock.longReadLock;
  }
7948    
  /** @return the current safe-mode state object.  For test use only. */
  @VisibleForTesting
  public SafeModeInfo getSafeModeInfoForTests() {
    return safeMode;
  }
7953      
  /** Replace the namenode resource checker.  For test use only. */
  @VisibleForTesting
  public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
    this.nnResourceChecker = nnResourceChecker;
  }
7958    
  /** @return whether the datanode manager is avoiding stale nodes for writes. */
  @Override
  public boolean isAvoidingStaleDataNodesForWrite() {
    return this.blockManager.getDatanodeManager()
        .shouldAvoidStaleDataNodesForWrite();
  }
7964    
  /** @return the number of datanodes currently in service. */
  @Override // FSClusterStats
  public int getNumDatanodesInService() {
    return datanodeStatistics.getNumDatanodesInService();
  }
7969      
7970      @Override // for block placement strategy
7971      public double getInServiceXceiverAverage() {
7972        double avgLoad = 0;
7973        final int nodes = getNumDatanodesInService();
7974        if (nodes != 0) {
7975          final int xceivers = datanodeStatistics.getInServiceXceiverCount();
7976          avgLoad = (double)xceivers/nodes;
7977        }
7978        return avgLoad;
7979      }
7980    
  /** @return the snapshot manager for this namesystem. */
  public SnapshotManager getSnapshotManager() {
    return snapshotManager;
  }
7984      
  /**
   * Allow snapshot on a directory.
   * Requires superuser privilege; the operation is logged to the edit log
   * and audited on success.
   *
   * @param path the directory to mark as snapshottable
   */
  void allowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-validate operation category and safe mode now that the write
      // lock is held.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot allow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.setSnapshottable(path, true);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logAllowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the namesystem lock.
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "allowSnapshot", path, null, null);
    }
  }
8010      
  /**
   * Disallow snapshot on a directory.
   * Requires superuser privilege; the operation is logged to the edit log
   * and audited on success.
   *
   * @param path the directory to reset from snapshottable
   */
  void disallowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-validate operation category and safe mode now that the write
      // lock is held.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.resetSnapshottable(path);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logDisallowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the namesystem lock.
    getEditLog().logSync();
    
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "disallowSnapshot", path, null, null);
    }
  }
8036      
8037      /**
8038       * Create a snapshot
8039       * @param snapshotRoot The directory path where the snapshot is taken
8040       * @param snapshotName The name of the snapshot
8041       */
8042      String createSnapshot(String snapshotRoot, String snapshotName)
8043          throws SafeModeException, IOException {
8044        checkOperation(OperationCategory.WRITE);
8045        final FSPermissionChecker pc = getPermissionChecker();
8046        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
8047            null);
8048        if (cacheEntry != null && cacheEntry.isSuccess()) {
8049          return (String) cacheEntry.getPayload();
8050        }
8051        String snapshotPath = null;
8052        writeLock();
8053        try {
8054          checkOperation(OperationCategory.WRITE);
8055          checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
8056          if (isPermissionEnabled) {
8057            checkOwner(pc, snapshotRoot);
8058          }
8059    
8060          if (snapshotName == null || snapshotName.isEmpty()) {
8061            snapshotName = Snapshot.generateDefaultSnapshotName();
8062          }
8063          if(snapshotName != null){
8064            if (!DFSUtil.isValidNameForComponent(snapshotName)) {
8065                throw new InvalidPathException("Invalid snapshot name: "
8066                    + snapshotName);
8067            }
8068          }
8069          dir.verifySnapshotName(snapshotName, snapshotRoot);
8070          dir.writeLock();
8071          try {
8072            snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName);
8073          } finally {
8074            dir.writeUnlock();
8075          }
8076          getEditLog().logCreateSnapshot(snapshotRoot, snapshotName,
8077              cacheEntry != null);
8078        } finally {
8079          writeUnlock();
8080          RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath);
8081        }
8082        getEditLog().logSync();
8083        
8084        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8085          logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null);
8086        }
8087        return snapshotPath;
8088      }
8089      
8090      /**
8091       * Rename a snapshot
8092       * @param path The directory path where the snapshot was taken
8093       * @param snapshotOldName Old snapshot name
8094       * @param snapshotNewName New snapshot name
8095       * @throws SafeModeException
8096       * @throws IOException 
8097       */
8098      void renameSnapshot(String path, String snapshotOldName,
8099          String snapshotNewName) throws SafeModeException, IOException {
8100        checkOperation(OperationCategory.WRITE);
8101        final FSPermissionChecker pc = getPermissionChecker();
8102        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
8103        if (cacheEntry != null && cacheEntry.isSuccess()) {
8104          return; // Return previous response
8105        }
8106        writeLock();
8107        boolean success = false;
8108        try {
8109          checkOperation(OperationCategory.WRITE);
8110          checkNameNodeSafeMode("Cannot rename snapshot for " + path);
8111          if (isPermissionEnabled) {
8112            checkOwner(pc, path);
8113          }
8114          dir.verifySnapshotName(snapshotNewName, path);
8115          
8116          snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName);
8117          getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName,
8118              cacheEntry != null);
8119          success = true;
8120        } finally {
8121          writeUnlock();
8122          RetryCache.setState(cacheEntry, success);
8123        }
8124        getEditLog().logSync();
8125        
8126        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8127          String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
8128          String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
8129          logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
8130        }
8131      }
8132      
8133      /**
8134       * Get the list of snapshottable directories that are owned 
8135       * by the current user. Return all the snapshottable directories if the 
8136       * current user is a super user.
8137       * @return The list of all the current snapshottable directories
8138       * @throws IOException
8139       */
8140      public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
8141          throws IOException {
8142        SnapshottableDirectoryStatus[] status = null;
8143        checkOperation(OperationCategory.READ);
8144        final FSPermissionChecker checker = getPermissionChecker();
8145        readLock();
8146        try {
8147          checkOperation(OperationCategory.READ);
8148          final String user = checker.isSuperUser()? null : checker.getUser();
8149          status = snapshotManager.getSnapshottableDirListing(user);
8150        } finally {
8151          readUnlock();
8152        }
8153        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8154          logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
8155        }
8156        return status;
8157      }
8158      
8159      /**
8160       * Get the difference between two snapshots (or between a snapshot and the
8161       * current status) of a snapshottable directory.
8162       * 
8163       * @param path The full path of the snapshottable directory.
8164       * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
8165       *          or empty string indicates the current tree.
8166       * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
8167       *          empty string indicates the current tree.
8168       * @return A report about the difference between {@code fromSnapshot} and 
8169       *         {@code toSnapshot}. Modified/deleted/created/renamed files and 
8170       *         directories belonging to the snapshottable directories are listed 
8171       *         and labeled as M/-/+/R respectively. 
8172       * @throws IOException
8173       */
8174      SnapshotDiffReport getSnapshotDiffReport(String path,
8175          String fromSnapshot, String toSnapshot) throws IOException {
8176        SnapshotDiffReport diffs;
8177        checkOperation(OperationCategory.READ);
8178        final FSPermissionChecker pc = getPermissionChecker();
8179        readLock();
8180        try {
8181          checkOperation(OperationCategory.READ);
8182          if (isPermissionEnabled) {
8183            checkSubtreeReadPermission(pc, path, fromSnapshot);
8184            checkSubtreeReadPermission(pc, path, toSnapshot);
8185          }
8186          diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
8187        } finally {
8188          readUnlock();
8189        }
8190    
8191        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8192          logAuditEvent(true, "computeSnapshotDiff", null, null, null);
8193        }
8194        return diffs;
8195      }
8196      
8197      private void checkSubtreeReadPermission(final FSPermissionChecker pc,
8198          final String snapshottablePath, final String snapshot)
8199              throws AccessControlException, UnresolvedLinkException {
8200        final String fromPath = snapshot == null?
8201            snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot);
8202        checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ);
8203      }
8204      
8205      /**
8206       * Delete a snapshot of a snapshottable directory
8207       * @param snapshotRoot The snapshottable directory
8208       * @param snapshotName The name of the to-be-deleted snapshot
8209       * @throws SafeModeException
8210       * @throws IOException
8211       */
8212      void deleteSnapshot(String snapshotRoot, String snapshotName)
8213          throws SafeModeException, IOException {
8214        checkOperation(OperationCategory.WRITE);
8215        final FSPermissionChecker pc = getPermissionChecker();
8216        
8217        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
8218        if (cacheEntry != null && cacheEntry.isSuccess()) {
8219          return; // Return previous response
8220        }
8221        boolean success = false;
8222        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
8223        writeLock();
8224        try {
8225          checkOperation(OperationCategory.WRITE);
8226          checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
8227          if (isPermissionEnabled) {
8228            checkOwner(pc, snapshotRoot);
8229          }
8230    
8231          List<INode> removedINodes = new ChunkedArrayList<INode>();
8232          dir.writeLock();
8233          try {
8234            snapshotManager.deleteSnapshot(snapshotRoot, snapshotName,
8235                collectedBlocks, removedINodes);
8236            dir.removeFromInodeMap(removedINodes);
8237          } finally {
8238            dir.writeUnlock();
8239          }
8240          removedINodes.clear();
8241          getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName,
8242              cacheEntry != null);
8243          success = true;
8244        } finally {
8245          writeUnlock();
8246          RetryCache.setState(cacheEntry, success);
8247        }
8248        getEditLog().logSync();
8249    
8250        removeBlocks(collectedBlocks);
8251        collectedBlocks.clear();
8252    
8253        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8254          String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
8255          logAuditEvent(true, "deleteSnapshot", rootPath, null, null);
8256        }
8257      }
8258    
8259      /**
8260       * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
8261       * @param toRemove the list of INodeDirectorySnapshottable to be removed
8262       */
8263      void removeSnapshottableDirs(List<INodeDirectory> toRemove) {
8264        if (snapshotManager != null) {
8265          snapshotManager.removeSnapshottable(toRemove);
8266        }
8267      }
8268    
  /**
   * Query the status of the current rolling upgrade, refreshing the
   * rollback-image flag from the FSImage. Requires superuser privilege.
   *
   * @return the current rolling upgrade info, or null if none is in progress
   */
  RollingUpgradeInfo queryRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      if (rollingUpgradeInfo != null) {
        // Refresh the flag so callers see whether a rollback image exists.
        boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
        rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
      }
      return rollingUpgradeInfo;
    } finally {
      readUnlock();
    }
  }
8283    
  /**
   * Start a rolling upgrade. Idempotent: if an upgrade is already in
   * progress, its info is returned. Requires superuser privilege.
   *
   * @return the rolling upgrade info after the upgrade has been started
   */
  RollingUpgradeInfo startRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if (isRollingUpgrade()) {
        return rollingUpgradeInfo;
      }
      long startTime = now();
      if (!haEnabled) { // for non-HA, we require NN to be in safemode
        startRollingUpgradeInternalForNonHA(startTime);
      } else { // for HA, NN cannot be in safemode
        checkNameNodeSafeMode("Failed to start rolling upgrade");
        startRollingUpgradeInternal(startTime);
      }

      getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
      if (haEnabled) {
        // roll the edit log to make sure the standby NameNode can tail
        getFSImage().rollEditLog();
      }
    } finally {
      writeUnlock();
    }

    // Sync the edit log outside the namesystem lock.
    getEditLog().logSync();
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "startRollingUpgrade", null, null, null);
    }
    return rollingUpgradeInfo;
  }
8316    
8317      /**
8318       * Update internal state to indicate that a rolling upgrade is in progress.
8319       * @param startTime rolling upgrade start time
8320       */
8321      void startRollingUpgradeInternal(long startTime)
8322          throws IOException {
8323        checkRollingUpgrade("start rolling upgrade");
8324        getFSImage().checkUpgrade(this);
8325        setRollingUpgradeInfo(false, startTime);
8326      }
8327    
8328      /**
8329       * Update internal state to indicate that a rolling upgrade is in progress for
8330       * non-HA setup. This requires the namesystem is in SafeMode and after doing a
8331       * checkpoint for rollback the namesystem will quit the safemode automatically 
8332       */
8333      private void startRollingUpgradeInternalForNonHA(long startTime)
8334          throws IOException {
8335        Preconditions.checkState(!haEnabled);
8336        if (!isInSafeMode()) {
8337          throw new IOException("Safe mode should be turned ON "
8338              + "in order to create namespace image.");
8339        }
8340        checkRollingUpgrade("start rolling upgrade");
8341        getFSImage().checkUpgrade(this);
8342        // in non-HA setup, we do an extra checkpoint to generate a rollback image
8343        getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
8344        LOG.info("Successfully saved namespace for preparing rolling upgrade.");
8345    
8346        // leave SafeMode automatically
8347        setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
8348        setRollingUpgradeInfo(true, startTime);
8349      }
8350    
  /**
   * Record that a rolling upgrade is in progress.
   * @param createdRollbackImages whether a rollback image has been created
   * @param startTime rolling upgrade start time
   */
  void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
    rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
        createdRollbackImages, startTime, 0L);
  }
8355    
  /**
   * Update the rollback-image flag on the current rolling upgrade info;
   * no-op if no rolling upgrade is in progress.
   */
  public void setCreatedRollbackImages(boolean created) {
    if (rollingUpgradeInfo != null) {
      rollingUpgradeInfo.setCreatedRollbackImages(created);
    }
  }
8361    
  /** @return the current rolling upgrade info, or null if none is set. */
  public RollingUpgradeInfo getRollingUpgradeInfo() {
    return rollingUpgradeInfo;
  }
8365    
  /** @return whether a rollback fsimage still needs to be created. */
  public boolean isNeedRollbackFsImage() {
    return needRollbackFsImage;
  }
8369    
  /** Set whether a rollback fsimage still needs to be created. */
  public void setNeedRollbackFsImage(boolean needRollbackFsImage) {
    this.needRollbackFsImage = needRollbackFsImage;
  }
8373    
8374      @Override  // NameNodeMXBean
8375      public RollingUpgradeInfo.Bean getRollingUpgradeStatus() {
8376        readLock();
8377        try {
8378          RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo();
8379          if (upgradeInfo != null) {
8380            return new RollingUpgradeInfo.Bean(upgradeInfo);
8381          }
8382          return null;
8383        } finally {
8384          readUnlock();
8385        }
8386      }
8387    
  /** Is rolling upgrade in progress? */
  public boolean isRollingUpgrade() {
    return rollingUpgradeInfo != null;
  }
8392    
  /**
   * Throw if a rolling upgrade is already in progress.
   * @param action description of the action being attempted, used in the
   *          exception message
   * @throws RollingUpgradeException if an upgrade is in progress
   */
  void checkRollingUpgrade(String action) throws RollingUpgradeException {
    if (isRollingUpgrade()) {
      throw new RollingUpgradeException("Failed to " + action
          + " since a rolling upgrade is already in progress."
          + " Existing rolling upgrade info:\n" + rollingUpgradeInfo);
    }
  }
8400    
8401      void finalizeRollingUpgrade() throws IOException {
8402        checkSuperuserPrivilege();
8403        checkOperation(OperationCategory.WRITE);
8404        writeLock();
8405        final RollingUpgradeInfo returnInfo;
8406        try {
8407          checkOperation(OperationCategory.WRITE);
8408          if (!isRollingUpgrade()) {
8409            return;
8410          }
8411          checkNameNodeSafeMode("Failed to finalize rolling upgrade");
8412    
8413          returnInfo = finalizeRollingUpgradeInternal(now());
8414          getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
8415          if (haEnabled) {
8416            // roll the edit log to make sure the standby NameNode can tail
8417            getFSImage().rollEditLog();
8418          }
8419          getFSImage().updateStorageVersion();
8420          getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
8421              NameNodeFile.IMAGE);
8422        } finally {
8423          writeUnlock();
8424        }
8425    
8426        if (!haEnabled) {
8427          // Sync not needed for ha since the edit was rolled after logging.
8428          getEditLog().logSync();
8429        }
8430    
8431        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8432          logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);
8433        }
8434        return;
8435      }
8436    
  /**
   * Clear the in-progress rolling upgrade state and build the finalized
   * info record.
   * @param finalizeTime the time at which the upgrade was finalized
   * @return the finalized rolling upgrade info
   */
  RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime)
      throws RollingUpgradeException {
    final long startTime = rollingUpgradeInfo.getStartTime();
    // Null info means "no rolling upgrade in progress".
    rollingUpgradeInfo = null;
    return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
  }
8443    
  /**
   * Add a new cache directive.
   *
   * @param directive the directive to add; must not carry an id (ids are
   *          assigned by the cache manager)
   * @param flags if {@link CacheFlag#FORCE} is absent, wait for a cache
   *          rescan before applying
   * @return the id assigned to the new directive
   */
  long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // Retried RPCs get the previously assigned id from the retry cache.
    CacheEntryWithPayload cacheEntry =
        RetryCache.waitForCompletion(retryCache, null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (Long) cacheEntry.getPayload();
    }
    boolean success = false;
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    String effectiveDirectiveStr = null;
    Long result = null;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache directive", safeMode);
      }
      if (directive.getId() != null) {
        throw new IOException("addDirective: you cannot specify an ID " +
            "for this operation.");
      }
      CacheDirectiveInfo effectiveDirective =
          cacheManager.addDirective(directive, pc, flags);
      getEditLog().logAddCacheDirectiveInfo(effectiveDirective,
          cacheEntry != null);
      result = effectiveDirective.getId();
      effectiveDirectiveStr = effectiveDirective.toString();
      success = true;
    } finally {
      writeUnlock();
      // NOTE(review): logSync is called while still inside this finally,
      // after the lock is released; audit and retry-cache state are also
      // recorded here so failures are captured too.
      if (success) {
        getEditLog().logSync();
      }
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCacheDirective", effectiveDirectiveStr, null, null);
      }
      RetryCache.setState(cacheEntry, success, result);
    }
    return result;
  }
8490    
8491      void modifyCacheDirective(CacheDirectiveInfo directive,
8492          EnumSet<CacheFlag> flags) throws IOException {
8493        checkOperation(OperationCategory.WRITE);
8494        final FSPermissionChecker pc = isPermissionEnabled ?
8495            getPermissionChecker() : null;
8496        boolean success = false;
8497        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
8498        if (cacheEntry != null && cacheEntry.isSuccess()) {
8499          return;
8500        }
8501        if (!flags.contains(CacheFlag.FORCE)) {
8502          cacheManager.waitForRescanIfNeeded();
8503        }
8504        writeLock();
8505        try {
8506          checkOperation(OperationCategory.WRITE);
8507          if (isInSafeMode()) {
8508            throw new SafeModeException(
8509                "Cannot add cache directive", safeMode);
8510          }
8511          cacheManager.modifyDirective(directive, pc, flags);
8512          getEditLog().logModifyCacheDirectiveInfo(directive,
8513              cacheEntry != null);
8514          success = true;
8515        } finally {
8516          writeUnlock();
8517          if (success) {
8518            getEditLog().logSync();
8519          }
8520          if (isAuditEnabled() && isExternalInvocation()) {
8521            String idStr = "{id: " + directive.getId().toString() + "}";
8522            logAuditEvent(success, "modifyCacheDirective", idStr, directive.toString(), null);
8523          }
8524          RetryCache.setState(cacheEntry, success);
8525        }
8526      }
8527    
  /**
   * Remove a cache directive by id.
   *
   * @param id the id of the directive to remove
   * @throws IOException if the namesystem is in safe mode or the directive
   *           cannot be removed
   */
  void removeCacheDirective(Long id) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // Retried RPCs are not re-applied.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return;
    }
    boolean success = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache directives", safeMode);
      }
      cacheManager.removeDirective(id, pc);
      getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        String idStr = "{id: " + id.toString() + "}";
        logAuditEvent(success, "removeCacheDirective", idStr, null,
            null);
      }
      RetryCache.setState(cacheEntry, success);
    }
    // Only reached on success; exceptions propagate before the sync.
    getEditLog().logSync();
  }
8558    
  /**
   * List cache directives, starting after a given id and filtered.
   *
   * @param startId directives with ids greater than this are returned
   * @param filter restricts which directives are listed; assumed non-null
   *          (its toString is used in the audit log) — TODO confirm callers
   *          always pass a filter
   * @return a batch of matching cache directive entries
   */
  BatchedListEntries<CacheDirectiveEntry> listCacheDirectives(
      long startId, CacheDirectiveInfo filter) throws IOException {
    checkOperation(OperationCategory.READ);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    BatchedListEntries<CacheDirectiveEntry> results;
    cacheManager.waitForRescanIfNeeded();
    readLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.READ);
      results =
          cacheManager.listCacheDirectives(startId, filter, pc);
      success = true;
    } finally {
      readUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "listCacheDirectives", filter.toString(), null,
            null);
      }
    }
    return results;
  }
8582    
  /**
   * Add a new cache pool. Requires superuser privilege when permissions
   * are enabled.
   *
   * @param req the pool to create
   * @throws IOException if in safe mode or the pool cannot be added
   */
  public void addCachePool(CachePoolInfo req) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // Retried RPCs are not re-applied.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    String poolInfoStr = null;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache pool " + req.getPoolName(), safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      CachePoolInfo info = cacheManager.addCachePool(req);
      poolInfoStr = info.toString();
      getEditLog().logAddCachePool(info, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCachePool", poolInfoStr, null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }
    
    // Only reached on success; exceptions propagate before the sync.
    getEditLog().logSync();
  }
8617    
  /**
   * Modify an existing cache pool. Requires superuser privilege when
   * permissions are enabled.
   *
   * @param req carries the pool name and the fields to change
   * @throws IOException if in safe mode or the pool cannot be modified
   */
  public void modifyCachePool(CachePoolInfo req) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc =
        isPermissionEnabled ? getPermissionChecker() : null;
    // Retried RPCs are not re-applied.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot modify cache pool " + req.getPoolName(), safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      cacheManager.modifyCachePool(req);
      getEditLog().logModifyCachePool(req, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        String poolNameStr = "{poolName: " + req.getPoolName() + "}";
        logAuditEvent(success, "modifyCachePool", poolNameStr, req.toString(), null);
      }
      RetryCache.setState(cacheEntry, success);
    }

    // Only reached on success; exceptions propagate before the sync.
    getEditLog().logSync();
  }
8651    
  /**
   * Remove a cache pool by name. Requires superuser privilege when
   * permissions are enabled.
   *
   * @param cachePoolName the name of the pool to remove
   * @throws IOException if in safe mode or the pool cannot be removed
   */
  public void removeCachePool(String cachePoolName) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc =
        isPermissionEnabled ? getPermissionChecker() : null;
    // Retried RPCs are not re-applied.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache pool " + cachePoolName, safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      cacheManager.removeCachePool(cachePoolName);
      getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        String poolNameStr = "{poolName: " + cachePoolName + "}";
        logAuditEvent(success, "removeCachePool", poolNameStr, null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }
    
    // Only reached on success; exceptions propagate before the sync.
    getEditLog().logSync();
  }
8685    
  /**
   * List cache pools, starting after the given key.
   *
   * @param prevKey pools ordered after this key are returned
   * @return a batch of cache pool entries
   */
  public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey)
      throws IOException {
    final FSPermissionChecker pc =
        isPermissionEnabled ? getPermissionChecker() : null;
    BatchedListEntries<CachePoolEntry> results;
    checkOperation(OperationCategory.READ);
    boolean success = false;
    cacheManager.waitForRescanIfNeeded();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      results = cacheManager.listCachePools(pc, prevKey);
      success = true;
    } finally {
      readUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "listCachePools", null, null, null);
      }
    }
    return results;
  }
8707    
  /**
   * Modify ACL entries on a path. Only the owner may change ACLs; the
   * resulting full ACL is logged to the edit log.
   *
   * @param srcArg the path whose ACL is modified (as given by the caller,
   *          used for audit logging)
   * @param aclSpec the ACL entries to apply
   */
  void modifyAclEntries(final String srcArg, List<AclEntry> aclSpec)
      throws IOException {
    String src = srcArg;
    nnConf.checkAclsConfigFlag();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot modify ACL entries on " + src);
      // Resolve reserved paths (e.g. /.reserved) to the actual target.
      src = resolvePath(src, pathComponents);
      checkOwner(pc, src);
      List<AclEntry> newAcl = dir.modifyAclEntries(src, aclSpec);
      getEditLog().logSetAcl(src, newAcl);
      resultingStat = getAuditFileInfo(src, false);
    } catch (AccessControlException e) {
      // Audit the failure with the caller-supplied path, then rethrow.
      logAuditEvent(false, "modifyAclEntries", srcArg);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "modifyAclEntries", srcArg, null, resultingStat);
  }
8734    
8735      void removeAclEntries(final String srcArg, List<AclEntry> aclSpec)
8736          throws IOException {
8737        String src = srcArg;
8738        nnConf.checkAclsConfigFlag();
8739        HdfsFileStatus resultingStat = null;
8740        FSPermissionChecker pc = getPermissionChecker();
8741        checkOperation(OperationCategory.WRITE);
8742        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
8743        writeLock();
8744        try {
8745          checkOperation(OperationCategory.WRITE);
8746          checkNameNodeSafeMode("Cannot remove ACL entries on " + src);
8747          src = resolvePath(src, pathComponents);
8748          checkOwner(pc, src);
8749          List<AclEntry> newAcl = dir.removeAclEntries(src, aclSpec);
8750          getEditLog().logSetAcl(src, newAcl);
8751          resultingStat = getAuditFileInfo(src, false);
8752        } catch (AccessControlException e) {
8753          logAuditEvent(false, "removeAclEntries", srcArg);
8754          throw e;
8755        } finally {
8756          writeUnlock();
8757        }
8758        getEditLog().logSync();
8759        logAuditEvent(true, "removeAclEntries", srcArg, null, resultingStat);
8760      }
8761    
8762      void removeDefaultAcl(final String srcArg) throws IOException {
8763        String src = srcArg;
8764        nnConf.checkAclsConfigFlag();
8765        HdfsFileStatus resultingStat = null;
8766        FSPermissionChecker pc = getPermissionChecker();
8767        checkOperation(OperationCategory.WRITE);
8768        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
8769        writeLock();
8770        try {
8771          checkOperation(OperationCategory.WRITE);
8772          checkNameNodeSafeMode("Cannot remove default ACL entries on " + src);
8773          src = resolvePath(src, pathComponents);
8774          checkOwner(pc, src);
8775          List<AclEntry> newAcl = dir.removeDefaultAcl(src);
8776          getEditLog().logSetAcl(src, newAcl);
8777          resultingStat = getAuditFileInfo(src, false);
8778        } catch (AccessControlException e) {
8779          logAuditEvent(false, "removeDefaultAcl", srcArg);
8780          throw e;
8781        } finally {
8782          writeUnlock();
8783        }
8784        getEditLog().logSync();
8785        logAuditEvent(true, "removeDefaultAcl", srcArg, null, resultingStat);
8786      }
8787    
8788      void removeAcl(final String srcArg) throws IOException {
8789        String src = srcArg;
8790        nnConf.checkAclsConfigFlag();
8791        HdfsFileStatus resultingStat = null;
8792        FSPermissionChecker pc = getPermissionChecker();
8793        checkOperation(OperationCategory.WRITE);
8794        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
8795        writeLock();
8796        try {
8797          checkOperation(OperationCategory.WRITE);
8798          checkNameNodeSafeMode("Cannot remove ACL on " + src);
8799          src = resolvePath(src, pathComponents);
8800          checkOwner(pc, src);
8801          dir.removeAcl(src);
8802          getEditLog().logSetAcl(src, AclFeature.EMPTY_ENTRY_LIST);
8803          resultingStat = getAuditFileInfo(src, false);
8804        } catch (AccessControlException e) {
8805          logAuditEvent(false, "removeAcl", srcArg);
8806          throw e;
8807        } finally {
8808          writeUnlock();
8809        }
8810        getEditLog().logSync();
8811        logAuditEvent(true, "removeAcl", srcArg, null, resultingStat);
8812      }
8813    
8814      void setAcl(final String srcArg, List<AclEntry> aclSpec) throws IOException {
8815        String src = srcArg;
8816        nnConf.checkAclsConfigFlag();
8817        HdfsFileStatus resultingStat = null;
8818        FSPermissionChecker pc = getPermissionChecker();
8819        checkOperation(OperationCategory.WRITE);
8820        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
8821        writeLock();
8822        try {
8823          checkOperation(OperationCategory.WRITE);
8824          checkNameNodeSafeMode("Cannot set ACL on " + src);
8825          src = resolvePath(src, pathComponents);
8826          checkOwner(pc, src);
8827          List<AclEntry> newAcl = dir.setAcl(src, aclSpec);
8828          getEditLog().logSetAcl(src, newAcl);
8829          resultingStat = getAuditFileInfo(src, false);
8830        } catch (AccessControlException e) {
8831          logAuditEvent(false, "setAcl", srcArg);
8832          throw e;
8833        } finally {
8834          writeUnlock();
8835        }
8836        getEditLog().logSync();
8837        logAuditEvent(true, "setAcl", srcArg, null, resultingStat);
8838      }
8839    
8840      AclStatus getAclStatus(String src) throws IOException {
8841        nnConf.checkAclsConfigFlag();
8842        FSPermissionChecker pc = getPermissionChecker();
8843        checkOperation(OperationCategory.READ);
8844        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
8845        boolean success = false;
8846        readLock();
8847        try {
8848          checkOperation(OperationCategory.READ);
8849          src = resolvePath(src, pathComponents);
8850          if (isPermissionEnabled) {
8851            checkPermission(pc, src, false, null, null, null, null);
8852          }
8853          final AclStatus ret = dir.getAclStatus(src);
8854          success = true;
8855          return ret;
8856        } finally {
8857          readUnlock();
8858          logAuditEvent(success, "getAclStatus", src);
8859        }
8860      }
8861    
8862      /**
8863       * Create an encryption zone on directory src using the specified key.
8864       *
8865       * @param src     the path of a directory which will be the root of the
8866       *                encryption zone. The directory must be empty.
8867       * @param keyName name of a key which must be present in the configured
8868       *                KeyProvider.
8869       * @throws AccessControlException  if the caller is not the superuser.
8870       * @throws UnresolvedLinkException if the path can't be resolved.
8871       * @throws SafeModeException       if the Namenode is in safe mode.
8872       */
8873      void createEncryptionZone(final String src, final String keyName)
8874        throws IOException, UnresolvedLinkException,
8875          SafeModeException, AccessControlException {
8876        final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
8877        if (cacheEntry != null && cacheEntry.isSuccess()) {
8878          return; // Return previous response
8879        }
8880    
8881        boolean success = false;
8882        try {
8883          if (provider == null) {
8884            throw new IOException(
8885                "Can't create an encryption zone for " + src +
8886                " since no key provider is available.");
8887          }
8888          if (keyName == null || keyName.isEmpty()) {
8889            throw new IOException("Must specify a key name when creating an " +
8890                "encryption zone");
8891          }
8892          KeyProvider.Metadata metadata = provider.getMetadata(keyName);
8893          if (metadata == null) {
8894            /*
8895             * It would be nice if we threw something more specific than
8896             * IOException when the key is not found, but the KeyProvider API
8897             * doesn't provide for that. If that API is ever changed to throw
8898             * something more specific (e.g. UnknownKeyException) then we can
8899             * update this to match it, or better yet, just rethrow the
8900             * KeyProvider's exception.
8901             */
8902            throw new IOException("Key " + keyName + " doesn't exist.");
8903          }
8904          createEncryptionZoneInt(src, metadata.getCipher(),
8905              keyName, cacheEntry != null);
8906          success = true;
8907        } catch (AccessControlException e) {
8908          logAuditEvent(false, "createEncryptionZone", src);
8909          throw e;
8910        } finally {
8911          RetryCache.setState(cacheEntry, success);
8912        }
8913      }
8914    
8915      private void createEncryptionZoneInt(final String srcArg, String cipher,
8916          String keyName, final boolean logRetryCache) throws IOException {
8917        String src = srcArg;
8918        HdfsFileStatus resultingStat = null;
8919        checkSuperuserPrivilege();
8920        checkOperation(OperationCategory.WRITE);
8921        final byte[][] pathComponents =
8922          FSDirectory.getPathComponentsForReservedPath(src);
8923        writeLock();
8924        try {
8925          checkSuperuserPrivilege();
8926          checkOperation(OperationCategory.WRITE);
8927          checkNameNodeSafeMode("Cannot create encryption zone on " + src);
8928          src = resolvePath(src, pathComponents);
8929    
8930          final CipherSuite suite = CipherSuite.convert(cipher);
8931          // For now this is hardcoded, as we only support one method.
8932          final CryptoProtocolVersion version =
8933              CryptoProtocolVersion.ENCRYPTION_ZONES;
8934          final XAttr ezXAttr = dir.createEncryptionZone(src, suite,
8935              version, keyName);
8936          List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1);
8937          xAttrs.add(ezXAttr);
8938          getEditLog().logSetXAttrs(src, xAttrs, logRetryCache);
8939          resultingStat = getAuditFileInfo(src, false);
8940        } finally {
8941          writeUnlock();
8942        }
8943        getEditLog().logSync();
8944        logAuditEvent(true, "createEncryptionZone", srcArg, null, resultingStat);
8945      }
8946    
8947      /**
8948       * Get the encryption zone for the specified path.
8949       *
8950       * @param srcArg the path of a file or directory to get the EZ for.
8951       * @return the EZ of the of the path or null if none.
8952       * @throws AccessControlException  if the caller is not the superuser.
8953       * @throws UnresolvedLinkException if the path can't be resolved.
8954       */
8955      EncryptionZone getEZForPath(final String srcArg)
8956        throws AccessControlException, UnresolvedLinkException, IOException {
8957        String src = srcArg;
8958        HdfsFileStatus resultingStat = null;
8959        final byte[][] pathComponents =
8960            FSDirectory.getPathComponentsForReservedPath(src);
8961        boolean success = false;
8962        final FSPermissionChecker pc = getPermissionChecker();
8963        checkOperation(OperationCategory.READ);
8964        readLock();
8965        try {
8966          if (isPermissionEnabled) {
8967            checkPathAccess(pc, src, FsAction.READ);
8968          }
8969          checkOperation(OperationCategory.READ);
8970          src = resolvePath(src, pathComponents);
8971          final INodesInPath iip = dir.getINodesInPath(src, true);
8972          final EncryptionZone ret = dir.getEZForPath(iip);
8973          resultingStat = getAuditFileInfo(src, false);
8974          success = true;
8975          return ret;
8976        } finally {
8977          readUnlock();
8978          logAuditEvent(success, "getEZForPath", srcArg, null, resultingStat);
8979        }
8980      }
8981    
8982      BatchedListEntries<EncryptionZone> listEncryptionZones(long prevId)
8983          throws IOException {
8984        boolean success = false;
8985        checkSuperuserPrivilege();
8986        checkOperation(OperationCategory.READ);
8987        readLock();
8988        try {
8989          checkSuperuserPrivilege();
8990          checkOperation(OperationCategory.READ);
8991          final BatchedListEntries<EncryptionZone> ret =
8992              dir.listEncryptionZones(prevId);
8993          success = true;
8994          return ret;
8995        } finally {
8996          readUnlock();
8997          logAuditEvent(success, "listEncryptionZones", null);
8998        }
8999      }
9000    
9001      /**
9002       * Set xattr for a file or directory.
9003       * 
9004       * @param src
9005       *          - path on which it sets the xattr
9006       * @param xAttr
9007       *          - xAttr details to set
9008       * @param flag
9009       *          - xAttrs flags
9010       * @throws AccessControlException
9011       * @throws SafeModeException
9012       * @throws UnresolvedLinkException
9013       * @throws IOException
9014       */
9015      void setXAttr(String src, XAttr xAttr, EnumSet<XAttrSetFlag> flag)
9016          throws AccessControlException, SafeModeException,
9017          UnresolvedLinkException, IOException {
9018        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
9019        if (cacheEntry != null && cacheEntry.isSuccess()) {
9020          return; // Return previous response
9021        }
9022        boolean success = false;
9023        try {
9024          setXAttrInt(src, xAttr, flag, cacheEntry != null);
9025          success = true;
9026        } catch (AccessControlException e) {
9027          logAuditEvent(false, "setXAttr", src);
9028          throw e;
9029        } finally {
9030          RetryCache.setState(cacheEntry, success);
9031        }
9032      }
9033      
9034      private void setXAttrInt(final String srcArg, XAttr xAttr,
9035          EnumSet<XAttrSetFlag> flag, boolean logRetryCache) throws IOException {
9036        String src = srcArg;
9037        nnConf.checkXAttrsConfigFlag();
9038        checkXAttrSize(xAttr);
9039        HdfsFileStatus resultingStat = null;
9040        FSPermissionChecker pc = getPermissionChecker();
9041        XAttrPermissionFilter.checkPermissionForApi(pc, xAttr,
9042            FSDirectory.isReservedRawName(src));
9043        checkOperation(OperationCategory.WRITE);
9044        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
9045        writeLock();
9046        try {
9047          checkOperation(OperationCategory.WRITE);
9048          checkNameNodeSafeMode("Cannot set XAttr on " + src);
9049          src = resolvePath(src, pathComponents);
9050          checkXAttrChangeAccess(src, xAttr, pc);
9051          List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1);
9052          xAttrs.add(xAttr);
9053          dir.setXAttrs(src, xAttrs, flag);
9054          getEditLog().logSetXAttrs(src, xAttrs, logRetryCache);
9055          resultingStat = getAuditFileInfo(src, false);
9056        } finally {
9057          writeUnlock();
9058        }
9059        getEditLog().logSync();
9060        logAuditEvent(true, "setXAttr", srcArg, null, resultingStat);
9061      }
9062    
9063      /**
9064       * Verifies that the combined size of the name and value of an xattr is within
9065       * the configured limit. Setting a limit of zero disables this check.
9066       */
9067      private void checkXAttrSize(XAttr xAttr) {
9068        if (nnConf.xattrMaxSize == 0) {
9069          return;
9070        }
9071        int size = xAttr.getName().getBytes(Charsets.UTF_8).length;
9072        if (xAttr.getValue() != null) {
9073          size += xAttr.getValue().length;
9074        }
9075        if (size > nnConf.xattrMaxSize) {
9076          throw new HadoopIllegalArgumentException(
9077              "The XAttr is too big. The maximum combined size of the"
9078              + " name and value is " + nnConf.xattrMaxSize
9079              + ", but the total size is " + size);
9080        }
9081      }
9082      
9083      List<XAttr> getXAttrs(final String srcArg, List<XAttr> xAttrs)
9084          throws IOException {
9085        String src = srcArg;
9086        nnConf.checkXAttrsConfigFlag();
9087        FSPermissionChecker pc = getPermissionChecker();
9088        final boolean isRawPath = FSDirectory.isReservedRawName(src);
9089        boolean getAll = xAttrs == null || xAttrs.isEmpty();
9090        if (!getAll) {
9091          try {
9092            XAttrPermissionFilter.checkPermissionForApi(pc, xAttrs, isRawPath);
9093          } catch (AccessControlException e) {
9094            logAuditEvent(false, "getXAttrs", srcArg);
9095            throw e;
9096          }
9097        }
9098        checkOperation(OperationCategory.READ);
9099        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
9100        readLock();
9101        try {
9102          src = resolvePath(src, pathComponents);
9103          checkOperation(OperationCategory.READ);
9104          if (isPermissionEnabled) {
9105            checkPathAccess(pc, src, FsAction.READ);
9106          }
9107          List<XAttr> all = dir.getXAttrs(src);
9108          List<XAttr> filteredAll = XAttrPermissionFilter.
9109              filterXAttrsForApi(pc, all, isRawPath);
9110          if (getAll) {
9111            return filteredAll;
9112          } else {
9113            if (filteredAll == null || filteredAll.isEmpty()) {
9114              return null;
9115            }
9116            List<XAttr> toGet = Lists.newArrayListWithCapacity(xAttrs.size());
9117            for (XAttr xAttr : xAttrs) {
9118              boolean foundIt = false;
9119              for (XAttr a : filteredAll) {
9120                if (xAttr.getNameSpace() == a.getNameSpace()
9121                    && xAttr.getName().equals(a.getName())) {
9122                  toGet.add(a);
9123                  foundIt = true;
9124                  break;
9125                }
9126              }
9127              if (!foundIt) {
9128                throw new IOException(
9129                    "At least one of the attributes provided was not found.");
9130            }
9131            }
9132            return toGet;
9133          }
9134        } catch (AccessControlException e) {
9135          logAuditEvent(false, "getXAttrs", srcArg);
9136          throw e;
9137        } finally {
9138          readUnlock();
9139        }
9140      }
9141    
9142      List<XAttr> listXAttrs(String src) throws IOException {
9143        nnConf.checkXAttrsConfigFlag();
9144        final FSPermissionChecker pc = getPermissionChecker();
9145        final boolean isRawPath = FSDirectory.isReservedRawName(src);
9146        checkOperation(OperationCategory.READ);
9147        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
9148        readLock();
9149        try {
9150          src = resolvePath(src, pathComponents);
9151          checkOperation(OperationCategory.READ);
9152          if (isPermissionEnabled) {
9153            /* To access xattr names, you need EXECUTE in the owning directory. */
9154            checkParentAccess(pc, src, FsAction.EXECUTE);
9155          }
9156          final List<XAttr> all = dir.getXAttrs(src);
9157          final List<XAttr> filteredAll = XAttrPermissionFilter.
9158            filterXAttrsForApi(pc, all, isRawPath);
9159          return filteredAll;
9160        } catch (AccessControlException e) {
9161          logAuditEvent(false, "listXAttrs", src);
9162          throw e;
9163        } finally {
9164          readUnlock();
9165        }
9166      }
9167      
9168      /**
9169       * Remove an xattr for a file or directory.
9170       *
9171       * @param src
9172       *          - path to remove the xattr from
9173       * @param xAttr
9174       *          - xAttr to remove
9175       * @throws AccessControlException
9176       * @throws SafeModeException
9177       * @throws UnresolvedLinkException
9178       * @throws IOException
9179       */
9180      void removeXAttr(String src, XAttr xAttr) throws IOException {
9181        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
9182        if (cacheEntry != null && cacheEntry.isSuccess()) {
9183          return; // Return previous response
9184        }
9185        boolean success = false;
9186        try {
9187          removeXAttrInt(src, xAttr, cacheEntry != null);
9188          success = true;
9189        } catch (AccessControlException e) {
9190          logAuditEvent(false, "removeXAttr", src);
9191          throw e;
9192        } finally {
9193          RetryCache.setState(cacheEntry, success);
9194        }
9195      }
9196    
9197      void removeXAttrInt(final String srcArg, XAttr xAttr, boolean logRetryCache)
9198          throws IOException {
9199        String src = srcArg;
9200        nnConf.checkXAttrsConfigFlag();
9201        HdfsFileStatus resultingStat = null;
9202        FSPermissionChecker pc = getPermissionChecker();
9203        XAttrPermissionFilter.checkPermissionForApi(pc, xAttr,
9204            FSDirectory.isReservedRawName(src));
9205        checkOperation(OperationCategory.WRITE);
9206        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
9207        writeLock();
9208        try {
9209          checkOperation(OperationCategory.WRITE);
9210          checkNameNodeSafeMode("Cannot remove XAttr entry on " + src);
9211          src = resolvePath(src, pathComponents);
9212          checkXAttrChangeAccess(src, xAttr, pc);
9213    
9214          List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1);
9215          xAttrs.add(xAttr);
9216          List<XAttr> removedXAttrs = dir.removeXAttrs(src, xAttrs);
9217          if (removedXAttrs != null && !removedXAttrs.isEmpty()) {
9218            getEditLog().logRemoveXAttrs(src, removedXAttrs, logRetryCache);
9219          } else {
9220            throw new IOException(
9221                "No matching attributes found for remove operation");
9222          }
9223          resultingStat = getAuditFileInfo(src, false);
9224        } finally {
9225          writeUnlock();
9226        }
9227        getEditLog().logSync();
9228        logAuditEvent(true, "removeXAttr", srcArg, null, resultingStat);
9229      }
9230    
9231      private void checkXAttrChangeAccess(String src, XAttr xAttr,
9232          FSPermissionChecker pc) throws UnresolvedLinkException,
9233          AccessControlException {
9234        if (isPermissionEnabled && xAttr.getNameSpace() == XAttr.NameSpace.USER) {
9235          final INode inode = dir.getINode(src);
9236          if (inode != null &&
9237              inode.isDirectory() &&
9238              inode.getFsPermission().getStickyBit()) {
9239            if (!pc.isSuperUser()) {
9240              checkOwner(pc, src);
9241            }
9242          } else {
9243            checkPathAccess(pc, src, FsAction.WRITE);
9244          }
9245        }
9246      }
9247    
9248      void checkAccess(String src, FsAction mode) throws AccessControlException,
9249          FileNotFoundException, UnresolvedLinkException, IOException {
9250        checkOperation(OperationCategory.READ);
9251        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
9252        readLock();
9253        try {
9254          checkOperation(OperationCategory.READ);
9255          src = FSDirectory.resolvePath(src, pathComponents, dir);
9256          if (dir.getINode(src) == null) {
9257            throw new FileNotFoundException("Path not found");
9258          }
9259          if (isPermissionEnabled) {
9260            FSPermissionChecker pc = getPermissionChecker();
9261            checkPathAccess(pc, src, mode);
9262          }
9263        } catch (AccessControlException e) {
9264          logAuditEvent(false, "checkAccess", src);
9265          throw e;
9266        } finally {
9267          readUnlock();
9268        }
9269      }
9270    
9271      /**
9272       * Default AuditLogger implementation; used when no access logger is
9273       * defined in the config file. It can also be explicitly listed in the
9274       * config file.
9275       */
9276      private static class DefaultAuditLogger extends HdfsAuditLogger {
9277    
9278        private boolean logTokenTrackingId;
9279    
9280        @Override
9281        public void initialize(Configuration conf) {
9282          logTokenTrackingId = conf.getBoolean(
9283              DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
9284              DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT);
9285        }
9286    
9287        @Override
9288        public void logAuditEvent(boolean succeeded, String userName,
9289            InetAddress addr, String cmd, String src, String dst,
9290            FileStatus status, UserGroupInformation ugi,
9291            DelegationTokenSecretManager dtSecretManager) {
9292          if (auditLog.isInfoEnabled()) {
9293            final StringBuilder sb = auditBuffer.get();
9294            sb.setLength(0);
9295            sb.append("allowed=").append(succeeded).append("\t");
9296            sb.append("ugi=").append(userName).append("\t");
9297            sb.append("ip=").append(addr).append("\t");
9298            sb.append("cmd=").append(cmd).append("\t");
9299            sb.append("src=").append(src).append("\t");
9300            sb.append("dst=").append(dst).append("\t");
9301            if (null == status) {
9302              sb.append("perm=null");
9303            } else {
9304              sb.append("perm=");
9305              sb.append(status.getOwner()).append(":");
9306              sb.append(status.getGroup()).append(":");
9307              sb.append(status.getPermission());
9308            }
9309            if (logTokenTrackingId) {
9310              sb.append("\t").append("trackingId=");
9311              String trackingId = null;
9312              if (ugi != null && dtSecretManager != null
9313                  && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) {
9314                for (TokenIdentifier tid: ugi.getTokenIdentifiers()) {
9315                  if (tid instanceof DelegationTokenIdentifier) {
9316                    DelegationTokenIdentifier dtid =
9317                        (DelegationTokenIdentifier)tid;
9318                    trackingId = dtSecretManager.getTokenTrackingId(dtid);
9319                    break;
9320                  }
9321                }
9322              }
9323              sb.append(trackingId);
9324            }
9325            sb.append("\t").append("proto=");
9326            sb.append(NamenodeWebHdfsMethods.isWebHdfsInvocation() ? "webhdfs" : "rpc");
9327            logAuditMessage(sb.toString());
9328          }
9329        }
9330    
9331        public void logAuditMessage(String message) {
9332          auditLog.info(message);
9333        }
9334      }
9335    
9336      private static void enableAsyncAuditLog() {
9337        if (!(auditLog instanceof Log4JLogger)) {
9338          LOG.warn("Log4j is required to enable async auditlog");
9339          return;
9340        }
9341        Logger logger = ((Log4JLogger)auditLog).getLogger();
9342        @SuppressWarnings("unchecked")
9343        List<Appender> appenders = Collections.list(logger.getAllAppenders());
9344        // failsafe against trying to async it more than once
9345        if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
9346          AsyncAppender asyncAppender = new AsyncAppender();
9347          // change logger to have an async appender containing all the
9348          // previously configured appenders
9349          for (Appender appender : appenders) {
9350            logger.removeAppender(appender);
9351            asyncAppender.addAppender(appender);
9352          }
9353          logger.addAppender(asyncAppender);        
9354        }
9355      }
9356    }
9357