001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.blockmanagement;
019    
020    import java.io.IOException;
021    import java.util.ArrayList;
022    import java.util.Iterator;
023    import java.util.List;
024    
025    import org.apache.hadoop.hdfs.protocol.Block;
026    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
027    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
028    import org.apache.hadoop.hdfs.server.namenode.NameNode;
029    
030    /**
031     * Represents a block that is currently being constructed.<br>
032     * This is usually the last block of a file opened for write or append.
033     */
034    public class BlockInfoUnderConstruction extends BlockInfo {
035      /** Block state. See {@link BlockUCState} */
036      private BlockUCState blockUCState;
037    
038      /**
039       * Block replicas as assigned when the block was allocated.
040       * This defines the pipeline order.
041       */
042      private List<ReplicaUnderConstruction> replicas;
043    
044      /**
045       * Index of the primary data node doing the recovery. Useful for log
046       * messages.
047       */
048      private int primaryNodeIndex = -1;
049    
050      /**
051       * The new generation stamp, which this block will have
052       * after the recovery succeeds. Also used as a recovery id to identify
053       * the right recovery if any of the abandoned recoveries re-appear.
054       */
055      private long blockRecoveryId = 0;
056    
057      /**
058       * ReplicaUnderConstruction contains information about replicas while
059       * they are under construction.
060       * The GS, the length and the state of the replica is as reported by 
061       * the data-node.
062       * It is not guaranteed, but expected, that data-nodes actually have
063       * corresponding replicas.
064       */
065      static class ReplicaUnderConstruction extends Block {
066        private final DatanodeStorageInfo expectedLocation;
067        private ReplicaState state;
068        private boolean chosenAsPrimary;
069    
070        ReplicaUnderConstruction(Block block,
071                                 DatanodeStorageInfo target,
072                                 ReplicaState state) {
073          super(block);
074          this.expectedLocation = target;
075          this.state = state;
076          this.chosenAsPrimary = false;
077        }
078    
079        /**
080         * Expected block replica location as assigned when the block was allocated.
081         * This defines the pipeline order.
082         * It is not guaranteed, but expected, that the data-node actually has
083         * the replica.
084         */
085        private DatanodeStorageInfo getExpectedStorageLocation() {
086          return expectedLocation;
087        }
088    
089        /**
090         * Get replica state as reported by the data-node.
091         */
092        ReplicaState getState() {
093          return state;
094        }
095    
096        /**
097         * Whether the replica was chosen for recovery.
098         */
099        boolean getChosenAsPrimary() {
100          return chosenAsPrimary;
101        }
102    
103        /**
104         * Set replica state.
105         */
106        void setState(ReplicaState s) {
107          state = s;
108        }
109    
110        /**
111         * Set whether this replica was chosen for recovery.
112         */
113        void setChosenAsPrimary(boolean chosenAsPrimary) {
114          this.chosenAsPrimary = chosenAsPrimary;
115        }
116    
117        /**
118         * Is data-node the replica belongs to alive.
119         */
120        boolean isAlive() {
121          return expectedLocation.getDatanodeDescriptor().isAlive;
122        }
123    
124        @Override // Block
125        public int hashCode() {
126          return super.hashCode();
127        }
128    
129        @Override // Block
130        public boolean equals(Object obj) {
131          // Sufficient to rely on super's implementation
132          return (this == obj) || super.equals(obj);
133        }
134    
135        @Override
136        public String toString() {
137          final StringBuilder b = new StringBuilder(50);
138          appendStringTo(b);
139          return b.toString();
140        }
141        
142        @Override
143        public void appendStringTo(StringBuilder sb) {
144          sb.append("ReplicaUnderConstruction[")
145            .append(expectedLocation)
146            .append("|")
147            .append(state)
148            .append("]");
149        }
150      }
151    
152      /**
153       * Create block and set its state to
154       * {@link BlockUCState#UNDER_CONSTRUCTION}.
155       */
156      public BlockInfoUnderConstruction(Block blk, int replication) {
157        this(blk, replication, BlockUCState.UNDER_CONSTRUCTION, null);
158      }
159    
160      /**
161       * Create a block that is currently being constructed.
162       */
163      public BlockInfoUnderConstruction(Block blk, int replication,
164                                 BlockUCState state,
165                                 DatanodeStorageInfo[] targets) {
166        super(blk, replication);
167        assert getBlockUCState() != BlockUCState.COMPLETE :
168          "BlockInfoUnderConstruction cannot be in COMPLETE state";
169        this.blockUCState = state;
170        setExpectedLocations(targets);
171      }
172    
173      /**
174       * Convert an under construction block to a complete block.
175       * 
176       * @return BlockInfo - a complete block.
177       * @throws IOException if the state of the block 
178       * (the generation stamp and the length) has not been committed by 
179       * the client or it does not have at least a minimal number of replicas 
180       * reported from data-nodes. 
181       */
182      BlockInfo convertToCompleteBlock() throws IOException {
183        assert getBlockUCState() != BlockUCState.COMPLETE :
184          "Trying to convert a COMPLETE block";
185        return new BlockInfo(this);
186      }
187    
188      /** Set expected locations */
189      public void setExpectedLocations(DatanodeStorageInfo[] targets) {
190        int numLocations = targets == null ? 0 : targets.length;
191        this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations);
192        for(int i = 0; i < numLocations; i++)
193          replicas.add(
194            new ReplicaUnderConstruction(this, targets[i], ReplicaState.RBW));
195      }
196    
197      /**
198       * Create array of expected replica locations
199       * (as has been assigned by chooseTargets()).
200       */
201      public DatanodeStorageInfo[] getExpectedStorageLocations() {
202        int numLocations = replicas == null ? 0 : replicas.size();
203        DatanodeStorageInfo[] storages = new DatanodeStorageInfo[numLocations];
204        for(int i = 0; i < numLocations; i++)
205          storages[i] = replicas.get(i).getExpectedStorageLocation();
206        return storages;
207      }
208    
209      /** Get the number of expected locations */
210      public int getNumExpectedLocations() {
211        return replicas == null ? 0 : replicas.size();
212      }
213    
214      /**
215       * Return the state of the block under construction.
216       * @see BlockUCState
217       */
218      @Override // BlockInfo
219      public BlockUCState getBlockUCState() {
220        return blockUCState;
221      }
222    
223      void setBlockUCState(BlockUCState s) {
224        blockUCState = s;
225      }
226    
227      /** Get block recovery ID */
228      public long getBlockRecoveryId() {
229        return blockRecoveryId;
230      }
231    
232      /**
233       * Process the recorded replicas. When about to commit or finish the
234       * pipeline recovery sort out bad replicas.
235       * @param genStamp  The final generation stamp for the block.
236       */
237      public void setGenerationStampAndVerifyReplicas(long genStamp) {
238        // Set the generation stamp for the block.
239        setGenerationStamp(genStamp);
240        if (replicas == null)
241          return;
242    
243        // Remove the replicas with wrong gen stamp.
244        // The replica list is unchanged.
245        for (ReplicaUnderConstruction r : replicas) {
246          if (genStamp != r.getGenerationStamp()) {
247            r.getExpectedStorageLocation().removeBlock(this);
248            NameNode.blockStateChangeLog.info("BLOCK* Removing stale replica "
249                + "from location: " + r.getExpectedStorageLocation());
250          }
251        }
252      }
253    
254      /**
255       * Commit block's length and generation stamp as reported by the client.
256       * Set block state to {@link BlockUCState#COMMITTED}.
257       * @param block - contains client reported block length and generation 
258       * @throws IOException if block ids are inconsistent.
259       */
260      void commitBlock(Block block) throws IOException {
261        if(getBlockId() != block.getBlockId())
262          throw new IOException("Trying to commit inconsistent block: id = "
263              + block.getBlockId() + ", expected id = " + getBlockId());
264        blockUCState = BlockUCState.COMMITTED;
265        this.set(getBlockId(), block.getNumBytes(), block.getGenerationStamp());
266        // Sort out invalid replicas.
267        setGenerationStampAndVerifyReplicas(block.getGenerationStamp());
268      }
269    
270      /**
271       * Initialize lease recovery for this block.
272       * Find the first alive data-node starting from the previous primary and
273       * make it primary.
274       */
275      public void initializeBlockRecovery(long recoveryId) {
276        setBlockUCState(BlockUCState.UNDER_RECOVERY);
277        blockRecoveryId = recoveryId;
278        if (replicas.size() == 0) {
279          NameNode.blockStateChangeLog.warn("BLOCK*"
280            + " BlockInfoUnderConstruction.initLeaseRecovery:"
281            + " No blocks found, lease removed.");
282        }
283        boolean allLiveReplicasTriedAsPrimary = true;
284        for (int i = 0; i < replicas.size(); i++) {
285          // Check if all replicas have been tried or not.
286          if (replicas.get(i).isAlive()) {
287            allLiveReplicasTriedAsPrimary =
288                (allLiveReplicasTriedAsPrimary && replicas.get(i).getChosenAsPrimary());
289          }
290        }
291        if (allLiveReplicasTriedAsPrimary) {
292          // Just set all the replicas to be chosen whether they are alive or not.
293          for (int i = 0; i < replicas.size(); i++) {
294            replicas.get(i).setChosenAsPrimary(false);
295          }
296        }
297        long mostRecentLastUpdate = 0;
298        ReplicaUnderConstruction primary = null;
299        primaryNodeIndex = -1;
300        for(int i = 0; i < replicas.size(); i++) {
301          // Skip alive replicas which have been chosen for recovery.
302          if (!(replicas.get(i).isAlive() && !replicas.get(i).getChosenAsPrimary())) {
303            continue;
304          }
305          final ReplicaUnderConstruction ruc = replicas.get(i);
306          final long lastUpdate = ruc.getExpectedStorageLocation().getDatanodeDescriptor().getLastUpdate(); 
307          if (lastUpdate > mostRecentLastUpdate) {
308            primaryNodeIndex = i;
309            primary = ruc;
310            mostRecentLastUpdate = lastUpdate;
311          }
312        }
313        if (primary != null) {
314          primary.getExpectedStorageLocation().getDatanodeDescriptor().addBlockToBeRecovered(this);
315          primary.setChosenAsPrimary(true);
316          NameNode.blockStateChangeLog.info("BLOCK* " + this
317            + " recovery started, primary=" + primary);
318        }
319      }
320    
321      void addReplicaIfNotPresent(DatanodeStorageInfo storage,
322                         Block block,
323                         ReplicaState rState) {
324        Iterator<ReplicaUnderConstruction> it = replicas.iterator();
325        while (it.hasNext()) {
326          ReplicaUnderConstruction r = it.next();
327          DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation();
328          if(expectedLocation == storage) {
329            // Record the gen stamp from the report
330            r.setGenerationStamp(block.getGenerationStamp());
331            return;
332          } else if (expectedLocation != null &&
333                     expectedLocation.getDatanodeDescriptor() ==
334                         storage.getDatanodeDescriptor()) {
335    
336            // The Datanode reported that the block is on a different storage
337            // than the one chosen by BlockPlacementPolicy. This can occur as
338            // we allow Datanodes to choose the target storage. Update our
339            // state by removing the stale entry and adding a new one.
340            it.remove();
341            break;
342          }
343        }
344        replicas.add(new ReplicaUnderConstruction(block, storage, rState));
345      }
346    
347      @Override // BlockInfo
348      // BlockInfoUnderConstruction participates in maps the same way as BlockInfo
349      public int hashCode() {
350        return super.hashCode();
351      }
352    
353      @Override // BlockInfo
354      public boolean equals(Object obj) {
355        // Sufficient to rely on super's implementation
356        return (this == obj) || super.equals(obj);
357      }
358    
359      @Override
360      public String toString() {
361        final StringBuilder b = new StringBuilder(100);
362        appendStringTo(b);
363        return b.toString();
364      }
365    
366      @Override
367      public void appendStringTo(StringBuilder sb) {
368        super.appendStringTo(sb);
369        appendUCParts(sb);
370      }
371    
372      private void appendUCParts(StringBuilder sb) {
373        sb.append("{blockUCState=").append(blockUCState)
374          .append(", primaryNodeIndex=").append(primaryNodeIndex)
375          .append(", replicas=[");
376        if (replicas != null) {
377          Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
378          if (iter.hasNext()) {
379            iter.next().appendStringTo(sb);
380            while (iter.hasNext()) {
381              sb.append(", ");
382              iter.next().appendStringTo(sb);
383            }
384          }
385        }
386        sb.append("]}");
387      }
388    }