001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.util;
020    
021    import java.io.DataInputStream;
022    import java.io.DataOutputStream;
023    import java.io.IOException;
024    import java.nio.ByteBuffer;
025    import java.util.zip.Checksum;
026    
027    import org.apache.hadoop.classification.InterfaceAudience;
028    import org.apache.hadoop.classification.InterfaceStability;
029    import org.apache.hadoop.fs.ChecksumException;
030    
031    /**
032     * This class provides inteface and utilities for processing checksums for
033     * DFS data transfers.
034     */
035    @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
036    @InterfaceStability.Evolving
037    public class DataChecksum implements Checksum {
038      
039      // Misc constants
040      public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len
041      
042      // checksum types
043      public static final int CHECKSUM_NULL    = 0;
044      public static final int CHECKSUM_CRC32   = 1;
045      public static final int CHECKSUM_CRC32C  = 2;
046      public static final int CHECKSUM_DEFAULT = 3; 
047      public static final int CHECKSUM_MIXED   = 4;
048     
049      /** The checksum types */
050      public static enum Type {
051        NULL  (CHECKSUM_NULL, 0),
052        CRC32 (CHECKSUM_CRC32, 4),
053        CRC32C(CHECKSUM_CRC32C, 4),
054        DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
055        MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
056    
057        public final int id;
058        public final int size;
059        
060        private Type(int id, int size) {
061          this.id = id;
062          this.size = size;
063        }
064    
065        /** @return the type corresponding to the id. */
066        public static Type valueOf(int id) {
067          if (id < 0 || id >= values().length) {
068            throw new IllegalArgumentException("id=" + id
069                + " out of range [0, " + values().length + ")");
070          }
071          return values()[id];
072        }
073      }
074    
075    
076      public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
077        if ( bytesPerChecksum <= 0 ) {
078          return null;
079        }
080        
081        switch ( type ) {
082        case NULL :
083          return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
084        case CRC32 :
085          return new DataChecksum(type, new PureJavaCrc32(), bytesPerChecksum );
086        case CRC32C:
087          return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
088        default:
089          return null;  
090        }
091      }
092      
093      /**
094       * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
095       * @return DataChecksum of the type in the array or null in case of an error.
096       */
097      public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
098        if ( offset < 0 || bytes.length < offset + HEADER_LEN ) {
099          return null;
100        }
101        
102        // like readInt():
103        int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 
104                               ( (bytes[offset+2] & 0xff) << 16 ) |
105                               ( (bytes[offset+3] & 0xff) << 8 )  |
106                               ( (bytes[offset+4] & 0xff) );
107        return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
108      }
109      
110      /**
111       * This constructucts a DataChecksum by reading HEADER_LEN bytes from
112       * input stream <i>in</i>
113       */
114      public static DataChecksum newDataChecksum( DataInputStream in )
115                                     throws IOException {
116        int type = in.readByte();
117        int bpc = in.readInt();
118        DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
119        if ( summer == null ) {
120          throw new IOException( "Could not create DataChecksum of type " +
121                                 type + " with bytesPerChecksum " + bpc );
122        }
123        return summer;
124      }
125      
126      /**
127       * Writes the checksum header to the output stream <i>out</i>.
128       */
129      public void writeHeader( DataOutputStream out ) 
130                               throws IOException { 
131        out.writeByte( type.id );
132        out.writeInt( bytesPerChecksum );
133      }
134    
135      public byte[] getHeader() {
136        byte[] header = new byte[DataChecksum.HEADER_LEN];
137        header[0] = (byte) (type.id & 0xff);
138        // Writing in buffer just like DataOutput.WriteInt()
139        header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
140        header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
141        header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
142        header[1+3] = (byte) (bytesPerChecksum & 0xff);
143        return header;
144      }
145      
146      /**
147       * Writes the current checksum to the stream.
148       * If <i>reset</i> is true, then resets the checksum.
149       * @return number of bytes written. Will be equal to getChecksumSize();
150       */
151       public int writeValue( DataOutputStream out, boolean reset )
152                              throws IOException {
153         if ( type.size <= 0 ) {
154           return 0;
155         }
156    
157         if ( type.size == 4 ) {
158           out.writeInt( (int) summer.getValue() );
159         } else {
160           throw new IOException( "Unknown Checksum " + type );
161         }
162         
163         if ( reset ) {
164           reset();
165         }
166         
167         return type.size;
168       }
169       
170       /**
171        * Writes the current checksum to a buffer.
172        * If <i>reset</i> is true, then resets the checksum.
173        * @return number of bytes written. Will be equal to getChecksumSize();
174        */
175        public int writeValue( byte[] buf, int offset, boolean reset )
176                               throws IOException {
177          if ( type.size <= 0 ) {
178            return 0;
179          }
180    
181          if ( type.size == 4 ) {
182            int checksum = (int) summer.getValue();
183            buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
184            buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
185            buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
186            buf[offset+3] = (byte) (checksum & 0xff);
187          } else {
188            throw new IOException( "Unknown Checksum " + type );
189          }
190          
191          if ( reset ) {
192            reset();
193          }
194          
195          return type.size;
196        }
197       
198       /**
199        * Compares the checksum located at buf[offset] with the current checksum.
200        * @return true if the checksum matches and false otherwise.
201        */
202       public boolean compare( byte buf[], int offset ) {
203         if ( type.size == 4 ) {
204           int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 
205                          ( (buf[offset+1] & 0xff) << 16 ) |
206                          ( (buf[offset+2] & 0xff) << 8 )  |
207                          ( (buf[offset+3] & 0xff) );
208           return checksum == (int) summer.getValue();
209         }
210         return type.size == 0;
211       }
212       
213      private final Type type;
214      private final Checksum summer;
215      private final int bytesPerChecksum;
216      private int inSum = 0;
217      
218      private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
219        this.type = type;
220        summer = checksum;
221        bytesPerChecksum = chunkSize;
222      }
223      
224      // Accessors
225      public Type getChecksumType() {
226        return type;
227      }
228      public int getChecksumSize() {
229        return type.size;
230      }
231      public int getBytesPerChecksum() {
232        return bytesPerChecksum;
233      }
234      public int getNumBytesInSum() {
235        return inSum;
236      }
237      
238      public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
239      static public int getChecksumHeaderSize() {
240        return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
241      }
242      //Checksum Interface. Just a wrapper around member summer.
243      @Override
244      public long getValue() {
245        return summer.getValue();
246      }
247      @Override
248      public void reset() {
249        summer.reset();
250        inSum = 0;
251      }
252      @Override
253      public void update( byte[] b, int off, int len ) {
254        if ( len > 0 ) {
255          summer.update( b, off, len );
256          inSum += len;
257        }
258      }
259      @Override
260      public void update( int b ) {
261        summer.update( b );
262        inSum += 1;
263      }
264      
265      /**
266       * Verify that the given checksums match the given data.
267       * 
268       * The 'mark' of the ByteBuffer parameters may be modified by this function,.
269       * but the position is maintained.
270       *  
271       * @param data the DirectByteBuffer pointing to the data to verify.
272       * @param checksums the DirectByteBuffer pointing to a series of stored
273       *                  checksums
274       * @param fileName the name of the file being read, for error-reporting
275       * @param basePos the file position to which the start of 'data' corresponds
276       * @throws ChecksumException if the checksums do not match
277       */
278      public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
279          String fileName, long basePos)
280      throws ChecksumException {
281        if (type.size == 0) return;
282        
283        if (data.hasArray() && checksums.hasArray()) {
284          verifyChunkedSums(
285              data.array(), data.arrayOffset() + data.position(), data.remaining(),
286              checksums.array(), checksums.arrayOffset() + checksums.position(),
287              fileName, basePos);
288          return;
289        }
290        if (NativeCrc32.isAvailable()) {
291          NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
292              fileName, basePos);
293          return;
294        }
295        
296        int startDataPos = data.position();
297        data.mark();
298        checksums.mark();
299        try {
300          byte[] buf = new byte[bytesPerChecksum];
301          byte[] sum = new byte[type.size];
302          while (data.remaining() > 0) {
303            int n = Math.min(data.remaining(), bytesPerChecksum);
304            checksums.get(sum);
305            data.get(buf, 0, n);
306            summer.reset();
307            summer.update(buf, 0, n);
308            int calculated = (int)summer.getValue();
309            int stored = (sum[0] << 24 & 0xff000000) |
310              (sum[1] << 16 & 0xff0000) |
311              (sum[2] << 8 & 0xff00) |
312              sum[3] & 0xff;
313            if (calculated != stored) {
314              long errPos = basePos + data.position() - startDataPos - n;
315              throw new ChecksumException(
316                  "Checksum error: "+ fileName + " at "+ errPos +
317                  " exp: " + stored + " got: " + calculated, errPos);
318            }
319          }
320        } finally {
321          data.reset();
322          checksums.reset();
323        }
324      }
325      
326      /**
327       * Implementation of chunked verification specifically on byte arrays. This
328       * is to avoid the copy when dealing with ByteBuffers that have array backing.
329       */
330      private void verifyChunkedSums(
331          byte[] data, int dataOff, int dataLen,
332          byte[] checksums, int checksumsOff, String fileName,
333          long basePos) throws ChecksumException {
334        
335        int remaining = dataLen;
336        int dataPos = 0;
337        while (remaining > 0) {
338          int n = Math.min(remaining, bytesPerChecksum);
339          
340          summer.reset();
341          summer.update(data, dataOff + dataPos, n);
342          dataPos += n;
343          remaining -= n;
344          
345          int calculated = (int)summer.getValue();
346          int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
347            (checksums[checksumsOff + 1] << 16 & 0xff0000) |
348            (checksums[checksumsOff + 2] << 8 & 0xff00) |
349            checksums[checksumsOff + 3] & 0xff;
350          checksumsOff += 4;
351          if (calculated != stored) {
352            long errPos = basePos + dataPos - n;
353            throw new ChecksumException(
354                "Checksum error: "+ fileName + " at "+ errPos +
355                " exp: " + stored + " got: " + calculated, errPos);
356          }
357        }
358      }
359    
360      /**
361       * Calculate checksums for the given data.
362       * 
363       * The 'mark' of the ByteBuffer parameters may be modified by this function,
364       * but the position is maintained.
365       * 
366       * @param data the DirectByteBuffer pointing to the data to checksum.
367       * @param checksums the DirectByteBuffer into which checksums will be
368       *                  stored. Enough space must be available in this
369       *                  buffer to put the checksums.
370       */
371      public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
372        if (type.size == 0) return;
373        
374        if (data.hasArray() && checksums.hasArray()) {
375          calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
376              checksums.array(), checksums.arrayOffset() + checksums.position());
377          return;
378        }
379        
380        data.mark();
381        checksums.mark();
382        try {
383          byte[] buf = new byte[bytesPerChecksum];
384          while (data.remaining() > 0) {
385            int n = Math.min(data.remaining(), bytesPerChecksum);
386            data.get(buf, 0, n);
387            summer.reset();
388            summer.update(buf, 0, n);
389            checksums.putInt((int)summer.getValue());
390          }
391        } finally {
392          data.reset();
393          checksums.reset();
394        }
395      }
396    
397      /**
398       * Implementation of chunked calculation specifically on byte arrays. This
399       * is to avoid the copy when dealing with ByteBuffers that have array backing.
400       */
401      private void calculateChunkedSums(
402          byte[] data, int dataOffset, int dataLength,
403          byte[] sums, int sumsOffset) {
404    
405        int remaining = dataLength;
406        while (remaining > 0) {
407          int n = Math.min(remaining, bytesPerChecksum);
408          summer.reset();
409          summer.update(data, dataOffset, n);
410          dataOffset += n;
411          remaining -= n;
412          long calculated = summer.getValue();
413          sums[sumsOffset++] = (byte) (calculated >> 24);
414          sums[sumsOffset++] = (byte) (calculated >> 16);
415          sums[sumsOffset++] = (byte) (calculated >> 8);
416          sums[sumsOffset++] = (byte) (calculated);
417        }
418      }
419    
420      @Override
421      public boolean equals(Object other) {
422        if (!(other instanceof DataChecksum)) {
423          return false;
424        }
425        DataChecksum o = (DataChecksum)other;
426        return o.bytesPerChecksum == this.bytesPerChecksum &&
427          o.type == this.type;
428      }
429      
430      @Override
431      public int hashCode() {
432        return (this.type.id + 31) * this.bytesPerChecksum;
433      }
434      
435      @Override
436      public String toString() {
437        return "DataChecksum(type=" + type +
438          ", chunkSize=" + bytesPerChecksum + ")";
439      }
440      
441      /**
442       * This just provides a dummy implimentation for Checksum class
443       * This is used when there is no checksum available or required for 
444       * data
445       */
446      static class ChecksumNull implements Checksum {
447        
448        public ChecksumNull() {}
449        
450        //Dummy interface
451        @Override
452        public long getValue() { return 0; }
453        @Override
454        public void reset() {}
455        @Override
456        public void update(byte[] b, int off, int len) {}
457        @Override
458        public void update(int b) {}
459      };
460    }