What the TikaDocumentReaderServiceImpl configuration looks like:


<configuration>
   <!--
      Registers TikaDocumentReaderServiceImpl as the implementation of
      DocumentReaderService. The service is configured in two ways:
        * old-style document readers, each added through the
          addDocumentReader set-method as a component plugin;
        * a Tika configuration file, referenced by the
          "tika-configuration" value-param.
   -->
   <component>
      <key>org.exoplatform.services.document.DocumentReaderService</key>
      <type>org.exoplatform.services.document.impl.tika.TikaDocumentReaderServiceImpl</type>

      <!-- Old-style document readers -->
      <component-plugins>
         <!-- PDF -->
         <component-plugin>
            <name>pdf.document.reader</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.PDFDocumentReader</type>
            <description>to read the pdf inputstream</description>
         </component-plugin>

         <!-- MS Word -->
         <component-plugin>
            <name>document.readerMSWord</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.MSWordDocumentReader</type>
            <description>to read the ms word inputstream</description>
         </component-plugin>

         <component-plugin>
            <name>document.readerMSXWord</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.MSXWordDocumentReader</type>
            <description>to read the ms word inputstream</description>
         </component-plugin>

         <!-- MS Excel -->
         <component-plugin>
            <name>document.readerMSExcel</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.MSExcelDocumentReader</type>
            <description>to read the ms excel inputstream</description>
         </component-plugin>

         <component-plugin>
            <name>document.readerMSXExcel</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.MSXExcelDocumentReader</type>
            <description>to read the ms excel inputstream</description>
         </component-plugin>

         <!-- MS Outlook -->
         <component-plugin>
            <name>document.readerMSOutlook</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.MSOutlookDocumentReader</type>
            <description>to read the ms outlook inputstream</description>
         </component-plugin>

         <!-- MS PowerPoint -->
         <component-plugin>
            <name>PPTdocument.reader</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.PPTDocumentReader</type>
            <description>to read the ms ppt inputstream</description>
         </component-plugin>

         <component-plugin>
            <name>MSXPPTdocument.reader</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.MSXPPTDocumentReader</type>
            <description>to read the ms pptx inputstream</description>
         </component-plugin>

         <!-- HTML and XML -->
         <component-plugin>
            <name>document.readerHTML</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.HTMLDocumentReader</type>
            <description>to read the html inputstream</description>
         </component-plugin>

         <component-plugin>
            <name>document.readerXML</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.XMLDocumentReader</type>
            <description>to read the xml inputstream</description>
         </component-plugin>

         <!-- Plain text -->
         <component-plugin>
            <name>TPdocument.reader</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.TextPlainDocumentReader</type>
            <description>to read the plain text inputstream</description>
            <init-params>
               <!--
                  Disabled example of a defaultEncoding parameter:
                  <values-param> <name>defaultEncoding</name> <description>description</description> <value>UTF-8</value>
                  </values-param>
               -->
            </init-params>
         </component-plugin>

         <!-- OpenOffice -->
         <component-plugin>
            <name>document.readerOO</name>
            <set-method>addDocumentReader</set-method>
            <type>org.exoplatform.services.document.impl.OpenOfficeDocumentReader</type>
            <description>to read the OO inputstream</description>
         </component-plugin>

      </component-plugins>

      <!-- Location of the Tika configuration file used by the service -->
      <init-params>
        <value-param>
          <name>tika-configuration</name>
          <value>jar:/conf/portal/tika-config.xml</value>
        </value-param>
      </init-params>

   </component>
</configuration>

tika-config.xml example:


<properties>

  <!-- NOTE(review): presumably turns off magic-byte based type detection; confirm against the Tika version in use -->
  <mimeTypeRepository magic="false"/>

  <!-- Each parser lists the mime types it is registered for -->
  <parsers>

    <!-- XML -->
    <parser name="parse-dcxml" class="org.apache.tika.parser.xml.DcXMLParser">
      <mime>application/xml</mime>
      <mime>image/svg+xml</mime>
      <mime>text/xml</mime>
      <mime>application/x-google-gadget</mime>
    </parser>

    <!-- Microsoft Office binary formats -->
    <parser name="parse-office" class="org.apache.tika.parser.microsoft.OfficeParser">
      <mime>application/excel</mime>
      <mime>application/xls</mime>
      <mime>application/msworddoc</mime>
      <mime>application/msworddot</mime>
      <mime>application/powerpoint</mime>
      <mime>application/ppt</mime>

      <mime>application/x-tika-msoffice</mime>
      <mime>application/msword</mime>
      <mime>application/vnd.ms-excel</mime>
      <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime>
      <mime>application/vnd.ms-powerpoint</mime>
      <mime>application/vnd.visio</mime>
      <mime>application/vnd.ms-outlook</mime>
    </parser>

    <!-- Microsoft Office Open XML formats -->
    <parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
      <mime>application/x-tika-ooxml</mime>
      <mime>application/vnd.openxmlformats-package.core-properties+xml</mime>
      <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime>
      <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime>
      <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
      <mime>application/vnd.ms-excel.template.macroenabled.12</mime>
      <mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
      <mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime>
      <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime>
      <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime>
      <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
      <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
      <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
      <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime>
      <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime>
      <mime>application/vnd.ms-word.document.macroenabled.12</mime>
      <mime>application/vnd.ms-word.template.macroenabled.12</mime>
    </parser>

    <!-- HTML -->
    <parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">
      <mime>text/html</mime>
    </parser>

    <!-- RTF -->
    <parser name="parse-rtf" class="org.apache.tika.parser.rtf.RTFParser">
      <mime>application/rtf</mime>
    </parser>

    <!-- PDF -->
    <parser name="parse-pdf" class="org.apache.tika.parser.pdf.PDFParser">
      <mime>application/pdf</mime>
    </parser>

    <!-- Plain text and script sources -->
    <parser name="parse-txt" class="org.apache.tika.parser.txt.TXTParser">
      <mime>text/plain</mime>
      <mime>script/groovy</mime>
      <mime>application/x-groovy</mime>
      <mime>application/x-javascript</mime>
      <mime>application/javascript</mime>
      <mime>text/javascript</mime>
    </parser>

    <!-- OpenOffice / OpenDocument formats -->
    <parser name="parse-openoffice" class="org.apache.tika.parser.opendocument.OpenOfficeParser">
      <mime>application/vnd.oasis.opendocument.database</mime>
      <mime>application/vnd.sun.xml.writer</mime>
      <mime>application/vnd.oasis.opendocument.text</mime>
      <mime>application/vnd.oasis.opendocument.graphics</mime>
      <mime>application/vnd.oasis.opendocument.presentation</mime>
      <mime>application/vnd.oasis.opendocument.spreadsheet</mime>
      <mime>application/vnd.oasis.opendocument.chart</mime>
      <mime>application/vnd.oasis.opendocument.image</mime>
      <mime>application/vnd.oasis.opendocument.formula</mime>
      <mime>application/vnd.oasis.opendocument.text-master</mime>
      <mime>application/vnd.oasis.opendocument.text-web</mime>
      <mime>application/vnd.oasis.opendocument.text-template</mime>
      <mime>application/vnd.oasis.opendocument.graphics-template</mime>
      <mime>application/vnd.oasis.opendocument.presentation-template</mime>
      <mime>application/vnd.oasis.opendocument.spreadsheet-template</mime>
      <mime>application/vnd.oasis.opendocument.chart-template</mime>
      <mime>application/vnd.oasis.opendocument.image-template</mime>
      <mime>application/vnd.oasis.opendocument.formula-template</mime>
      <mime>application/x-vnd.oasis.opendocument.text</mime>
      <mime>application/x-vnd.oasis.opendocument.graphics</mime>
      <mime>application/x-vnd.oasis.opendocument.presentation</mime>
      <mime>application/x-vnd.oasis.opendocument.spreadsheet</mime>
      <mime>application/x-vnd.oasis.opendocument.chart</mime>
      <mime>application/x-vnd.oasis.opendocument.image</mime>
      <mime>application/x-vnd.oasis.opendocument.formula</mime>
      <mime>application/x-vnd.oasis.opendocument.text-master</mime>
      <mime>application/x-vnd.oasis.opendocument.text-web</mime>
      <mime>application/x-vnd.oasis.opendocument.text-template</mime>
      <mime>application/x-vnd.oasis.opendocument.graphics-template</mime>
      <mime>application/x-vnd.oasis.opendocument.presentation-template</mime>
      <mime>application/x-vnd.oasis.opendocument.spreadsheet-template</mime>
      <mime>application/x-vnd.oasis.opendocument.chart-template</mime>
      <mime>application/x-vnd.oasis.opendocument.image-template</mime>
      <mime>application/x-vnd.oasis.opendocument.formula-template</mime>
    </parser>

    <!-- Images -->
    <parser name="parse-image" class="org.apache.tika.parser.image.ImageParser">
      <mime>image/bmp</mime>
      <mime>image/gif</mime>
      <mime>image/jpeg</mime>
      <mime>image/png</mime>
      <mime>image/tiff</mime>
      <mime>image/vnd.wap.wbmp</mime>
      <mime>image/x-icon</mime>
      <mime>image/x-psd</mime>
      <mime>image/x-xcf</mime>
    </parser>

    <!-- Java class files -->
    <parser name="parse-class" class="org.apache.tika.parser.asm.ClassParser">
      <mime>application/x-tika-java-class</mime>
    </parser>

    <!-- Audio -->
    <parser name="parse-mp3" class="org.apache.tika.parser.mp3.Mp3Parser">
      <mime>audio/mpeg</mime>
    </parser>

    <parser name="parse-midi" class="org.apache.tika.parser.audio.MidiParser">
      <mime>application/x-midi</mime>
      <mime>audio/midi</mime>
    </parser>

    <parser name="parse-audio" class="org.apache.tika.parser.audio.AudioParser">
      <mime>audio/basic</mime>
      <mime>audio/x-wav</mime>
      <mime>audio/x-aiff</mime>
    </parser>

  </parsers>

</properties>

As you can see in the configuration above, both old-style DocumentReaders and new Tika parsers are registered.

"But MSWordDocumentReader and org.apache.tika.parser.microsoft.OfficeParser both refer to the same "application/msword" mimetype," an attentive reader may object. And that reader is right. However, only one DocumentReader will be fetched for that mimetype.

Old-style DocumentReaders declared in the configuration are registered in the DocumentReaderService. So each mimetype supported by those DocumentReaders has a registered pair (mimetype and DocumentReader), and the user will always get these DocumentReaders from the getDocumentReader(..) method. The Tika configuration is checked for Parsers only if no DocumentReader is already registered for the requested mimetype.