Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: Noted a more generic approach suggested by Mario Ivankovits

...

The contents of the content.txt and non-gzip.txt files are just directory listings; put anything you want in them. For this example the sample archive.tar is located in the /extra/data/tryVfs directory. You can see that path hardcoded in the Java example below. The content.txt and non-gzip.txt files will be extracted into the same location.

Key

...

Concepts

Building the resolveFile name String

An essential ingredient for this "recipe" is the name argument for the FileSystemManager.resolveFile(String name) method. See this in the lines defining and using String gzName (marked "line 100") in the ExtractFromGzipInTar.java code listing below. The important work of connecting to the content.txt file inside the content.txt.gz file inside the archive.tar file is performed by

...

gz:tar:file:///extra/data/tryVfs/archive.tar!/tardir/content.txt.gz!content.txt

Generic Drill-down

On line 90 I'm giving special attention to gzip files

No Format

if (extractFile.getName().getExtension().equals("gz"))

and other types of compression like zip and bzip2 (as well as nested archives like jar and tar) will not be expanded. To generically drill down and expand zip, bzip2, jar, tar files to arbitrary depth, eliminate the "gz" specific code and use instead

No Format

// Instead of testing for the "gz" extension, ask the manager whether it can
// layer a file system on top of extractFile (per the text above: zip, bzip2,
// jar, tar, ...).
if (manager.canCreateFileSystem(extractFile))
{
    // innerFile is presumably the root of the nested file system, whose
    // children can then be processed in turn -- verify against the VFS docs.
    FileObject innerFile = manager.createFileSystem(extractFile);
}

pom.xml Project file

This example uses Maven2. There is a pom.xml to define the project

...

No Format
/*
 * ExtractFromGzipInTar.java
 */
package gov.noaa.eds.tryVfs;

import org.apache.commons.vfs.AllFileSelector;
import org.apache.commons.vfs.FileName;
import org.apache.commons.vfs.FileObject;
import org.apache.commons.vfs.FileSystemException;
import org.apache.commons.vfs.FileSystemManager;
import org.apache.commons.vfs.FileType;
import org.apache.commons.vfs.FileTypeSelector;
import org.apache.commons.vfs.VFS;
import org.apache.commons.vfs.provider.local.LocalFile;

/**
 * Try using VFS to read the content of a compressed (gz) file inside of
 * a tar file. Extract tar file objects. If they are gzip files, decompress them.
 * Any directory structure in the tarfile is not being preserved, the contents
 * are pulled out to the same location regardless of directory hierarchy (for
 * the purposes of this example, all objects in the tar file have unique names,
 * so there are no file name conflicts).
 *
 * @author Ken Tanaka
 */
public class ExtractFromGzipInTar
{
    /** Suffix stripped from a gzipped entry's base name to get the decompressed name. */
    private static final String GZ_SUFFIX = ".gz";

    /** VFS manager used to resolve every URI; assigned in {@link #main(String[])}. */
    FileSystemManager fsManager = null;

    /** Directory that holds archive.tar and receives all extracted files. */
    static String extractDirname = "/extra/data/tryVfs";

    /**
     * Extract files from a tar file. If an entry is gzipped, its decompressed
     * content is copied straight out of the archive; the gzipped version itself
     * is never written to disk.
     *
     * @param args command line arguments are currently not used
     * @throws RuntimeException if the VFS manager cannot be obtained, or the
     *         tar file cannot be opened or listed
     */
    public static void main( String[] args )
    {
        ExtractFromGzipInTar extract = new ExtractFromGzipInTar();

        try {
            extract.fsManager = VFS.getManager();
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to get fsManager from VFS", ex);
        }

        /* Create a tarFile FileObject to connect to the tarfile on disk */
        String tarName = "tar:file://" + extractDirname + "/archive.tar";
        FileObject tarFile;
        try {
            System.out.println("Resolve " + tarName);
            tarFile = extract.fsManager.resolveFile(tarName);
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to open tar file " + tarName, ex);
        }

        FileName tarFileName = tarFile.getName();
        System.out.println("  Path     : " + tarFileName.getPath());
        System.out.println("  URI      : " + tarFileName.getURI());

        /* Work on files inside tarFile */
        FileObject[] children;
        try {
            children = tarFile.getChildren();
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to get contents of tarfile ", ex);
        }

        for (FileObject f : children) {
            extract.processChild(f);
        }
    } // main( String[] args )

    /**
     * Build the local file URI of a file placed directly in the extract directory.
     *
     * @param baseName file name without any directory part
     * @return the "file://" URI of that name inside {@code extractDirname}
     */
    private static String localUri(String baseName) {
        return "file://" + extractDirname + "/" + baseName;
    }

    /**
     * Extract one object of the tar file into the extract directory. Folders
     * are walked recursively; gzipped files are decompressed on the way out;
     * every other file is copied as-is. The directory hierarchy is flattened.
     *
     * @param f a file or folder inside the tar file
     * @throws RuntimeException wrapping the underlying FileSystemException if
     *         the object cannot be read or written
     */
    private void processChild(FileObject f) {
        try {
            if (f.getType() == FileType.FOLDER) {
                // Recursively process files in this folder
                for (FileObject subfile : f.getChildren()) {
                    processChild(subfile);
                }
                return;
            }

            FileName fname = f.getName();
            String extractName = localUri(fname.getBaseName());
            System.out.println("Extracting " + extractName);
            // FileObject already offers copyFrom(), so no cast to LocalFile is needed.
            FileObject extractFile = this.fsManager.resolveFile(extractName);

            // if the file is gzipped, decompress it
            /* marked "line 90" in the accompanying text */
            if (extractFile.getName().getExtension().equals("gz")) {
                System.out.println("Decompressing " + extractName);

                // The uncompressed filename we seek
                // content.txt
                // The extension is "gz", so the base name ends in ".gz": drop that suffix.
                String gzBaseName = extractFile.getName().getBaseName();
                String fileName = gzBaseName.substring(0,
                        gzBaseName.length() - GZ_SUFFIX.length());

                // Build the direct path to the uncompressed content of the
                // gzip file in the tar file.
                // gz:tar:file:///archive.tar!/tardir/content.txt.gz!content.txt
                /* marked "line 100" in the accompanying text */
                String gzName = "gz:" + fname.getURI() + "!" + fileName;
                FileObject gzFile = this.fsManager.resolveFile(gzName);

                // The decompressed path we want
                String decompName = localUri(fileName);
                FileObject decompFile = this.fsManager.resolveFile(decompName);

                // Some debug lines
                System.out.println("fileName   =" + fileName);
                System.out.println("decompName =" + decompName);
                System.out.println("gzName=" + gzName);

                // Extracting
                decompFile.copyFrom(gzFile, new FileTypeSelector(FileType.FILE));
            } else {
                // just extract the non-gzip file
                extractFile.copyFrom(f, new AllFileSelector());
            }
        } catch (FileSystemException ex) {
            // Keep the original exception as the cause so the real failure is not lost.
            throw new RuntimeException("Error working on tarfile object " + f.getName(), ex);
        }
    } // processChild(FileObject f)
}

...