You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 3 Next »

This example uses VFS to read the content of a compressed (gz) file stored inside a tar file. It extracts the objects in the tar file and, if they are gzip files, decompresses them. The directory structure inside the tar file is not preserved: the contents are all extracted to the same location regardless of directory hierarchy. (For the purposes of this example, all objects in the tar file have unique names, so there are no file name conflicts.)

Use a multiple step approach.

  1. extract gzipped file from tar file
  2. decompress gzipped content to a temporary directory
  3. move decompressed content to desired destination
  4. remove temporary directory
  5. remove gzipped file

There should be a cleaner, more direct route. Maybe someone more familiar with VFS can post better code.

Conceptually there is a tar file:

archive.tar
 +- tardir/
     +- content.txt.gz

I'd like to end up with an uncompressed file "content.txt". Create this sample archive.tar file with some (unix) commands like:

# Create some sample content, gzip it, and wrap it in a tar file under tardir/.
ls -l > content.txt
gzip content.txt
mkdir tardir
mv content.txt.gz tardir
tar cvf archive.tar tardir
# The directory is no longer needed once it has been archived.
rm -r tardir

For this example the sample archive.tar is located in the /extra/data/tryVfs directory. You can see that hardcoded in the java example below. The content.txt file will be extracted into the same location.

This example uses Maven2. The following pom.xml defines the project:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Project coordinates; the project is packaged as a jar. -->
    <groupId>gov.noaa.eds</groupId>
    <artifactId>tryVfs</artifactId>
    <packaging>jar</packaging>
    <version>1.0-SNAPSHOT</version>
    <name>Try apache commons vfs</name>
    <url>http://maven.apache.org</url>
    <build>
        <plugins>
            <plugin>
                <!-- Compile with Java 1.5 source and target levels. -->
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.5</source>
                    <target>1.5</target>
                </configuration>
            </plugin>
            <plugin>
                <!-- Usage: mvn assembly:assembly -->
                <!-- Uses the jar-with-dependencies descriptor and names MultiStep
                     as the main class in the jar manifest. -->
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass>gov.noaa.eds.tryVfs.MultiStep</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <!-- Apache Commons VFS, used by MultiStep for the tar and gz access. -->
        <dependency>
            <groupId>commons-vfs</groupId>
            <artifactId>commons-vfs</artifactId>
            <version>1.0</version>
        </dependency>
        <!-- JUnit is declared with test scope only. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>

Content of src/main/java/gov/noaa/eds/tryVfs/MultiStep.java

/*
 * MultiStep.java
 */
package gov.noaa.eds.tryVfs;

import org.apache.commons.vfs.AllFileSelector;
import org.apache.commons.vfs.FileName;
import org.apache.commons.vfs.FileObject;
import org.apache.commons.vfs.FileSystemException;
import org.apache.commons.vfs.FileSystemManager;
import org.apache.commons.vfs.FileType;
import org.apache.commons.vfs.FileTypeSelector;
import org.apache.commons.vfs.VFS;
import org.apache.commons.vfs.provider.local.LocalFile;

/**
 * Uses Commons VFS to extract the objects stored in a tar file and to
 * decompress any of them that are gzip files.
 *
 * The directory structure inside the tar file is not preserved: every object
 * is extracted into the same directory regardless of where it sat in the
 * archive. For the purposes of this example all objects in the tar file have
 * unique names, so there are no file name conflicts.
 *
 * A multiple step approach is used for each gzipped object:
 * 1. extract gzipped file from tar file
 * 2. decompress gzipped content to a temporary directory
 * 3. move decompressed content to desired destination
 * 4. remove temporary directory
 * 5. remove gzipped file
 *
 * There should be a cleaner more direct route, but I haven't discovered it yet.
 *
 * @author ktanaka
 */
public class MultiStep {
    /** Suffix stripped from a gzipped file's base name to get the decompressed name. */
    private static final String GZ_SUFFIX = ".gz";

    /** VFS manager used to resolve every file name; assigned in {@link #main}. */
    FileSystemManager fsManager = null;

    /** Directory that holds archive.tar and receives the extracted files. */
    static String extractDirname = "/extra/data/tryVfs";

    /** VFS object for {@link #extractDirname}; assigned in {@link #main}. */
    LocalFile extractDir = null;

    /**
     * Extract files from a tar file. If the file extracted is gzipped,
     * decompress it and remove the gzipped version.
     *
     * @param args command line arguments are currently not used
     * @throws RuntimeException if any VFS operation fails; the underlying
     *         FileSystemException is attached as the cause
     */
    public static void main( String[] args ) {
        MultiStep msExtract = new MultiStep();

        try {
            msExtract.fsManager = VFS.getManager();
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to get fsManager from VFS", ex);
        }

        try {
            msExtract.extractDir = (LocalFile) msExtract.fsManager.resolveFile("file://"
                    + extractDirname);
            if (! msExtract.extractDir.exists()) {
                msExtract.extractDir.createFolder();
            }
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to prepare extract directory "
                    + extractDirname, ex);
        }

        /* Create a tarFile object */
        FileObject tarFile;
        try {
            System.out.println("Resolve tar file:");
            // The archive lives in the extract directory, so derive its
            // location from extractDirname instead of repeating the path.
            tarFile = msExtract.fsManager.resolveFile(
                    "tar:" + extractDirname + "/archive.tar");

            FileName tarFileName = tarFile.getName();
            System.out.println("  Path     : " + tarFileName.getPath());
            System.out.println("  URI      : " + tarFileName.getURI());
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to open tar file ", ex);
        }

        /* Work on files inside tarFile */
        FileObject[] children;
        try {
            children = tarFile.getChildren();
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to get contents of tarfile ", ex);
        }

        for (FileObject f : children) {
            msExtract.processChild(f);
        }

    } // main( String[] args )

    /**
     * Processes one object found in the tar file. Folders are descended into
     * recursively; any other object is copied into the extract directory under
     * its base name and, if its extension is "gz", decompressed there.
     *
     * @param f an object inside the tar file
     * @throws RuntimeException if a VFS operation fails; the underlying
     *         FileSystemException is attached as the cause
     */
    private void processChild(FileObject f) {
        try {
            if (f.getType() == FileType.FOLDER) {
                // Recursively process files in this folder
                for (FileObject subfile : f.getChildren()) {
                    processChild(subfile);
                }
                return;
            }

            // Only the base name is kept, which is what flattens the hierarchy.
            String extractName = this.extractDir.getName() + "/"
                    + f.getName().getBaseName();
            System.out.println("Extracting " + extractName);
            FileObject extractFile = this.fsManager.resolveFile(extractName);
            extractFile.copyFrom(f, new AllFileSelector());

            // if the file is gzipped, decompress it
            if ("gz".equals(extractFile.getName().getExtension())) {
                System.out.println("Decompressing " + extractName);
                decompress(extractFile);
            }
        } catch (FileSystemException ex) {
            // Keep the original exception as the cause so its stack trace
            // travels with the RuntimeException.
            throw new RuntimeException("Error working on tarfile object " + f.getName(), ex);
        }
    } // processChild(FileObject f)

    /**
     * Decompresses a gzipped file that sits in the extract directory, leaving
     * the decompressed file beside it, and then deletes the gzipped file.
     *
     * @param extractFile the gzipped file; its base name must end in ".gz"
     * @throws FileSystemException if any VFS operation fails
     */
    private void decompress(FileObject extractFile) throws FileSystemException {
        FileName gzippedName = extractFile.getName();

        String gzName = "gz://" + gzippedName.getPath();
        System.out.println("gzName=" + gzName);
        FileObject gzFile = this.fsManager.resolveFile(gzName);

        // The caller has checked that the extension is "gz", so the base name
        // ends with ".gz" and the suffix can be cut off directly.
        String baseName = gzippedName.getBaseName();
        String fileName = baseName.substring(0, baseName.length() - GZ_SUFFIX.length());

        // The decompressed path we want
        String decompName = this.extractDir.getName() + "/" + fileName;

        // A temporary Directory
        String tmpDirname = this.extractDir.getName() + "/" + fileName + ".tmp";

        // A temporary file path
        String tmpFilename = tmpDirname + "/" + fileName;

        // Some debug lines
        System.out.println("fileName   =" + fileName);
        System.out.println("decompName =" + decompName);
        System.out.println("tmpDirname =" + tmpDirname);
        System.out.println("tmpFilename=" + tmpFilename);

        FileObject tmpDir = this.fsManager.resolveFile(tmpDirname);
        try {
            // Extracting from gzip file ends up with a directory containing what
            // we want.
            tmpDir.copyFrom(gzFile, new FileTypeSelector(FileType.FILE));

            // Move the uncompressed file to the location desired.
            FileObject tmpFile = this.fsManager.resolveFile(tmpFilename);
            FileObject decompFile = this.fsManager.resolveFile(decompName);
            tmpFile.moveTo(decompFile);
        } finally {
            // Delete the temporary directory, also when the copy or move
            // failed, so that no ".tmp" directory is left behind.
            tmpDir.delete(new AllFileSelector());
        }

        // Delete the gzip file now that we have the uncompressed version.
        // Note that the plain file FileObject (extractFile) is used
        // for deleting instead of the gzip FileObject (gzFile).
        extractFile.delete(new AllFileSelector());
    } // decompress(FileObject extractFile)
}

  • No labels