Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Let's design the Hama interfaces in a BDD (Behavior-Driven Development) style.

Flat file to Matrix Conversion

We need input/output formatters that convert a text file or sequence file into a matrix.

For example,

No Format
 

Vector

No Format

/**
 * Basic vector interface.
 */
public interface Vector {

  /**
   * Size of the vector
   * 
   * @return size of the vector
   */
  public int size();

  /**
   * Gets the value of index
   * 
   * @param index the index of the entry to read
   * @return v(index)
   */
  public double get(int index);

  /**
   * Sets the value of index
   * 
   * @param index the index of the entry to set
   * @param value the value to store at v(index)
   */
  public void set(int index, double value);

  /**
   * Sets the vector
   * 
   * @param v the vector to copy from
   * @return x = v
   */
  public Vector set(Vector v);

  /**
   * x = alpha * v
   * 
   * @param alpha the scalar multiplier
   * @param v the vector to scale and copy from
   * @return x = alpha * v
   */
  public Vector set(double alpha, Vector v);
  
  /**
   * Adds the value to v(index)
   * 
   * @param index the index of the entry to add to
   * @param value the value to add to v(index)
   */
  public void add(int index, double value);

  /**
   * x = alpha*v + x
   * 
   * @param alpha the scalar multiplier applied to v
   * @param v the vector to scale and add
   * @return x = alpha*v + x
   */
  public Vector add(double alpha, Vector v);

  /**
   * x = v + x
   * 
   * @param v the vector to add
   * @return x = v + x
   */
  public Vector add(Vector v);

  /**
   * x dot v
   * 
   * @param v the other operand of the dot product
   * @return x dot v
   */
  public double dot(Vector v);

  /**
   * v = alpha*v 
   * 
   * @param alpha the scalar multiplier
   * @return v = alpha*v
   */
  public Vector scale(double alpha);
  
  /**
   * Returns a sub-vector.
   * 
   * @param i0 the index of the first element
   * @param i1 the index of the last element
   * @return v[i0:i1]
   */
  public Vector subVector( int i0, int i1 ); 
  
  /**
   * Computes the given norm of the vector
   * 
   * @param type which of the supported {@link Norm}s to compute
   * @return norm of the vector
   */
  public double norm(Norm type);

  /**
   * Supported vector-norms.
   */
  enum Norm {

    /** Sum of the absolute values of the entries */
    One,

    /** The root of sum of squares */
    Two,

    /** The robust norm of the vector */
    TwoRobust,

    /** Largest entry in absolute value */
    Infinity
  }

  /**
   * Returns an iterator
   * 
   * @return iterator
   */
  public Iterator<Writable> iterator();
  
  /**
   * Returns the {@link org.apache.hadoop.io.MapWritable}
   * 
   * @return the entries of vector
   */
  public MapWritable getEntries();
}

Matrix

...

Text value,
    OutputCollector<ImmutableBytesWritable, VectorWritable> output, Reporter reporter)
    throws IOException {
      
    String line = value.toString();

    /* Do something  */

    output.collect(rowKey, vector);
  }

  /**
   * Copies the entries of the first vector received for {@code key} into a
   * {@link BatchUpdate} built from {@code key.get()} and emits it.
   * 
   * @param key the key the values were grouped under
   * @param values the vectors collected for the key; only the first is read
   * @param output the collector that receives the resulting batch update
   * @param reporter not used by this method
   * @throws IOException declared for the reducer contract; presumably
   *           propagated from {@code output.collect} (confirm)
   */
  public void reduce(ImmutableBytesWritable key, Iterator<VectorWritable> values,
      OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
      Reporter reporter) throws IOException {

    BatchUpdate batchObj = new BatchUpdate(key.get());
    // Declared as VectorWritable so the local matches the element type of
    // 'values'. Only the first value is consumed; later values are ignored.
    VectorWritable vector = values.next();
    for (Map.Entry<byte[], Cell> f : vector.entrySet()) {
      batchObj.put(f.getKey(), f.getValue().getValue());
    }

    output.collect(key, batchObj);
  }

Matrix Input/Output Formatters