Let's design the Hama interfaces in a BDD (Behavior-Driven Development) style.
Flat file to Matrix Conversion
We need input/output formatters that convert a text file or a sequence file into a matrix.
For example,
No Format |
---|
|
Vector
No Format |
---|
/** * Basic vector interface. */ public interface Vector { /** * Size of the vector * * @return size of the vector */ public int size(); /** * Gets the value of index * * @param index * @return v(index) */ public double get(int index); /** * Sets the value of index * * @param index * @param value */ public void setmap(intLongWritable indexkey, double value); /** * Sets the vector * * @param v * @return x = v */ public Vector set(Vector v); /** * x = alpha * v * * @param alpha * @param v * @return x = alpha * v */ public Vector set(double alpha, Vector v); /** * Adds the value to v(index) * * @param index * @param value */ public void add(int index, double value); /** * x = alpha*v + x * * @param alpha * @param v * @return x = alpha*v + x */ public Vector add(double alpha, Vector v); /** * x = v + x * * @param v * @return x = v + x */ public Vector add(Vector v); /** * x dot v * * @param v * @return x dot v */ public double dot(Vector v); /** * v = alpha*v * * @param alpha * @return v = alpha*v */ public Vector scale(double alpha); /** * Returns a sub-vector. * * @param i0 the index of the first element * @param i1 the index of the last element * @return v[i0:i1] */ public Vector subVector( int i0, int i1 ); /** * Computes the given norm of the vector * * @param type * @return norm of the vector */ public double norm(Norm type); /** * Supported vector-norms. */ enum Norm { /** Sum of the absolute values of the entries */ One, /** The root of sum of squares */ Two, /** The robust norm of the vector */ TwoRobust, /** Largest entry in absolute value */ Infinity } /** * Returns an iterator * * @return iterator */ public Iterator<Writable> iterator(); /** * Returns the {@link org.apache.hadoop.io.MapWritable} * * @return the entries of vector */ public MapWritable getEntries(); } |
Matrix
...
Text value,
OutputCollector<ImmutableBytesWritable, VectorWritable> output, Reporter reporter)
throws IOException {
String line = value.toString();
/* Do something */
output.collect(rowKey, vector);
}
/**
 * Converts the vector received for a row key into a {@link BatchUpdate} and
 * emits it under the same key.
 *
 * @param key the row key; its bytes are used to create the batch update
 * @param values the vectors grouped under {@code key}
 * @param output collector that receives the (key, batch update) pair
 * @param reporter progress reporter (not used by this method)
 * @throws IOException if collecting the output fails
 */
public void reduce(ImmutableBytesWritable key, Iterator<VectorWritable> values,
    OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
    Reporter reporter) throws IOException {
  BatchUpdate batchObj = new BatchUpdate(key.get());

  // The local must be a VectorWritable: that is the element type of `values`
  // and the value type the mapper emits.
  // NOTE(review): only the first value is consumed; any further vectors for
  // the same key are ignored — confirm that one vector per row key is intended.
  VectorWritable vector = values.next();

  // NOTE(review): assumes VectorWritable exposes entrySet() with
  // byte[] -> Cell entries — TODO confirm against the VectorWritable class.
  for (Map.Entry<byte[], Cell> f : vector.entrySet()) {
    batchObj.put(f.getKey(), f.getValue().getValue());
  }

  output.collect(key, batchObj);
}
|