03/31/2013: Example using Accumulo's RegExFilter class
Example using Accumulo's RegExFilter class
package com.affy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.IteratorUtil;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.SortedMapIterator;
import org.apache.accumulo.core.iterators.system.MapFileIterator;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;

/**
 * Exercises Accumulo's {@link RegExFilter} against an in-memory
 * {@link SortedMapIterator}, so the filter can be tried out without a
 * running Accumulo instance.
 */
public class AccumuloRegExIteratorPlayground {

    /** Shared class-level logger; one per class rather than one per instance. */
    private static final Logger log = Logger.getLogger(AccumuloRegExIteratorPlayground.class);

    /** Empty column-family collection handed to {@code seek}. */
    private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();

    /**
     * Builds two sample entries (rows {@code "1111"} and {@code "/1111"}),
     * configures a {@link RegExFilter} with the expression {@code "/.*"} as
     * the first term of {@code setRegexs} (the row term in Accumulo's API),
     * and logs every entry the filter lets through.
     *
     * @throws IOException if initialising or advancing the iterator fails
     * @throws RuntimeException if the filter rejects the generated options
     */
    public void process() throws IOException {
        final String regularExpression = "/.*";

        final SortedMap<Key, Value> input = new TreeMap<Key, Value>();
        input.put(new Key("1111", "2222", "3333", 0), new Value("4444".getBytes()));
        input.put(new Key("/1111", "2222", "3333", 0), new Value("4444".getBytes()));

        final RegExFilter rei = new RegExFilter();

        // The IteratorSetting is only used as a convenient way to build the
        // option map that RegExFilter expects.
        final IteratorSetting is = new IteratorSetting(1, RegExFilter.class);
        RegExFilter.setRegexs(is, regularExpression, null, null, null, false);

        if (!rei.validateOptions(is.getOptions())) {
            throw new RuntimeException("invalid options.");
        }

        rei.init(new SortedMapIterator(input), is.getOptions(), new PlaygroundIteratorEnvironment());
        rei.seek(new Range(), EMPTY_COL_FAMS, false);

        // NOTE(review): only the "/1111" row is expected to be logged — confirm
        // against the RegExFilter matching rules of the Accumulo version in use.
        while (rei.hasTop()) {
            final Key key = rei.getTopKey();
            final Value value = rei.getTopValue();
            log.info(key + " --> " + value);
            rei.next();
        }
    }

    /**
     * Command-line entry point: creates a playground instance and runs it.
     *
     * @param args ignored
     * @throws IOException propagated from {@link #process()}
     */
    public static void main(final String[] args) throws IOException {
        AccumuloRegExIteratorPlayground driver = new AccumuloRegExIteratorPlayground();
        driver.process();
    }

    /**
     * Minimal {@link IteratorEnvironment} for running an iterator outside a
     * tablet server. Only the map-file reader and the configuration lookup are
     * implemented; every other callback throws
     * {@link UnsupportedOperationException}.
     */
    private static final class PlaygroundIteratorEnvironment implements IteratorEnvironment {

        @Override
        public SortedKeyValueIterator<Key, Value> reserveMapFileReader(String mapFileName)
                throws IOException {
            Configuration conf = CachedConfiguration.getInstance();
            FileSystem fs = FileSystem.get(conf);
            return new MapFileIterator(
                    AccumuloConfiguration.getDefaultConfiguration(), fs, mapFileName, conf);
        }

        @Override
        public AccumuloConfiguration getConfig() {
            return AccumuloConfiguration.getDefaultConfiguration();
        }

        @Override
        public IteratorUtil.IteratorScope getIteratorScope() {
            throw new UnsupportedOperationException("Not supported yet.");
        }

        @Override
        public boolean isFullMajorCompaction() {
            throw new UnsupportedOperationException("Not supported yet.");
        }

        @Override
        public void registerSideChannel(SortedKeyValueIterator<Key, Value> iter) {
            throw new UnsupportedOperationException("Not supported yet.");
        }
    }
}
03/30/2013: Testing Your GrepIterator Without Running Accumulo
Testing Your GrepIterator Without Running Accumulo
The Java code looks like this:
package com.affy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.impl.MasterClient;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.SortedMapIterator;
import org.apache.accumulo.core.iterators.user.GrepIterator;
import org.apache.log4j.Logger;

/**
 * Exercises Accumulo's {@link GrepIterator} against an in-memory
 * {@link SortedMapIterator}, so the iterator can be tried out without a
 * running Accumulo instance.
 */
public class AccumuloGrepIteratorPlayground {

    private static final Logger log = Logger.getLogger(AccumuloGrepIteratorPlayground.class);

    /** Empty column-family collection handed to {@code seek}. */
    private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();

    /**
     * Builds two sample entries (rows {@code "1111"} and {@code "/1111"}),
     * configures a {@link GrepIterator} with the term {@code "/1"}, and logs
     * every entry the iterator lets through.
     *
     * @param args ignored
     * @throws IOException if initialising or advancing the iterator fails
     */
    public static void main(final String[] args) throws IOException {
        final String term = "/1";

        final SortedMap<Key, Value> input = new TreeMap<Key, Value>();
        input.put(new Key("1111", "2222", "3333", 0), new Value("4444".getBytes()));
        input.put(new Key("/1111", "2222", "3333", 0), new Value("4444".getBytes()));

        final GrepIterator grepIterator = new GrepIterator();

        // The IteratorSetting is only used as a convenient way to build the
        // option map that GrepIterator expects.
        final IteratorSetting iteratorSetting = new IteratorSetting(1, GrepIterator.class);
        GrepIterator.setTerm(iteratorSetting, term);

        // No IteratorEnvironment is supplied (null).
        // NOTE(review): presumably GrepIterator.init does not consult it in
        // Accumulo 1.4.2 — confirm before reusing with another version.
        grepIterator.init(new SortedMapIterator(input), iteratorSetting.getOptions(), null);
        grepIterator.seek(new Range(), EMPTY_COL_FAMS, false);

        while (grepIterator.hasTop()) {
            final Key key = grepIterator.getTopKey();
            final Value value = grepIterator.getTopValue();
            log.info(key + " --> " + value);
            grepIterator.next();
        }
    }
}
The pom.xml looks like this:
<!-- Maven build descriptor for the AccumuloGrepIterator example (packaged as a jar). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates -->
  <groupId>com.mycompany</groupId>
  <artifactId>AccumuloGrepIterator</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>AccumuloGrepIterator</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- Accumulo core library -->
    <dependency>
      <groupId>org.apache.accumulo</groupId>
      <artifactId>accumulo-core</artifactId>
      <version>1.4.2</version>
    </dependency>

    <!-- Hadoop common library -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>0.23.6</version>
    </dependency>

    <!-- JUnit, test scope only -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>