Using the avro-tools utility:
avro-tools tojson filename.avro | head -n 10
Using Scala:
package org.sample.utils
import org.apache.spark.SparkConf
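import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext

// A minimal sketch continuing the snippet above; assumes the spark-avro package is on the
// classpath (e.g. --packages com.databricks:spark-avro_2.11:4.0.0).
// The object name and input path are illustrative.
object AvroReadTest {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("AvroReadTest"))
    val sqlContext = new SQLContext(sc)
    val df = sqlContext.read.format("com.databricks.spark.avro").load("/path/to/filename.avro")
    df.show(10)  // roughly the same check as `avro-tools tojson filename.avro | head -n 10`
  }
}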
==> copy ssh keys between servers (edge <--> neo4j)
http://www.commandlinefu.com/commands/view/188/copy-your-ssh-public-key-to-a-server-from-a-machine-that-doesnt-have-ssh-copy-id
cat /home/user/id_neo4j_rsa.pub | ssh [email protected] 'cat >> ~/.ssh/authorized_keys'
cat /home/user/id_rsa.pub | ssh [email protected] 'cat >> ~/.ssh/authorized_keys'
==> generate keytab (if not available)
ktutil: addent -password -p [email protected] -k 1 -e rc4-hmac
Password for [email protected]:
ktutil: wkt /home/user/user.keytab
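If the keytab needs to be used programmatically (rather than via kinit), a minimal Scala sketch using Hadoop's UserGroupInformation; the principal and keytab path simply mirror the ktutil example above:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.UserGroupInformation

object KeytabLoginTest {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    conf.set("hadoop.security.authentication", "kerberos")  // enable Kerberos auth
    UserGroupInformation.setConfiguration(conf)
    UserGroupInformation.loginUserFromKeytab("[email protected]", "/home/user/user.keytab")
    println(UserGroupInformation.getCurrentUser)  // confirm the logged-in principal
  }
}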
==> minimal TinkerPop (TinkerGraph) example in Scala
import org.apache.tinkerpop.gremlin.structure.T
import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph
import org.apache.tinkerpop.gremlin.structure.Vertex
import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils
object TinkerpopGraphTest {
  def main(args: Array[String]): Unit = {
    val graph: org.apache.tinkerpop.gremlin.structure.Graph = TinkerGraph.open()  // in-memory graph
    val marko: Vertex = graph.addVertex(T.label, "person", T.id, "1", "name", "marko", "age", "29")
    val vadas: Vertex = graph.addVertex(T.label, "person", T.id, "2", "name", "vadas", "age", "27")
    marko.addEdge("knows", vadas, "weight", Double.box(0.5))
    println(IteratorUtils.count(graph.vertices()))  // 2
    println(IteratorUtils.count(graph.edges()))     // 1
  }
}
==> kill a user's processes, excluding the login shell, "su - " sessions, anything matching someTag, and the ps/grep pipeline itself
preview the matching processes:
ps -eaf | grep user | grep -v bash | grep -v grep | grep -v "su - " | grep -v someTag | grep -v "ps \-eaf"
then kill them:
ps -eaf | grep user | grep -v bash | grep -v grep | grep -v "su - " | grep -v someTag | grep -v "ps \-eaf" | awk -F " " '{print $2}' | xargs kill -9
Following are the steps for running Jupyter on a Hadoop cluster and connecting to it from a local browser.
Assumes you have a secured Spark cluster running on Linux.
Assumes you have Anaconda installed.

## Steps for setting up Python Virtual Environments
Add conda to the PATH (add the following to the ~/.bashrc file):
PATH=$PATH:/opt/anaconda/latest/bin/
export PATH
Capture the fields of a Tomcat/log4j-style log line (test at regex101.com):
2013-12-05 21:39:15,813 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!
/^([0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9:]{1,},[0-9]{3})\s\[([a-zA-Z]+)\]\s([a-zA-Z]{1,})\s+(.*)$/g
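The same pattern can be used from Scala to split a line into timestamp, thread, level, and message; a small sketch (the object name is illustrative):

object LogLineParseTest {
  def main(args: Array[String]): Unit = {
    // four capture groups: timestamp, thread name, log level, message
    val logPattern =
      """^([0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9:]{1,},[0-9]{3})\s\[([a-zA-Z]+)\]\s([a-zA-Z]{1,})\s+(.*)$""".r
    val line = "2013-12-05 21:39:15,813 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!"
    line match {
      case logPattern(ts, thread, level, msg) => println(s"$ts | $thread | $level | $msg")
      case _ => println("no match")
    }
  }
}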
Extracting JSON fields from a file:
grep -Po '"text":.*?[^\\]",' <file>
grep -Po '"TRANS_ID":.*?[^\\]",' <file>
==> gzip string compression utility (Scala)
package org.rsol.log.util

import java.util.zip.GZIPOutputStream
import java.io.ByteArrayOutputStream
import java.util.zip.GZIPInputStream
import java.io.ByteArrayInputStream
import org.apache.commons.io.IOUtils

object GZipUtil extends App {
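  // A sketch of a compress/decompress pair consistent with the imports above; method names are assumptions.
  // Gzip-compress a UTF-8 string to bytes.
  def compress(input: String): Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val gzip = new GZIPOutputStream(bos)
    gzip.write(input.getBytes("UTF-8"))
    gzip.close()       // finishes the gzip stream and flushes everything into bos
    bos.toByteArray
  }

  // Gunzip bytes back to a UTF-8 string using commons-io IOUtils.
  def decompress(bytes: Array[Byte]): String = {
    val gis = new GZIPInputStream(new ByteArrayInputStream(bytes))
    IOUtils.toString(gis, "UTF-8")
  }

  println(decompress(compress("hello gzip")))  // round-trip check, prints "hello gzip"
}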
==> parse key/value records into a Spark DataFrame (Scala, Spark 1.x SQLContext API)
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
// loadAvroData, parseKV, kvPattern_quote, logPath and suffix come from elsewhere in this project
val rdd = loadAvroData(sc, logPath, suffix).map(x => parseKV(x._2, kvPattern_quote).toMap)
convertToDF(sc, rdd)
// builds a four-column DataFrame of strings from an RDD of key/value maps
def convertToDF(sc: SparkContext, rdd: RDD[Map[String, String]]): DataFrame = {
  val sqlContext = new SQLContext(sc)
  import sqlContext.implicits._
  val fields = Array("name", "emp", "dept", "nick")
  val schema = StructType(fields.map { x => StructField(x, StringType) })
  // look up each field in the map, defaulting to "" when a key is missing
  val result = rdd.map(x => (x.getOrElse(fields(0), ""), x.getOrElse(fields(1), ""), x.getOrElse(fields(2), ""), x.getOrElse(fields(3), "")))
  val df = result.toDF(fields: _*)
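  // An alternative sketch (the names rowRdd and dfFromSchema are assumptions): the schema built
  // above can be used directly with createDataFrame instead of the tuple -> toDF implicits.
  import org.apache.spark.sql.Row
  val rowRdd = rdd.map(m => Row.fromSeq(fields.map(f => m.getOrElse(f, ""))))
  val dfFromSchema = sqlContext.createDataFrame(rowRdd, schema)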