Upload
casear-chu
View
115
Download
0
Embed Size (px)
Citation preview
Port
• HDFS: http://localhost:50070/
• Oozie: http://localhost:11000/oozie/v1/admin/status
• Templeton: http://localhost:50111/templeton/v1/status
• ODBC: use port 10000 in DSN configuration or connection string.
List Directory
// Create a WebHDFS client for the local endpoint.
// NOTE(review): the second argument is presumably the HDFS user name — confirm against the SDK.
var client = new WebHDFSClient(new Uri("http://localhost:50070"), "hadoop");

// Request the status of the root directory; when that task completes,
// print "/" followed by the PathSuffix of each directory it reports.
client.GetDirectoryStatus("/")
    .ContinueWith(dl => dl.Result.Directories
        .ToList()
        .ForEach(d => Console.WriteLine("/" + d.PathSuffix)));
Create Directory
// Create a WebHDFS client for the local endpoint.
var client = new WebHDFSClient(new Uri("http://localhost:50070"), "hadoop");

// Create /TEST and report the returned value.
// The awaits require this snippet to run inside an async method.
var created = await client.CreateDirectory("/TEST");
Console.WriteLine("True or False, we created the directory " + created.ToString());

// Delete /TEST again and report the returned value.
var deleted = await client.DeleteDirectory("/TEST");
Console.WriteLine("True or False, we deleted the directory " + deleted.ToString());
Task Chaining
// Chain of continuations: create a directory, upload a local file, print its
// location, open the uploaded file, print its length, then delete it.
// The nesting below mirrors the original chain exactly; each inner lambda
// returns the task produced by its own ContinueWith chain.
client.CreateDirectory("/TEST")
    .ContinueWith(x => client.CreateFile(@"c:\tmp\Titles.txt", "/user/hadoop/titles.txt")
        .ContinueWith(t => Console.WriteLine("new file located at " + t.Result))
        .ContinueWith(t => client.OpenFile("/user/hadoop/titles.txt")
            .ContinueWith(
                resp => resp.Result.Content.ReadAsStringAsync()
                    .ContinueWith(bigString => Console.WriteLine("new file is " + bigString.Result.Length + " bytes long"))
                    .ContinueWith(
                        // The file is removed through DeleteDirectory, as in the original snippet.
                        t2 => client.DeleteDirectory("/user/hadoop/titles.txt")
                            .ContinueWith(b => Console.WriteLine("Successfully deleted file."))
                    )
            )
        )
    );
WebHCat
• Management of HCatalog metadata.
• Hive job submission.
• Pig job submission.
• Map/Reduce job submission.
• Streaming Map/Reduce job submission.
CreateHive
// Submit a Hive query through WebHCat and block until the job finishes.
string outputDir = "basichivejob";
var client = new WebHCatHttpClient(new Uri("http://localhost:50111"), "administrator", "", "hadoop");

// Submit the query; outputDir is passed as the fourth argument.
var t1 = client.CreateHiveJob(@"select * from src;", null, null, outputDir, null);
t1.Wait();
var response = t1.Result;

// Read the response body as JSON.
// NOTE(review): the status check runs only after the body has been read, as in
// the original; consider checking the status first so HTTP failures surface directly.
var output = response.Content.ReadAsAsync<JObject>();
output.Wait();
response.EnsureSuccessStatusCode();

// Take the "id" field from the response and wait for that job to complete.
string id = output.Result.GetValue("id").ToString();
client.WaitForJobToCompleteAsync(id).Wait();
using System.Net.Http;
Oozie
http://hadoopsdk.codeplex.com/wikipage?title=Oozie%20Client&referringTitle=Home
Mapper
/// <summary>
/// Mapper that parses each input line as an integer and emits the number
/// together with its square root.
/// </summary>
public class SqrtMapper : MapperBase
{
    /// <summary>
    /// Parses <paramref name="inputLine"/> as an <see cref="int"/> and emits
    /// the value as the key and its square root as the value.
    /// </summary>
    /// <param name="inputLine">Text to parse as an integer.</param>
    /// <param name="context">Context used to emit the key/value pair.</param>
    /// <exception cref="System.FormatException">
    /// Thrown by <see cref="int.Parse(string)"/> when the line is not a valid integer.
    /// </exception>
    public override void Map(string inputLine, MapperContext context)
    {
        int inputValue = int.Parse(inputLine);

        // Perform the work.
        double sqrt = Math.Sqrt((double)inputValue);

        // Write output data.
        context.EmitKeyValue(inputValue.ToString(), sqrt.ToString());
    }
}
Hadoop Job
/// <summary>
/// Job definition that supplies the input path and output folder for the job.
/// </summary>
/// <remarks>
/// NOTE(review): Mapper, Combiner and Reducer are not defined in this document;
/// presumably they stand in for concrete types such as SqrtMapper — confirm.
/// </remarks>
public class FirstJob : HadoopJob<Mapper,Combiner,Reducer>
{
    /// <summary>
    /// Builds the configuration for this job.
    /// </summary>
    /// <param name="context">Executor context; not read by this implementation.</param>
    /// <returns>
    /// A configuration with InputPath "input/SqrtJob" and OutputFolder "output/SqrtJob".
    /// </returns>
    public override HadoopJobConfiguration Configure(ExecutorContext context)
    {
        HadoopJobConfiguration config = new HadoopJobConfiguration();
        config.InputPath = "input/SqrtJob";
        config.OutputFolder = "output/SqrtJob";
        return config;
    }
}
HiveRow
/// <summary>Row type with the title columns: movie id, name, year and rating.</summary>
public class TitlesRow : HiveRow
{
    public string MovieId { get; set; }
    public string Name { get; set; }
    public int Year { get; set; }
    public string Rating { get; set; }
}

/// <summary>Row type with the award columns, linked to a title through MovieId.</summary>
public class AwardsRow : HiveRow
{
    public string MovieId { get; set; }
    public string AwardId { get; set; }
    public int Year { get; set; }
    // Stored as a string; the query sample in this document compares it with "True".
    public string Won { get; set; }
    public string Type { get; set; }
    public string Category { get; set; }
}

/// <summary>Row type with the actor columns, linked to a title through MovieId.</summary>
public class ActorsRow : HiveRow
{
    public string MovieId { get; set; }
    public string ActorId { get; set; }
    public int AwardsCount { get; set; }
    public string Name { get; set; }
    // NOTE(review): the source text was cut off after Name; confirm no further members were lost.
}
HiveConnection
/// <summary>
/// Typed Hive connection exposing the Awards, Titles and Actors tables.
/// </summary>
public class MyHiveDatabase : HiveConnection
{
    /// <summary>
    /// Forwards all connection settings unchanged to the <c>HiveConnection</c> base constructor.
    /// </summary>
    public MyHiveDatabase(Uri webHcatUri, string username, string password, string azureStorageAccount, string azureStorageKey)
        : base(webHcatUri, username, password, azureStorageAccount, azureStorageKey)
    {
    }

    /// <summary>Gets the table named "Awards", typed as <c>AwardsRow</c>.</summary>
    public HiveTable<AwardsRow> Awards
    {
        get
        {
            return this.GetTable<AwardsRow>("Awards");
        }
    }

    /// <summary>Gets the table named "Titles", typed as <c>TitlesRow</c>.</summary>
    public HiveTable<TitlesRow> Titles
    {
        get
        {
            return this.GetTable<TitlesRow>("Titles");
        }
    }

    /// <summary>Gets the table named "Actors", typed as <c>ActorsRow</c>.</summary>
    public HiveTable<ActorsRow> Actors
    {
        get
        {
            return this.GetTable<ActorsRow>("Actors");
        }
    }
}
Simple Linq
// Named arguments are case-sensitive and must match the parameter names
// declared on the MyHiveDatabase constructor.
var db = new MyHiveDatabase(
    webHcatUri: new Uri("http://localhost:50111"),
    username: "hadoop", password: null,
    azureStorageAccount: "ASV storage account name", azureStorageKey: "ASV storage account key");

// Project each actor to (ActorId, foo = AwardsCount), group by ActorId,
// and average foo within each group.
var q = from x in
            (from a in db.Actors
             select new { a.ActorId, foo = a.AwardsCount })
        group x by x.ActorId into g
        select new { ActorId = g.Key, bar = g.Average(z => z.foo) };

// Block until the query has executed, then materialise the results.
q.ExecuteQuery().Wait();
var results1 = q.ToList();


// Join awards to titles on MovieId, keeping awards won for titles from 1994.
var projectionQuery = from aw in db.Awards
                      join t in db.Titles
                          on aw.MovieId equals t.MovieId
                      where t.Year == 1994 && aw.Won == "True"
                      select new { MovieId = t.MovieId, Name = t.Name, Type = aw.Type, Category = aw.Category, Year = t.Year };


// Create a table named "AwardsIn1994" from the projection query.
var newTable = projectionQuery.CreateTable("AwardsIn1994");
Resource
• http://hadoopsdk.codeplex.com/
• https://github.com/WindowsAzure-Samples/HDInsight-Labs-Preview
• http://wag.codeplex.com/
Machine learning is programming computers to optimize a
performance criterion using example data or past experience.
Data
http://labrosa.ee.columbia.edu/millionsong/tasteprofile
http://www.grouplens.org/node/12