37
CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 Doc 7 Assignment 1 Comments Sep 19, 2017 Copyright ©, All rights reserved. 2017 SDSU & Roger Whitney, 5500 Campanile Drive, San Diego, CA 92182-7700 USA. OpenContent (http://www.opencontent.org/opl.shtml) license defines the copyright on this document.

CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

  • Upload
    others

  • View
    1

  • Download
    0

Embed Size (px)

Citation preview

Page 1: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017

Doc 7 Assignment 1 Comments Sep 19, 2017

Copyright ©, All rights reserved. 2017 SDSU & Roger Whitney, 5500 Campanile Drive, San Diego, CA 92182-7700 USA. OpenContent (http://www.opencontent.org/opl.shtml) license defines the copyright on this document.

Page 2: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

2

def sum_multiples_3_5(N: Int) { //Intializing the variables var from : Int = 0 var i : Int = 0 //For loop from 0 to number N for(i<- 0 until N){ //if condition to give the condition if(i >= N){ 0 } else if ((i % 3 == 0 || i % 5 == 0) && i % 15 != 0 ){

from = from + i }

} //printing the output println("The sum of multiples of 3 and 5 less than " +N + " is " +from) }

// Add 1 to x x = x + 1

No return value, so the caller cannot use the result

Page 3: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

3

def sum_multiples_3_5(N: Int) = { var from : Int = 0 var i : Int = 0 for(i<- 0 until N){ if(i >= N){ 0 } else if ((i % 3 == 0 || i % 5 == 0) && i % 15 != 0 ){ from = from + i } } return from }

def sum_multiples_3_5(N: Int) = { var from : Int = 0 var i : Int = 0 for(i<- 0 until N - 1){ if ((i % 3 == 0 || i % 5 == 0) && i % 15 != 0 ){ from = from + i } } return from }

Page 4: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

4

def sum_multiples_3_5(N: Int) = { var sum : Int = 0 for(i<- 0 until N - 1){ if ((i % 3 == 0 || i % 5 == 0) && i % 15 != 0 ){ sum = sum + i } } return sum }

def sum_multiples_3_5(N: Int) = { var sum : Int = 0 for(i <- 0 to N - 1; if ((i % 3 == 0 || i % 5 == 0) && i % 15 != 0 )) { sum = sum + i } return sum }def sumMultiples3Or5(n: Int) = { var sum : Int = 0 for(k <- 0 to n - 1; if ((k % 3 == 0 || k % 5 == 0) && k % 15 != 0 )) { sum = sum + k } sum }

def sum_multiples_3_5(N: Int) = { var from : Int = 0 var i : Int = 0 for(i<- 0 until N - 1){ if ((i % 3 == 0 || i % 5 == 0) && i % 15 != 0 ){ from = from + i } } return from }

Page 5: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

5

def sum_multiples_3_5(N: Int) { //Intializing the variables var from : Int = 0 var i : Int = 0 //For loop from 0 to number N for(i<- 0 until N){ //if condition to give the condition if(i >= N){ 0 } else if ((i % 3 == 0 || i % 5 == 0) && i % 15 != 0 ){

from = from + i }

} //printing the output println("The sum of multiples of 3 and 5 less than " +N + " is " +from) }

def sumMultiples3Or5(n: Int) = { var sum : Int = 0 for(k <- 0 to n - 1; if ((k % 3 == 0 || k % 5 == 0) && k % 15 != 0)) { sum = sum + k } sum }

Page 6: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

6

def sum_multiples_3_5(N : Int): Int = { var sumTotal = 0 for (i <- 1 to N-1){

if (i % 3 == 0 && i % 5 == 0){ /*this conditional is left blank intentionally. Normally a continue statement is used to skip a loop iteration, but that is unnecessary in this case. Having entered this block of the if-else chain, the other blocks will be ignored until the next loop cycle*/ }else if(i % 3 == 0){ sumTotal += i }else if(i % 5 == 0){ sumTotal += i; }else{ //left blank intentionally } } sumTotal }

Page 7: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

7

def sum_multiples_3_5(N : Int): Int = { var sumTotal = 0 for (i <- 1 to N-1){ if (i % 3 == 0 && i % 5 == 0){ }else if(i % 3 == 0){ sumTotal += i }else if(i % 5 == 0){ sumTotal += i; }else{ } } sumTotal }

Sans comments

def sum_multiples_3_5(N : Int): Int = { var sumTotal = 0 for (i <- 1 to N-1){ if (i % 3 == 0 && i % 5 == 0){ }else if (i % 3 == 0 || (i % 5 == 0)){ sumTotal += i } } sumTotal }

Combining conditions

Page 8: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

8

def product(s: String): BigInt = { s.map( BigInt(_)).product

}

def product(s: String): BigInt = { s.map( BigInt(_)).product }

def product(s: String): BigInt = s.map( BigInt(_)).product

Page 9: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

9

def noDuplicates(ints: Array[Int]): Array[Int] = { ints.foldLeft(Array[Int]()) { (acc,element) => if (acc.contains(element)) acc else acc :+ element } }

Page 10: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

10

def patternCount(text: String, pattern: String): Int = { var count = 0 var pass = false val tl = text.length -1 val pl = pattern.length - 1 var ct=0 for (i <- 0 to tl - pl) { ct=0 for (j <- 0 to pl) { if (text.charAt(i + j) == pattern.charAt(j)) { pass = true ct=ct+1 }else {pass = false} } if (pass && ct==pl+1) { count += 1 ct=0 } } return count }

Page 11: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

11

import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var rand = scala.util.Random for(i<-1 to 1000) file1.write(rand.nextInt(Int.MaxValue/2)+ "\n") file1.close //File 2 val file2 = new PrintWriter(new File("secondFile.txt")) for(i<-1 to 100000) file2.write(rand.nextInt(Int.MaxValue/2)+ "\n") file2.close //file 3 val file3 = new PrintWriter(new File("thirdFile.txt")) for(i<-1 to 10000000) file3.write(rand.nextInt(Int.MaxValue/2)+ "\n") file3.close

Code Repeated

Page 12: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

12

import java.io._

/** Writes `size` random integers, one per line, to the file called `name`.
  *
  * Each value comes from `rand.nextInt(Int.MaxValue / 2)`, i.e. it lies in
  * `[0, Int.MaxValue / 2)`.
  *
  * @param name path of the file to create
  * @param size number of lines to write; nothing is written when `size <= 0`
  */
def fillFile(name: String, size: Int): Unit = {
  val file = new java.io.PrintWriter(new File(name))
  val rand = scala.util.Random
  // Close in `finally` so the file handle is released even if a write fails.
  try {
    for (_ <- 1 to size)
      file.write(s"${rand.nextInt(Int.MaxValue / 2)}\n")
  } finally {
    file.close()
  }
}

// Same three sizes as the repeated version: 1,000 / 100,000 / 10,000,000 integers.
fillFile("small.txt", 1000)
fillFile("medium.txt", 100000)
fillFile("large.txt", 10000000)

Repeats removed

Page 13: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

13

def sumOfFloats():Float={ var floatHolder = new Array[Float](1000000); var icounter=0; var jcounter=0; var sum=0f; for(icounter<-0 to 1000000-1) { floatHolder(icounter)=0.00001f } for(icounter<-0 to 1000000-1) { sum+=floatHolder(jcounter); } return sum; // comes up to 9.917345. The error percent is (10-9.917345/10)=0.82% }

(10 - 9.917345) / 10 * 100 ≈ 0.82655

Do the calculation! Don’t just report result in comment

Page 14: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

14

def Create(serial: String, amt: Int): Unit={

val t0 = System.currentTimeMillis() var text: String= "" var f = new File(fileName+serial+".txt") var writer = new PrintWriter(f) var arr = new Array[Int](amt)

for(i <- 0 to amt-1) { arr(i) = Random.nextInt(Int.MaxValue / 2) text = arr(i) + "\n" writer.write(text) }

writer.close() computeMedianAndAverage(fileName, serial, amt) val t1 = System.currentTimeMillis() println("Time: " + (t1 - t0) + " milliseconds"+"\n\n") }

Naming conventions

Create what?

Page 15: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

15

product("I am a graduate student ")

162320796575110448299770568058252150526144046694400000000

sum_multiples_3_5(123)

res0_9: BigInt = 2870

product("SDSU")

39818420

Page 16: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

16

sum_multiples_3_5(3) == 0 sum_multiples_3_5(4) == 3 sum_multiples_3_5(5) == 3 sum_multiples_3_5(6) == 3 + 5 sum_multiples_3_5(10) == 3 + 5 + 6 + 9 sum_multiples_3_5(11) == 3 + 5 + 6 + 9 + 10 sum_multiples_3_5(20) == 3 + 5 + 6 + 9 + 10 + 12 + 18

sum_multiples_3_5(3) == 0 sum_multiples_3_5(4) == 3 sum_multiples_3_5(5) == 3 sum_multiples_3_5(6) == 8 sum_multiples_3_5(10) == 23 sum_multiples_3_5(11) == 33 sum_multiples_3_5(20) == 63 sum_multiples_3_5(123) == 2880

assert(sum_multiples_3_5(3) == 0) assert(sum_multiples_3_5(4) == 3) assert(sum_multiples_3_5(5) == 3) assert(sum_multiples_3_5(6) == 3 + 5) …

product("SDSU") == 'S'.toInt * 'D'.toInt * 'S'.toInt * 'U'.toInt product("aB\u0100") == 'a'.toInt * 'B'.toInt * '\u0100'.toInt

Page 17: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

17

def minmax(values: Array[Int]) = { for(a<-values) printf("%d,",a) println var min=values.reduce(_ min _) var max=values.reduce(_ max _) printf("Min= %d, Max= %d.\n\n",min,max) (min,max) }

Page 18: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

18

What is flag1? What is flag2?

def pattern_count(t:String, p:String) : Long = { var i = 0; var count = 0; while(i<(t.length()-p.length()+1)) { var flag2 = i; var flag1 = 0; var flag3 = 0; i += 1; while (flag3<p.length()) { if(t(flag2)==p(flag3)){ flag1 += 1; flag2 += 1; flag3 += 1; } else { flag3 = p.length()+1; } if(flag1==p.length()) { count += 1; } } } return count; }

def pattern_count(t:String, p:String) : Long = { var offset = 0; var count = 0; while(offset<(t.length()-p.length()+1)) { var textIndex = offset; var matches = 0; var patternIndex = 0; offset += 1; while (patternIndex <p.length()) { if(t(textIndex)==p(patternIndex)){ matches += 1; textIndex += 1; patternIndex += 1; } else { patternIndex = p.length()+1; } if(matches ==p.length()) { count += 1; } } } return count; }

Page 19: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

19

def sum_multiples_3_5(n: Int): Int = {

var sum: Int = 0 if (n < 0) { println("Error! Number is negative!") System.exit(1) }

for (i <- 1 until n) { if (i % 3 == 0) { //if the number is multiple of 3 only if (i % 5 != 0) { //if the number is multiple of both sum = sum + i } } else if (i % 5 == 0) { //if the number is multiple of 5 only sum = sum + i } }

Page 20: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

20

def randomInts(n:Int,range:Int):Array[Int]={ if (range<=0||n<=0){ print("Invalid input!"); return Array() } val array= new Array[Int](n) val r = scala.util.Random for(i<-0 to n-1)array(i)=r.nextInt(range) array }

Page 21: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

21

def noDuplicates(ints:Array[Int]):Array[Int]= { var withoutDups = ints.distinct withoutDups }

def noDuplicates(ints:Array[Int]):Array[Int]= { ints.distinct }

def noDuplicates(ints:Array[Int]):Array[Int] = ints.distinct

Page 22: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

22

def product(s : String):Double = {

var x = s.foldLeft(1L)(_*_.toInt)

println("The Value of X is " + x)

return x

}

def product(s : String):Double = { var x = s.foldLeft(1L)(_*_.toInt) println("The Value of X is " + x) return x

}

def product(s : String):Double = { var x = s.foldLeft(1L)(_*_.toInt) return x

}

def product(s : String):Double = { return s.foldLeft(1L)(_*_.toInt)

}

Page 23: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

23

def noDuplicates(ints : Array[Int]): Array[Int] = {

var derpy = ints.distinct

derpy

}

Page 24: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

24

var floats = new Array[Float](1000000) var sum = 0.0f

for(i <- 0 until floats.length){ floats(i) = (0.00001).toFloat sum += floats(i) }

println(sum)

//Expected result: 10 //Actual result: 9.917345

//Margin of error: 0.82655%

// |9.917345 - 10| / 10 // = 0.0082655

// 0.0082655 * 100 // 0.82655

Do the calculations

Page 25: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

25

writeRandomFiles ( "n1k.txt", 1000 ) var time1s = System.currentTimeMillis() var Arr1 = readRandomFiles ( "n1k.txt" ) var Avg1: BigInt = (Arr1.foldLeft(0L)(_ + _)) / 1000 // Sum has a bug in it...only returns ints. Overflow. var sortedArr1 = Arr1.sortWith (_ < _) var Med1: BigInt = sortedArr1(500) var time1e = System.currentTimeMillis() var t1 = time1e - time1s println ( s"1000: Avg = $Avg1, Med = $Med1, Time = $t1" )

// Part 2 (100,000 integers) writeRandomFiles ( "n100k.txt", 100000 ) var time2s = System.currentTimeMillis() var Arr2 = readRandomFiles ( "n100k.txt" ) var Avg2: BigInt = (Arr2.foldLeft(0L)(_ + _)) / 100000 var sortedArr2 = Arr2.sortWith (_ < _) var Med2: BigInt = sortedArr2(50000) var time2e = System.currentTimeMillis() var t2 = time2e - time2s println ( s"100000: Avg = $Avg2, Med = $Med2, Time = $t2" )

// Part 3 (10,000,000 integers) writeRandomFiles ( "n10m.txt", 10000000 )

Page 26: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

26

def sumAllElementsOfArray() = { var sumOfArray: Float = 0 var arrayOfElements = new Array[Float](1000000) for (i <- 0 until arrayOfElements.length) { arrayOfElements(i) = 0.00001f } arrayOfElements.foreach(v => sumOfArray += v) println(sumOfArray) println(arrayOfElements(0)*arrayOfElements.length - sumOfArray) }

Page 27: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

27

def error(actual: BigDecimal, expected: BigDecimal): BigDecimal = { (actual - expected).abs / expected * BigDecimal(100) }

def time[T1, T2](callback: T1 => T2, args: T1): (T2, Long) = { val start = System.currentTimeMillis() val result = callback(args) val runtime = System.currentTimeMillis() - start (result, runtime) }

def EvaluateRandom(fileNumber: Int, n: Int) { val file = randomInts(n, Int.MaxValue / 2)

val (avg, timeAvg) = time(average, file) val (med, timeMed) = time(median, file)

println() println(s"File $fileNumber") println(s"n: $n") println(f"average: $avg%.1f") println(s"median : med") println(s"time to compute average: $timeAvg milliseconds")

Page 28: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

28

def fileReader(fname: String): ArrayBuffer [Int] ={ var ary = new ArrayBuffer [Int] var temp = Array[String]() val bufferedSource = Source.fromFile(fname) for (line <- bufferedSource.getLines) { temp = line.split(" ") for(i <- 0 to temp.length-1){ ary.append(temp(i).toInt) } } bufferedSource.close ary }

Page 29: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

29

def sum_multiples_3_5(N:Int) = { var i=1 var sum=0 if(N<3){ println("There is no such kind of numbers.\n") } else{ printf("The multiples of 3 and 5 less than %s are listed below:\n",N) while (i<N){ if ((i%3 == 0 | i%5 == 0) & i%15 != 0){ sum=sum+i; print(i+", "); } i=i+1 } println printf("Sum of those multiples = %d.\n\n", sum) } sum }

Page 30: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

30

def averageMedian(file:String) = { val beginTime = System.currentTimeMillis() var arrayBuffer = ArrayBuffer[Int]() val numbersSource = Source.fromFile(file) for (line <- numbersSource.getLines()){ arrayBuffer+=line.toInt } var array = arrayBuffer.toArray print ("median:"+medianCalculator(array)+"\n") print ("Average:"+averageCalculator(array)+"\n") val endTime = System.currentTimeMillis() print ("Time Taken:"+(endTime-beginTime) + "\n") arrayBuffer.clear numbersSource.close() }

Space complexity

Space complexity

Page 31: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

31

def averageCalculator(array: Array[Int]): BigDecimal = { var source = convertToBigDecimal(array) var sum: BigDecimal = 0 for(i <- 0 to (array.length-1)){ sum = sum + array(i) } return (sum/BigDecimal(array.length)) }

def convertToBigDecimal(array: Array[Int]): Array[BigDecimal] = { var result = ArrayBuffer[BigDecimal]() for ( i <- 0 to (array.length-1)){ result+=BigDecimal(array(i)) } return result.toArray }

Space Complexity

Page 32: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

32

def averageCalculator(array: Array[Int]): BigDecimal = { var source = convertToBigDecimal(array) var sum: BigDecimal = 0 for(i <- 0 to (array.length-1)){ sum = sum + array(i) } return (sum/BigDecimal(array.length)) }

/** Computes the arithmetic mean of `array` as a `BigDecimal`.
  *
  * The total is accumulated directly in a `BigDecimal`, so no intermediate
  * `Array[BigDecimal]` copy of the input is built.
  *
  * @param array the integers to average
  * @return the sum of the elements divided by `array.length`
  * @throws java.lang.ArithmeticException if `array` is empty (division by zero)
  */
def averageCalculator(array: Array[Int]): BigDecimal = {
  // `reduce` on an Array[Int] must return a supertype of Int, so it cannot
  // yield a BigDecimal; seed a fold with a BigDecimal zero instead.
  val sum = array.foldLeft(BigDecimal(0))((total, k) => total + BigDecimal(k))
  sum / BigDecimal(array.length)
}

Space Complexity

def averageCalculator(array: Array[Int]): BigDecimal = { var sum: BigDecimal = 0 for(k <- array){ sum = sum +BigDecimal(k) } return (sum/BigDecimal(array.length)) }

Space Complexity

Page 33: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

33

/** Computes the mean of the integers stored one per line in `file`.
  *
  * Lines are consumed one at a time from the source's line iterator, so the
  * values are never collected into an array.
  *
  * @param file path of a text file containing one integer per line
  * @return the sum of the values divided by the number of lines
  * @throws java.lang.NumberFormatException if a line is not a valid `Int`
  * @throws java.lang.ArithmeticException if the file has no lines (division by zero)
  */
def average(file: String): BigDecimal = {
  val numbersSource = Source.fromFile(file)
  // Close in `finally`: the handle must be released on both the normal path
  // and when a malformed line makes `toInt` throw.
  try {
    val (sum, count) =
      numbersSource.getLines().foldLeft((BigDecimal(0), 0L)) {
        case ((total, seen), line) => (total + BigDecimal(line.toInt), seen + 1)
      }
    sum / BigDecimal(count)
  } finally {
    numbersSource.close()
  }
}

Space Complexity

Should sum be: BigDecimal or Long?

Page 34: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

34

def minmax(values:Array[Int]):(Int,Int) = { val sortedArray = values.sortWith(_<_) val arrLength = sortedArray.length (sortedArray(0),sortedArray(arrLength-1)) }

def minmax(values:Array[Int]):(Int,Int) = { (values.min, values.max) }

Time Complexity?

Page 35: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

35

1,000,000 integers between 0 and Int.MaxValue / 2

How much space in memory

How much space on disk

As strings

As bytes

Page 36: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

36

How to find the median of N integers with one processor

N = 1,000,000

1,000,000,000,000

1,000,000,000

4,000,000,000

Page 37: CS 696 Intro to Big Data: Tools and Methods Fall Semester, 2017 … · 2017-09-19 · 11 import java.io._ //File 1 val file1 = new java.io.PrintWriter(new File("firstFile.txt")) var

Kahan Summation

37

/** Sums `input` using Kahan (compensated) summation.
  *
  * Alongside the running total, the loop tracks the low-order bits lost by
  * each floating-point addition and feeds them back into the next one.
  *
  * @param input the values to add
  * @return the compensated sum; `0.0` for an empty array
  */
def kahanSum(input: Array[Double]): Double = {
  var sum = 0.0
  var lostBits = 0.0 // rounding error carried over from the previous addition
  for (element <- input) {
    val y = element - lostBits    // re-inject what the last addition dropped
    val newSum = sum + y
    lostBits = (newSum - sum) - y // what was actually added minus what was intended
    sum = newSum
  }
  sum
}

Method Error Growth Rate

Normal Sum O(εn)

Pairwise Sum O(εlog(n))

Kahan Sum O(ε)

ε = machine precision n = number of floats to add