Marvin and Me

Preview:

Citation preview

Marvin & Me

MACHINE LEARNING

adactushousing.co.uk

IN HOUSING

http://linkd.in/1IxgqDb

Brian Moran

Adapted and updated from work by Michell Zappa http://www.michellzappa.com

“The field of study interested in the development of computer algorithms for transforming data into intelligent action”

Machine Learning with R, Lantz, 2013

What on earth could this possibly have to do with housing?

Marvin & You

Check email

Naive Bayes

Internet search

PageRank

Watch Netflix

Boltzmann machine

Buy lunch

Artificial neural net

Use Sat-Nav

Dijkstra’s algorithm

Apply for a loan

Decision trees

Shop on Amazon

Matrix factorization

Get a letter

k-means clustering

Diagrams (but not text) from: http://machinelearningmastery.com/a-tour-of-machine-learning-algorithms/

Marvin & You

Machine Learning is a pervasive feature in modern life

Marvin & You

New or better services

Increased income

Reduced costs

The (hard) problem K-means solves

Anatomy of an Algorithm

The (hard) problem K-means solves

Find the positions of 'K' number of clusters that minimize the distance from the data points to the clusters.

Anatomy of an Algorithm

Example taken from 'Data Smart', John W Foreman

Anatomy of an Algorithm

Example taken from 'Data Smart', John W Foreman

y

x

Anatomy of an Algorithm

Example taken from 'Data Smart', John W Foreman

‘K’ = 3

Initialize starting positions

y

x

Anatomy of an Algorithm

Example taken from 'Data Smart', John W Foreman

‘K’ = 3

Initialize starting positions

Iterate positions and measure…

y

x

Anatomy of an Algorithm

Example taken from 'Data Smart', John W Foreman

‘K’ = 3

Initialize starting positions

Iterate positions and measure…

… until optimal solution is found

y

x

Anatomy of an Algorithm

K-means pseudo code

Anatomy of an Algorithm

K-means code in R

function (x, centers, iter.max = 10L, nstart = 1L, algorithm = c("Hartigan-Wong",
    "Lloyd", "Forgy", "MacQueen"), trace = FALSE)
{
    .Mimax <- .Machine$integer.max
    do_one <- function(nmeth) {
        switch(nmeth, {
            isteps.Qtran <- as.integer(min(.Mimax, 50 * m))
            iTran <- c(isteps.Qtran, integer(max(0, k - 1)))
            Z <- .Fortran(C_kmns, x, m, p, centers = centers,
                as.integer(k), c1 = integer(m), c2 = integer(m),
                nc = integer(k), double(k), double(k), ncp = integer(k),
                D = double(m), iTran = iTran, live = integer(k),
                iter = iter.max, wss = double(k), ifault = as.integer(trace))
            switch(Z$ifault, stop("empty cluster: try a better set of initial centers",
                call. = FALSE), Z$iter <- max(Z$iter, iter.max + 1L),
                stop("number of cluster centres must lie between 1 and nrow(x)",
                  call. = FALSE), warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)",
                  isteps.Qtran), call. = FALSE))
        }, {
            Z <- .C(C_kmeans_Lloyd, x, m, p, centers = centers,
                k, c1 = integer(m), iter = iter.max, nc = integer(k),
                wss = double(k))
        }, {
            Z <- .C(C_kmeans_MacQueen, x, m, p, centers = as.double(centers),
                k, c1 = integer(m), iter = iter.max, nc = integer(k),
                wss = double(k))
        })
        if (m23 <- any(nmeth == c(2L, 3L))) {
            if (any(Z$nc == 0))
                warning("empty cluster: try a better set of initial centers",
                  call. = FALSE)
        }
        if (Z$iter > iter.max) {
            warning(sprintf(ngettext(iter.max, "did not converge in %d iteration",
                "did not converge in %d iterations"), iter.max),
                call. = FALSE, domain = NA)
            if (m23)
                Z$ifault <- 2L
        }
        if (nmeth %in% c(2L, 3L)) {
            if (any(Z$nc == 0))
                warning("empty cluster: try a better set of initial centers",
                  call. = FALSE)
        }
        Z
    }
    x <- as.matrix(x)
    m <- as.integer(nrow(x))
    if (is.na(m))
        stop("invalid nrow(x)")
    p <- as.integer(ncol(x))
    if (is.na(p))
        stop("invalid ncol(x)")
    if (missing(centers))
        stop("'centers' must be a number or a matrix")
    nmeth <- switch(match.arg(algorithm), `Hartigan-Wong` = 1L,
        Lloyd = 2L, Forgy = 2L, MacQueen = 3L)
    storage.mode(x) <- "double"
    if (length(centers) == 1L) {
        if (centers == 1)
            nmeth <- 3L
        k <- centers
        if (nstart == 1L)
            centers <- x[sample.int(m, k), , drop = FALSE]
        if (nstart >= 2L || any(duplicated(centers))) {
            cn <- unique(x)
            mm <- nrow(cn)
            if (mm < k)
                stop("more cluster centers than distinct data points.")
            centers <- cn[sample.int(mm, k), , drop = FALSE]
        }
    }
    else {
        centers <- as.matrix(centers)
        if (any(duplicated(centers)))
            stop("initial centers are not distinct")
        cn <- NULL
        k <- nrow(centers)
        if (m < k)
            stop("more cluster centers than data points")
    }
    k <- as.integer(k)
    if (is.na(k))
        stop("'invalid value of 'k'")
    iter.max <- as.integer(iter.max)
    if (is.na(iter.max) || iter.max < 1L)
        stop("'iter.max' must be positive")
    if (ncol(x) != ncol(centers))
        stop("must have same number of columns in 'x' and 'centers'")
    storage.mode(centers) <- "double"
    Z <- do_one(nmeth)
    best <- sum(Z$wss)
    if (nstart >= 2L && !is.null(cn))
        for (i in 2:nstart) {
            centers <- cn[sample.int(mm, k), , drop = FALSE]
            ZZ <- do_one(nmeth)
            if ((z <- sum(ZZ$wss)) < best) {
                Z <- ZZ
                best <- z
            }
        }
    centers <- matrix(Z$centers, k)
    dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
    cluster <- Z$c1
    if (!is.null(rn <- rownames(x)))
        names(cluster) <- rn
    totss <- sum(scale(x, scale = FALSE)^2)
    structure(list(cluster = cluster, centers = centers, totss = totss,
        withinss = Z$wss, tot.withinss = best, betweenss = totss -
            best, size = Z$nc, iter = Z$iter, ifault = Z$ifault),
        class = "kmeans")
}
<bytecode: 0x7f9a23016278>
<environment: namespace:stats>

Anatomy of an Algorithm

Marvin & Housing?

Clustering Forecasting Pattern discovery Classification

Descriptive Predictive

When A or B happens, C tends to

follow…

These cases are a similar type…

Number X is going to change to Z

This new case looks like it’s of this type…

Spot warning signs of failing tenancies?

Targeted information and services?

Better planning of maintenance spend?

Automatically handle web enquiries?

Pattern discovery

When A or B happens, C tends to

follow…

Pattern Discovery: Rent Arrears

Pattern discovery

When A or B happens, C tends to

follow…

RENT ARREARS RISK

Pattern Discovery: Rent Arrears

RENT ARREARS RISK

Transactional data

OneR algorithm Missed gas appointments

Pattern Discovery: Rent Arrears

1R code demo in R

Results: 1R Missed Gas Appointments —> Rent arrears

Clustering: Targeting Services

Clustering

These cases are a similar type…

Clustering

These cases are a similar type…

THEY MIGHT WANT FINANCIAL ADVICE

Clustering: Targeting Services

THEY MIGHT WANT FINANCIAL ADVICE

Assign to cluster

k-means clustering

Clustering: Targeting Services

15% of tenants fall into clusters that are 2x at risk of arrears

K-means code demo in R

Results: K-means Clusters —> Rent arrears

Forecasting: Refining budgeting

Number X is going to change to Z

Forecasting

Forecasting: Refining budgeting

Number X is going to change to Z

Forecasting

£s NEEDED TO REPLACE IN NEXT YEAR

Forecasting: Refining budgeting

£s NEEDED TO REPLACE IN NEXT YEAR

Boiler data

Artificial neural net

Still working on gathering the data

on this one…

Classification: Repairs

Classification

This new case looks like it’s of this type…

Classification

This new case looks like it’s of this type…

TRADE

Classification: Repairs

REPAIR REQUEST

TRADE

Classification: Repairs

REPAIR REQUEST

Check web form

Naive Bayes

“Our bathroom is leaking through the ceiling onto the stairs and the ceiling is wet through along with the walls where the taps are mounted. And puddle on the stairs”

PLUMBER

Classification: Repairs

Check web form

Naive Bayes

Check web form

Naive Bayes

bathroom leaking

ceiling stairs ceiling

wet

walls taps

stairs

>70% accuracy

PLUMBER

Classification: Repairs

Naive Bayes code demo in R

Results: Naive Bayes Text —> Repair trade

Next steps…

Surveillance and ‘coveillance’

Customisation through segmentation

Automation and ‘friction-free’ services

Data-led decisions

New Opportunities

Medium-term Plans

One click repair adactus

This Year's Focus

Three Options

1. Repair request

Delay

Staff check

Staff input

Repair ordered

2. Repair request

Repair ordered

3. Repair request

Machine Learning

Staff check

Staff input

Repair ordered

Repair ordered

Problem? N

Y

-£300,000

-£225,000

-£150,000

-£75,000

£0

£75,000

£150,000

£225,000

£300,000

10% channel shift 20% channel shift 30% channel shift 40% channel shift 50% channel shift 60% channel shift 70% channel shift

Avoiding Pyrrhic Victories

Net cost / benefit of repairs self-service

90% accuracy

10% accuracy

50% accuracy

Marvin's Future Impact

New or better services

Increased income

Reduced costs

Marvin's Future Impact

Standard models for the sector?

Some disclaimers, caveats, warnings and a bit of existential worry

Business Understanding

Data Understanding

Data Preparation

Modelling

Evaluation

Deployment

The CRISP data mining process

Text Book Data Science

Business Understanding

Data Understanding

Data Preparation

Modelling

Evaluation

Deployment

The Truth of the Matter

Count to ten

Garbage In, Garbage Out

Data Scientists are Unicorns

Adapted from http://www.anlytcs.com/2014/01/data-science-venn-diagram-v20.html

Data Science

Mathematics and Statistics

Research Methods

UNICORN

Machine Learning

Subject Matter Expertise

Programming Skills

Computer Science

Argumentum ad Verecundiam

Argumentum ad Verecundiam

Classification by Microsoft's projectoxford.ai

Models are Not 100% Correct

Tesla press release December 2015

"One can see this with the annual machine vision competitions, where the computer will

properly identify something as a dog more than 99% of the time, but might occasionally call it a potted plant. Making such mistakes at 70 mph

would be highly problematic."

Elon Musk

What would Dr. Malcolm think?

Your scientists were so preoccupied with

whether or not they could

they didn't stop to think if they should

Humans Need Not Apply?

£0m £10m £20m £30m £40m

See ‘Humans need not Apply’ for a pessimistic assessment: https://youtu.be/7Pq-S557XQU See Jerry Kaplan for a more optimistic view: https://youtu.be/JiiP5ROnzw8

Adactus Housing Group Operating Costs 2014/15

Employees Other things

Marvin & Me

MACHINE LEARNING

adactushousing.co.uk

IN HOUSING

http://linkd.in/1IxgqDb

Brian Moran

Music: Emily Howell (a computer programme), ‘From Darkness, Light’: II. Fugue

Recommended