# uam
# Perform attribution with Unified Attribution Model (UAM)
## You will learn how to perform attribution with the UAM model on four use cases:
## Use case 1: perform attribution when no customer journeys are available
## Use case 2: perform attribution mixing customer journeys and aggregated touchpoints
## Use case 3: perform attribution of the revenue
## Use case 4: perform attribution when click-through rates are not available for one or more channels expressed as number of clicks
### Load ChannelAttribution Pro (the package used for the uam() calls below).
library("ChannelAttributionPro")
### Replace the placeholder below with your own token.
token <- "yourtoken"
## USE CASE 1 - Attribution when no customer journeys are available
### This use case shows how to perform attribution on channels for which only aggregated touchpoints (clicks and/or impressions) are available.
#### We have daily data on 6 channels (A, B, C, D, E, F) collected from 2019-01-01 to 2019-12-31.
#### A, D, E and F are expressed as number of clicks, while B and C are expressed as number of impressions.
#### Load the time series of the observed conversions and of the observed touchpoints for each channel.
df_aggr <- read.csv(
  file = "https://app.channelattribution.io/data/data_aggregated.csv",
  sep = ";"
)
head(df_aggr, n = 10)
#### Load the time series of the observed click-through rates for the channels expressed as number of clicks.
df_ctr <- read.csv(
  file = "https://app.channelattribution.io/data/data_ctr.csv",
  sep = ";"
)
head(df_ctr, n = 10)
#### Columns B and C are missing because channels B and C are impressions, so no click-through rate is needed.
#### A channel measured in impressions must have its click-through rate column set to missing: this is how the algorithm recognises it as an impression channel.
#### Run UAM on the aggregated data only (no customer journeys are passed).
res <- uam(
  df_aggr = df_aggr,
  df_ctr = df_ctr,
  df_paths = NULL,
  verbose = 1
)
head(res$attribution, n = 10)
#### The output of the model is the attribution of the observed conversions between the available channels, so for each row the sum A+B+C+D+E+F is equal to the value in column "conversions" of the same row.
## USE CASE 2 - Attribution mixing customer journeys and aggregated touchpoints
### This use case shows how to perform attribution when customer journeys are available for some channels while only aggregated touchpoints are available for the others.
#### We have aggregated data on 6 channels (A, B, C, D, E, F) collected from 2019-01-01 to 2019-12-31.
#### For channels C, D, E and F we know the customer journeys.
#### For channels A and B we only know the aggregated touchpoints for each day.
#### A, D, E and F are expressed as number of clicks, while B and C are expressed as number of impressions.
#### Load the time series of the observed conversions and of the observed touchpoints for each channel.
df_aggr <- read.csv(
  file = "https://app.channelattribution.io/data/data_aggregated.csv",
  sep = ";"
)
head(df_aggr, n = 10)
#### Load the time series of the observed click-through rates for the channels expressed as number of clicks.
df_ctr <- read.csv(
  file = "https://app.channelattribution.io/data/data_ctr.csv",
  sep = ";"
)
head(df_ctr, n = 10)
#### Columns B and C are missing because channels B and C are impressions, so no click-through rate is needed.
#### A channel measured in impressions must have its click-through rate column set to missing: this is how the algorithm recognises it as an impression channel.
#### Finally, load the customer journeys. The data frame has 3 columns: "id_path" is an integer that uniquely identifies the customer journey, "timestamp" is the timestamp of the visited channel and "channel" is the visited channel. A channel equal to "((CONV))" marks a conversion.
df_paths <- read.csv(
  file = "https://app.channelattribution.io/data/data_paths.csv",
  sep = ";"
)
head(df_paths, n = 10)
#### Run UAM on aggregated data and customer journeys together.
res <- uam(
  df_aggr = df_aggr,
  df_ctr = df_ctr,
  df_paths = df_paths,
  channel_conv_name = "((CONV))",
  order = 1,
  verbose = 1
)
head(res$attribution, n = 10)
#### The output of the model is the attribution of the observed conversions between the available channels, so for each row the sum A+B+C+D+E+F is equal to the value in column "conversions" of the same row.
#### Next: path-level attribution for the MTA part.
library(dplyr)
library(stringr)
#### Build one row per journey: count its conversion rows, drop them, then collapse the remaining channels into a " > "-separated path string.
#### NOTE(review): the path follows the row order of df_paths within each id_path; presumably the rows are already in chronological order - confirm.
df_paths_t <- df_paths %>%
  group_by(id_path) %>%
  mutate(total_conversions = sum(as.numeric(channel == "((CONV))"))) %>%
  ungroup() %>%
  filter(channel != "((CONV))") %>%
  group_by(id_path) %>%
  summarise(
    path = str_c(channel, collapse = " > "),
    total_conversions = first(total_conversions),
    .groups = "drop"
  )
#### Attribute each path using the MTA parameters returned by the UAM run above.
res_path_attr <- new_paths_attribution(
  Data = df_paths_t,
  var_path = "path",
  var_conv = "total_conversions",
  var_value = NULL,
  Dparams = res$parameters_mta,
  cha_sep = ">",
  row_sep = ";",
  flg_write_nulls = 1,
  flg_write_paths = 0
)
print(res_path_attr$attribution)
## USE CASE 3 - Attribution of the revenue
### This use case shows how to perform attribution of the revenue associated with the observed conversions.
#### dplyr (select, inner_join, mutate, rename) and tidyr (pivot_longer) are both needed below; they are loaded here so that this use case can be run on its own.
library(dplyr)
library(tidyr)
#### Load the time series of the observed conversions, of the revenue (column "value") and of the observed touchpoints for each channel.
df_aggr <- read.csv(
  "https://app.channelattribution.io/data/data_aggregated_w_value.csv",
  sep = ";"
)
head(df_aggr, 10)
#### Load the time series of the observed click-through rates for the channels expressed as number of clicks.
df_ctr <- read.csv(
  "https://app.channelattribution.io/data/data_ctr.csv",
  sep = ";"
)
head(df_ctr, 10)
### (Optional) Load the customer journeys. They are not used in this example, because uam() is called with df_paths = NULL below.
df_paths <- read.csv(
  "https://app.channelattribution.io/data/data_paths.csv",
  sep = ";"
)
head(df_paths, 10)
#### Run UAM on the conversions. Every column that is not a timestamp, "conversions" or "value" is treated as a channel, and "value" is left out of the input passed to uam().
channels <- setdiff(
  colnames(df_aggr),
  c("timestamp_from", "timestamp_to", "conversions", "value")
)
res <- uam(
  df_aggr = df_aggr[, c("timestamp_from", "timestamp_to", "conversions", channels)],
  df_ctr = df_ctr,
  df_paths = NULL,
  channel_conv_name = "((CONV))",
  order = 1,
  nsim_start = 1e5,
  max_step = NULL,
  ncore = 1,
  nfold = 10,
  seed = 1234567,
  conv_par = 0.05,
  rate_step_sim = 1.5,
  verbose = 1
)
res <- res$attribution
#### Finally, attribute the revenue: each channel receives the share of the period revenue equal to its share of the period conversions.
#### Periods with zero conversions get an attributed revenue of 0 instead of the NaN produced by dividing by zero.
res <- res %>%
  pivot_longer(
    cols = -c(timestamp_from, timestamp_to, conversions),
    names_to = "channel",
    values_to = "attribution"
  ) %>%
  inner_join(
    df_aggr %>% select(timestamp_from, timestamp_to, value),
    by = c("timestamp_from", "timestamp_to")
  ) %>%
  mutate(
    attribution_value = if_else(
      conversions > 0,
      value * attribution / conversions,
      0
    )
  ) %>%
  rename(attribution_conversions = attribution) %>%
  select(
    timestamp_from, timestamp_to, conversions, value,
    channel, attribution_conversions, attribution_value
  )
head(res, 10)
## USE CASE 4 - Attribution when click-through rates are not available for one or more channels expressed as number of clicks
#### We have aggregated data on 6 channels (A, B, C, D, E, F) collected from 2019-01-01 to 2019-12-31.
#### For channels C, D, E and F we know the customer journeys.
#### For channels A and B we only know the aggregated touchpoints for each day.
#### A, D, E and F are expressed as number of clicks, while B and C are expressed as number of impressions.
#### For channel A the click-through rates are not available.
#### Load the time series of the observed conversions and of the observed touchpoints for each channel.
df_aggr <- read.csv(
  file = "https://app.channelattribution.io/data/data_aggregated.csv",
  sep = ";"
)
head(df_aggr, n = 10)
#### Step 1: attribution on channel A. We start by building two artificial channels: the total number of impressions (B + C) and the total number of clicks (A + D + E + F).
df_aggr_1 <- df_aggr[, c("timestamp_from", "timestamp_to", "conversions")]
df_aggr_1$total_impressions <- with(df_aggr, B + C)
df_aggr_1$total_clicks <- with(df_aggr, A + D + E + F)
head(df_aggr_1, n = 10)
#### Load the click-through rates and drop the column for channel A, because we are supposing we do not know it.
df_ctr <- read.csv(
  file = "https://app.channelattribution.io/data/data_ctr.csv",
  sep = ";"
)
df_ctr$A <- NULL
#### The click-through rate of "total_clicks" is estimated as the average of the click-through rates of D, E and F, weighted by the clicks of each channel. The click-through rate of "total_impressions" is set to missing.
#### After the merge, columns ending in ".x" hold the clicks (from df_aggr) and columns ending in ".y" hold the click-through rates (from df_ctr).
#### NOTE(review): a row where D + E + F is 0 yields NaN for "total_clicks" here; whether the data can contain such rows is not visible from this script - confirm.
df_ctr_1 <- merge(
  x = df_aggr[, c("timestamp_from", "timestamp_to", "D", "E", "F")],
  y = df_ctr[, c("timestamp_from", "timestamp_to", "D", "E", "F")],
  by = c("timestamp_from", "timestamp_to"),
  all = TRUE
)
df_ctr_1$total_clicks <- with(df_ctr_1, {
  clicks_sum <- D.x + E.x + F.x
  D.y * D.x / clicks_sum + E.y * E.x / clicks_sum + F.y * F.x / clicks_sum
})
df_ctr_1$total_impressions <- NA
df_ctr_1 <- df_ctr_1[, c("timestamp_from", "timestamp_to", "total_impressions", "total_clicks")]
head(df_ctr_1, n = 10)
#### Perform attribution for "total_clicks" and "total_impressions".
res_attr_1 <- uam(
  df_aggr = df_aggr_1,
  df_ctr = df_ctr_1,
  df_paths = NULL,
  channel_conv_name = "((CONV))",
  order = 1,
  nsim_start = 1e5,
  max_step = NULL,
  ncore = 1,
  nfold = 10,
  seed = 1234567,
  conv_par = 0.05,
  rate_step_sim = 1.5,
  verbose = 1
)
res_attr_1 <- res_attr_1$attribution
head(res_attr_1, n = 10)
#### Load the customer journeys. The data frame has 3 columns: "id_path" is an integer that uniquely identifies the customer journey, "timestamp" is the timestamp of the visited channel and "channel" is the visited channel. A channel equal to "((CONV))" marks a conversion.
df_paths <- read.csv("https://app.channelattribution.io/data/data_paths.csv", sep = ";")
head(df_paths, 10)
#### Now we perform attribution among the channels expressed as number of clicks, to find the attribution for channel A.
#### The conversions attributed to "total_clicks" in the previous step become the conversions to split between A, D, E and F.
df_aggr_clicks <- merge(
  res_attr_1[, c("timestamp_from", "timestamp_to", "total_clicks")],
  df_aggr[, c("timestamp_from", "timestamp_to", "A", "D", "E", "F")],
  by = c("timestamp_from", "timestamp_to"),
  all = TRUE
)
#### Columns are renamed with base R so that this use case does not require dplyr to be loaded.
names(df_aggr_clicks)[names(df_aggr_clicks) == "total_clicks"] <- "conversions"
#### All click-through rates are set to 1, since all the channels here are expressed as number of clicks.
df_ctr_clicks <- df_aggr_clicks[, c("timestamp_from", "timestamp_to", "A", "D", "E", "F")]
df_ctr_clicks[, c("A", "D", "E", "F")] <- 1
res_attr_clicks <- uam(
  df_aggr = df_aggr_clicks,
  df_ctr = df_ctr_clicks,
  df_paths = df_paths,
  channel_conv_name = "((CONV))",
  order = 1,
  nsim_start = 1e5,
  max_step = NULL,
  ncore = 1,
  nfold = 10,
  seed = 1234567,
  conv_par = 0.05,
  rate_step_sim = 1.5,
  verbose = 1
)
res_attr_clicks <- res_attr_clicks$attribution
res_attr_clicks$conversions <- NULL
names(res_attr_clicks)[names(res_attr_clicks) == "A"] <- "A_conversions"
head(res_attr_clicks, 10)
#### "A_conversions" is the final attribution for channel A.
#### Step 2: attribution for all the other channels. The conversions attributed to channel A are subtracted from the total conversions.
df_aggr_2 <- merge(
  x = df_aggr,
  y = res_attr_clicks[c("timestamp_from", "timestamp_to", "A_conversions")],
  by = c("timestamp_from", "timestamp_to"),
  all = TRUE
)
df_aggr_2$conversions <- with(df_aggr_2, conversions - A_conversions)
df_aggr_2 <- df_aggr_2[, c("timestamp_from", "timestamp_to", "conversions", "B", "C", "D", "E", "F")]
head(df_aggr_2, n = 10)
#### Perform attribution of the remaining conversions between B, C, D, E and F.
res_attr <- uam(
  df_aggr = df_aggr_2,
  df_ctr = df_ctr,
  df_paths = df_paths,
  channel_conv_name = "((CONV))",
  order = 1,
  nsim_start = 1e5,
  max_step = NULL,
  ncore = 1,
  nfold = 10,
  seed = 1234567,
  conv_par = 0.05,
  rate_step_sim = 1.5,
  verbose = 1
)
res_attr <- res_attr$attribution
res_attr$conversions <- NULL
head(res_attr, n = 10)
#### Step 3: put everything together. The attribution obtained for A is merged back in, followed by the original total conversions.
res_attr <- merge(
  x = res_attr_clicks[c("timestamp_from", "timestamp_to", "A_conversions")],
  y = res_attr,
  by = c("timestamp_from", "timestamp_to"),
  all = TRUE
)
names(res_attr)[names(res_attr) == "A_conversions"] <- "A"
res_attr <- merge(
  x = df_aggr[c("timestamp_from", "timestamp_to", "conversions")],
  y = res_attr,
  by = c("timestamp_from", "timestamp_to"),
  all = TRUE
)
head(res_attr, n = 10)