{
"version": "2.1",
"className": "datameer.dap.common.entity.WorkbookConfigurationImpl",
"file": {
"uuid": "e1b88f00-899f-4c62-817a-cf1e7cb62e96",
"path": "/Workbooks/Behavioral Analytics/Clickstream Preprocessing.wbk",
"description": "This workbook transforms raw web server logs into information useful for analysts by sessionizing, enriching with visitor profile information, and extracting click paths.\r\n\r\nThis workbook drives the Clickstream Analytics workbook",
"name": "Clickstream Preprocessing"
},
"pullType": "MANUALLY",
"properties": {},
"sheets": [
{
"name": "weblogs",
"sheetId": "weblogs",
"sheetType": "das.internal.DataSourceSheetType",
"keep": false,
"datasource": {
"path": "/FileUploads/Clickstream Data.upl",
"uuid": "743b6251-4524-4668-a771-9a716b2b1e58"
},
"columnStyles": []
},
{
"name": "No_Images_Or_Errors",
"sheetId": "No_Images_Or_Errors",
"sheetType": "das.internal.FilterSheetType",
"keep": false,
"sourceSheet": "weblogs",
"connector": "AND",
"filterArguments": [
{
"column": "status",
"filterExpression": "EQUALS",
"value": "200",
"valueStatic": true,
"type": "CONSTANT"
},
{
"column": "request",
"filterExpression": "NOT_CONTAINS",
"value": ".jpg",
"valueStatic": true,
"type": "CONSTANT"
},
{
"column": "request",
"filterExpression": "NOT_CONTAINS",
"value": ".jpeg",
"valueStatic": true,
"type": "CONSTANT"
},
{
"column": "request",
"filterExpression": "NOT_CONTAINS",
"value": ".gif",
"valueStatic": true,
"type": "CONSTANT"
},
{
"column": "request",
"filterExpression": "NOT_CONTAINS",
"value": ".tif",
"valueStatic": true,
"type": "CONSTANT"
},
{
"column": "request",
"filterExpression": "NOT_CONTAINS",
"value": ".ico",
"valueStatic": true,
"type": "CONSTANT"
}
],
"columnStyles": []
},
{
"name": "Session_Stats",
"sheetId": "Session_Stats",
"sheetType": "das.internal.FormulaSheetType",
"keep": true,
"formulas": [
{
"columnName": "User",
"columnId": "0",
"columnIndex": 0,
"formulaString": "=GROUPBY(#No_Images_Or_Errors!remoteUser)"
},
{
"columnName": "Session_Start",
"columnId": "1",
"columnIndex": 1,
"formulaString": "=GROUPBYGAP(#No_Images_Or_Errors!timeEnglishFormat;5m)"
},
{
"columnName": "Session_End",
"columnId": "2",
"columnIndex": 2,
"formulaString": "=GROUPMAX(#No_Images_Or_Errors!timeEnglishFormat)"
},
{
"columnName": "Mins",
"columnId": "3",
"columnIndex": 3,
"formulaString": "=(#Session_End-#Session_Start)/60000"
},
{
"columnName": "Clicks",
"columnId": "4",
"columnIndex": 4,
"formulaString": "=GROUPCOUNT()"
}
],
"columnStyles": [
{
"columnIndex": 0,
"width": 100,
"name": "User",
"thousandSeparator": true
},
{
"columnIndex": 1,
"width": 100,
"name": "Session_Start",
"thousandSeparator": true
},
{
"columnIndex": 2,
"width": 100,
"name": "Session_End",
"thousandSeparator": true
},
{
"columnIndex": 3,
"width": 100,
"name": "Mins",
"thousandSeparator": true
},
{
"columnIndex": 4,
"width": 100,
"name": "Clicks",
"thousandSeparator": true
}
],
"nextColumnId": 5
},
{
"name": "Sessionized_Clicks",
"sheetId": "Sessionized_Clicks",
"sheetType": "das.internal.FormulaSheetType",
"keep": true,
"formulas": [
{
"columnName": "User",
"columnId": "0",
"columnIndex": 0,
"formulaString": "=GROUPBY(#No_Images_Or_Errors!remoteUser)"
},
{
"columnName": "Session_Start",
"columnId": "1",
"columnIndex": 1,
"formulaString": "=GROUPBYGAP(#No_Images_Or_Errors!timeEnglishFormat;5m)"
},
{
"columnName": "Path",
"columnId": "2",
"columnIndex": 2,
"formulaString": "=GROUP_PATH_CHANGES(URL_FILE(#No_Images_Or_Errors!request))"
},
{
"columnName": "From",
"columnId": "3",
"columnIndex": 3,
"formulaString": "=REGEX(JSON_ELEMENT(#Path;0);\"(.*)\\\\..*\";\"$1\";\"external\")"
},
{
"columnName": "To",
"columnId": "4",
"columnIndex": 4,
"formulaString": "=REGEX(JSON_ELEMENT(#Path;1);\"(.*)\\\\..*\";\"$1\";\"external\")"
},
{
"columnName": "Last_Click",
"columnId": "5",
"columnIndex": 5,
"formulaString": "=GROUP_PREVIOUS(#No_Images_Or_Errors!timeEnglishFormat)"
},
{
"columnName": "Landing",
"columnId": "6",
"columnIndex": 6,
"formulaString": "=#No_Images_Or_Errors!timeEnglishFormat"
},
{
"columnName": "Dwell_Time_ms",
"columnId": "7",
"columnIndex": 7,
"formulaString": "=GROUP_DIFF(#Landing)"
}
],
"columnStyles": [
{
"columnIndex": 0,
"width": 100,
"name": "User",
"thousandSeparator": true
},
{
"columnIndex": 1,
"width": 100,
"name": "Session_Start",
"thousandSeparator": true
},
{
"columnIndex": 2,
"width": 100,
"name": "Path",
"thousandSeparator": true
},
{
"columnIndex": 3,
"width": 100,
"name": "From",
"thousandSeparator": true
},
{
"columnIndex": 4,
"width": 100,
"name": "To",
"thousandSeparator": true
},
{
"columnIndex": 5,
"width": 100,
"name": "Last_Click",
"thousandSeparator": true
},
{
"columnIndex": 6,
"width": 100,
"name": "Landing",
"thousandSeparator": true
},
{
"columnIndex": 7,
"width": 100,
"name": "Dwell_Time_ms",
"thousandSeparator": true
}
],
"nextColumnId": 8
},
{
"name": "Moves",
"sheetId": "Moves",
"sheetType": "das.internal.FilterSheetType",
"keep": true,
"sourceSheet": "Sessionized_Clicks",
"connector": "AND",
"filterArguments": [
{
"column": "To",
"filterExpression": "NOT_CONTAINS",
"value": "external",
"valueStatic": true,
"type": "CONSTANT"
}
],
"columnStyles": []
},
{
"name": "Click_Pairs",
"sheetId": "Click_Pairs",
"sheetType": "das.internal.FormulaSheetType",
"keep": true,
"formulas": [
{
"columnName": "Path",
"columnId": "0",
"columnIndex": 0,
"formulaString": "=GROUPBY(#Moves!Path)"
},
{
"columnName": "From",
"columnId": "1",
"columnIndex": 1,
"formulaString": "=REGEX(JSON_ELEMENT(#Path;0);\"(.*)\\\\..*\";\"$1\";\"external\")"
},
{
"columnName": "To",
"columnId": "2",
"columnIndex": 2,
"formulaString": "=REGEX(JSON_ELEMENT(#Path;1);\"(.*)\\\\..*\";\"$1\";\"external\")"
},
{
"columnName": "Occurrences",
"columnId": "3",
"columnIndex": 3,
"formulaString": "=GROUPCOUNT()"
},
{
"columnName": "Avg_Mins_On_Page",
"columnId": "4",
"columnIndex": 4,
"formulaString": "=GROUPAVERAGE(#Moves!Dwell_Time_ms)/60000"
}
],
"columnStyles": [
{
"columnIndex": 0,
"width": 100,
"name": "Path",
"thousandSeparator": true
},
{
"columnIndex": 1,
"width": 100,
"name": "From",
"thousandSeparator": true
},
{
"columnIndex": 2,
"width": 100,
"name": "To",
"thousandSeparator": true
},
{
"columnIndex": 3,
"width": 100,
"name": "Occurrences",
"thousandSeparator": true
},
{
"columnIndex": 4,
"width": 100,
"name": "Avg_Mins_On_Page",
"thousandSeparator": true
}
],
"nextColumnId": 5
},
{
"name": "User_Stats",
"sheetId": "User_Stats",
"sheetType": "das.internal.FormulaSheetType",
"keep": false,
"formulas": [
{
"columnName": "User",
"columnId": "0",
"columnIndex": 0,
"formulaString": "=GROUPBY(#Session_Stats!User)"
},
{
"columnName": "Avg_Session_Mins",
"columnId": "1",
"columnIndex": 1,
"formulaString": "=GROUPAVERAGE(#Session_Stats!Mins)"
},
{
"columnName": "Avg_Clicks",
"columnId": "2",
"columnIndex": 2,
"formulaString": "=GROUPAVERAGE(#Session_Stats!Clicks)"
},
{
"columnName": "Median_Session_Mins",
"columnId": "3",
"columnIndex": 3,
"formulaString": "=GROUPMEDIAN(#Session_Stats!Mins)"
},
{
"columnName": "Num_Sessions",
"columnId": "4",
"columnIndex": 4,
"formulaString": "=GROUPCOUNT()"
},
{
"columnName": "Total_Clicks",
"columnId": "5",
"columnIndex": 5,
"formulaString": "=GROUPSUM(#Session_Stats!Clicks)"
}
],
"columnStyles": [
{
"columnIndex": 0,
"width": 100,
"name": "User",
"thousandSeparator": true
},
{
"columnIndex": 1,
"width": 100,
"name": "Avg_Session_Mins",
"thousandSeparator": true
},
{
"columnIndex": 2,
"width": 100,
"name": "Avg_Clicks",
"thousandSeparator": true
},
{
"columnIndex": 3,
"width": 100,
"name": "Median_Session_Mins",
"thousandSeparator": true
},
{
"columnIndex": 4,
"width": 100,
"name": "Num_Sessions",
"thousandSeparator": true
},
{
"columnIndex": 5,
"width": 100,
"name": "Total_Clicks",
"thousandSeparator": true
}
],
"nextColumnId": 6
},
{
"name": "customers",
"sheetId": "customers",
"sheetType": "das.internal.DataSourceSheetType",
"keep": false,
"datasource": {
"path": "/FileUploads/Web User Profile.upl",
"uuid": "cc9fc852-9e16-4c81-ab97-ccfc9ee687b6"
},
"columnStyles": []
},
{
"name": "Users_Enriched",
"sheetId": "Users_Enriched",
"sheetType": "das.internal.JoinedSheetType",
"keep": true,
"sheetDefinition": {
"joinCategory": "TWO_MEMBER_JOIN",
"joinPairs": [
{
"joinType": "INNER",
"sheet1": "User_Stats",
"sheet2": "customers",
"joinColumn1": "User",
"joinColumn2": "user"
}
],
"sheetToIncludeColumns": {
"User_Stats": [
"User",
"Avg_Session_Mins",
"Avg_Clicks",
"Median_Session_Mins",
"Num_Sessions",
"Total_Clicks"
],
"customers": [
"Email",
"Activated"
]
}
},
"columnStyles": []
},
{
"name": "Raw_ClickPaths",
"sheetId": "Raw_ClickPaths",
"sheetType": "das.internal.FormulaSheetType",
"keep": false,
"formulas": [
{
"columnName": "User",
"columnId": "0",
"columnIndex": 0,
"formulaString": "=GROUPBY(#Sessionized_Clicks!User)"
},
{
"columnName": "Session_Start",
"columnId": "1",
"columnIndex": 1,
"formulaString": "=GROUPBY(#Sessionized_Clicks!Session_Start)"
},
{
"columnName": "Path",
"columnId": "2",
"columnIndex": 2,
"formulaString": "=TOJSON(GROUPCOMBIN(#Sessionized_Clicks!To))"
}
],
"columnStyles": [
{
"columnIndex": 0,
"width": 100,
"name": "User",
"thousandSeparator": true
},
{
"columnIndex": 1,
"width": 100,
"name": "Session_Start",
"thousandSeparator": true
},
{
"columnIndex": 2,
"width": 100,
"name": "Path",
"thousandSeparator": true
}
],
"nextColumnId": 3
},
{
"name": "Internal_Moves",
"sheetId": "Internal_Moves",
"sheetType": "das.internal.FilterSheetType",
"keep": true,
"sourceSheet": "Click_Pairs",
"connector": "AND",
"filterArguments": [
{
"column": "Path",
"filterExpression": "NOT_CONTAINS",
"value": "external",
"valueStatic": true,
"type": "CONSTANT"
}
],
"columnStyles": []
},
{
"name": "ClickPaths_4_Plus",
"sheetId": "ClickPaths_4_Plus",
"sheetType": "das.internal.FilterSheetType",
"keep": true,
"sourceSheet": "Raw_ClickPaths",
"connector": "AND",
"filterArguments": [
{
"filterExpression": "FORMULA",
"value": "NOT(ISNULL(JSON_ELEMENT(#Path;3)))",
"valueStatic": false,
"type": "ADVANCED_FORMULA"
}
],
"columnStyles": []
},
{
"name": "Page_Pairs",
"sheetId": "Page_Pairs",
"sheetType": "das.internal.FormulaSheetType",
"keep": true,
"formulas": [
{
"columnName": "From",
"columnId": "0",
"columnIndex": 0,
"formulaString": "=#Click_Pairs!From"
},
{
"columnName": "To",
"columnId": "1",
"columnIndex": 1,
"formulaString": "=#Click_Pairs!To"
},
{
"columnName": "Occurrences",
"columnId": "2",
"columnIndex": 2,
"formulaString": "=#Click_Pairs!Occurrences"
},
{
"columnName": "Avg_Mins_On_Page",
"columnId": "3",
"columnIndex": 3,
"formulaString": "=#Click_Pairs!Avg_Mins_On_Page"
}
],
"columnStyles": [
{
"columnIndex": 0,
"width": 100,
"name": "From",
"thousandSeparator": true
},
{
"columnIndex": 1,
"width": 100,
"name": "To",
"thousandSeparator": true
},
{
"columnIndex": 2,
"width": 100,
"name": "Occurrences",
"thousandSeparator": true
},
{
"columnIndex": 3,
"width": 100,
"name": "Avg_Mins_On_Page",
"thousandSeparator": true
}
],
"nextColumnId": 4
},
{
"name": "Entrances",
"sheetId": "Entrances",
"sheetType": "das.internal.FilterSheetType",
"keep": true,
"sourceSheet": "Sessionized_Clicks",
"connector": "AND",
"filterArguments": [
{
"column": "From",
"filterExpression": "TEXT_EQUALS",
"value": "external",
"valueStatic": true,
"type": "CONSTANT"
}
],
"columnStyles": []
}
],
"errorHandlingMode": "IGNORE"
}