_workbookRead

This data may be referenced in many pages. By changing the name of this page or its content, you will be affecting many pages throughout the wiki.
 {
  "version": "2.1",
  "className": "datameer.dap.common.entity.WorkbookConfigurationImpl",
  "file": {
    "uuid": "e1b88f00-899f-4c62-817a-cf1e7cb62e96",
    "path": "/Workbooks/Behavioral Analytics/Clickstream Preprocessing.wbk",
    "description": "This workbook transforms raw web server logs into information useful for analysts by sessionizing, enriching with visitor profile information, and extracting click paths.\r\n\r\nThis workbook drives the Clickstream Analytics workbook",
    "name": "Clickstream Preprocessing"
  },
  "pullType": "MANUALLY",
  "properties": {},
  "sheets": [
    {
      "name": "weblogs",
      "sheetId": "weblogs",
      "sheetType": "das.internal.DataSourceSheetType",
      "keep": false,
      "datasource": {
        "path": "/FileUploads/Clickstream Data.upl",
        "uuid": "743b6251-4524-4668-a771-9a716b2b1e58"
      },
      "columnStyles": []
    },
    {
      "name": "No_Images_Or_Errors",
      "sheetId": "No_Images_Or_Errors",
      "sheetType": "das.internal.FilterSheetType",
      "keep": false,
      "sourceSheet": "weblogs",
      "connector": "AND",
      "filterArguments": [
        {
          "column": "status",
          "filterExpression": "EQUALS",
          "value": "200",
          "valueStatic": true,
          "type": "CONSTANT"
        },
        {
          "column": "request",
          "filterExpression": "NOT_CONTAINS",
          "value": ".jpg",
          "valueStatic": true,
          "type": "CONSTANT"
        },
        {
          "column": "request",
          "filterExpression": "NOT_CONTAINS",
          "value": ".jpeg",
          "valueStatic": true,
          "type": "CONSTANT"
        },
        {
          "column": "request",
          "filterExpression": "NOT_CONTAINS",
          "value": ".gif",
          "valueStatic": true,
          "type": "CONSTANT"
        },
        {
          "column": "request",
          "filterExpression": "NOT_CONTAINS",
          "value": ".tif",
          "valueStatic": true,
          "type": "CONSTANT"
        },
        {
          "column": "request",
          "filterExpression": "NOT_CONTAINS",
          "value": ".ico",
          "valueStatic": true,
          "type": "CONSTANT"
        }
      ],
      "columnStyles": []
    },
    {
      "name": "Session_Stats",
      "sheetId": "Session_Stats",
      "sheetType": "das.internal.FormulaSheetType",
      "keep": true,
      "formulas": [
        {
          "columnName": "User",
          "columnId": "0",
          "columnIndex": 0,
          "formulaString": "=GROUPBY(#No_Images_Or_Errors!remoteUser)"
        },
        {
          "columnName": "Session_Start",
          "columnId": "1",
          "columnIndex": 1,
          "formulaString": "=GROUPBYGAP(#No_Images_Or_Errors!timeEnglishFormat;5m)"
        },
        {
          "columnName": "Session_End",
          "columnId": "2",
          "columnIndex": 2,
          "formulaString": "=GROUPMAX(#No_Images_Or_Errors!timeEnglishFormat)"
        },
        {
          "columnName": "Mins",
          "columnId": "3",
          "columnIndex": 3,
          "formulaString": "=(#Session_End-#Session_Start)/60000"
        },
        {
          "columnName": "Clicks",
          "columnId": "4",
          "columnIndex": 4,
          "formulaString": "=GROUPCOUNT()"
        }
      ],
      "columnStyles": [
        {
          "columnIndex": 0,
          "width": 100,
          "name": "User",
          "thousandSeparator": true
        },
        {
          "columnIndex": 1,
          "width": 100,
          "name": "Session_Start",
          "thousandSeparator": true
        },
        {
          "columnIndex": 2,
          "width": 100,
          "name": "Session_End",
          "thousandSeparator": true
        },
        {
          "columnIndex": 3,
          "width": 100,
          "name": "Mins",
          "thousandSeparator": true
        },
        {
          "columnIndex": 4,
          "width": 100,
          "name": "Clicks",
          "thousandSeparator": true
        }
      ],
      "nextColumnId": 5
    },
    {
      "name": "Sessionized_Clicks",
      "sheetId": "Sessionized_Clicks",
      "sheetType": "das.internal.FormulaSheetType",
      "keep": true,
      "formulas": [
        {
          "columnName": "User",
          "columnId": "0",
          "columnIndex": 0,
          "formulaString": "=GROUPBY(#No_Images_Or_Errors!remoteUser)"
        },
        {
          "columnName": "Session_Start",
          "columnId": "1",
          "columnIndex": 1,
          "formulaString": "=GROUPBYGAP(#No_Images_Or_Errors!timeEnglishFormat;5m)"
        },
        {
          "columnName": "Path",
          "columnId": "2",
          "columnIndex": 2,
          "formulaString": "=GROUP_PATH_CHANGES(URL_FILE(#No_Images_Or_Errors!request))"
        },
        {
          "columnName": "From",
          "columnId": "3",
          "columnIndex": 3,
          "formulaString": "=REGEX(JSON_ELEMENT(#Path;0);\"(.*)\\\\..*\";\"$1\";\"external\")"
        },
        {
          "columnName": "To",
          "columnId": "4",
          "columnIndex": 4,
          "formulaString": "=REGEX(JSON_ELEMENT(#Path;1);\"(.*)\\\\..*\";\"$1\";\"external\")"
        },
        {
          "columnName": "Last_Click",
          "columnId": "5",
          "columnIndex": 5,
          "formulaString": "=GROUP_PREVIOUS(#No_Images_Or_Errors!timeEnglishFormat)"
        },
        {
          "columnName": "Landing",
          "columnId": "6",
          "columnIndex": 6,
          "formulaString": "=#No_Images_Or_Errors!timeEnglishFormat"
        },
        {
          "columnName": "Dwell_Time_ms",
          "columnId": "7",
          "columnIndex": 7,
          "formulaString": "=GROUP_DIFF(#Landing)"
        }
      ],
      "columnStyles": [
        {
          "columnIndex": 0,
          "width": 100,
          "name": "User",
          "thousandSeparator": true
        },
        {
          "columnIndex": 1,
          "width": 100,
          "name": "Session_Start",
          "thousandSeparator": true
        },
        {
          "columnIndex": 2,
          "width": 100,
          "name": "Path",
          "thousandSeparator": true
        },
        {
          "columnIndex": 3,
          "width": 100,
          "name": "From",
          "thousandSeparator": true
        },
        {
          "columnIndex": 4,
          "width": 100,
          "name": "To",
          "thousandSeparator": true
        },
        {
          "columnIndex": 5,
          "width": 100,
          "name": "Last_Click",
          "thousandSeparator": true
        },
        {
          "columnIndex": 6,
          "width": 100,
          "name": "Landing",
          "thousandSeparator": true
        },
        {
          "columnIndex": 7,
          "width": 100,
          "name": "Dwell_Time_ms",
          "thousandSeparator": true
        }
      ],
      "nextColumnId": 8
    },
    {
      "name": "Moves",
      "sheetId": "Moves",
      "sheetType": "das.internal.FilterSheetType",
      "keep": true,
      "sourceSheet": "Sessionized_Clicks",
      "connector": "AND",
      "filterArguments": [
        {
          "column": "To",
          "filterExpression": "NOT_CONTAINS",
          "value": "external",
          "valueStatic": true,
          "type": "CONSTANT"
        }
      ],
      "columnStyles": []
    },
    {
      "name": "Click_Pairs",
      "sheetId": "Click_Pairs",
      "sheetType": "das.internal.FormulaSheetType",
      "keep": true,
      "formulas": [
        {
          "columnName": "Path",
          "columnId": "0",
          "columnIndex": 0,
          "formulaString": "=GROUPBY(#Moves!Path)"
        },
        {
          "columnName": "From",
          "columnId": "1",
          "columnIndex": 1,
          "formulaString": "=REGEX(JSON_ELEMENT(#Path;0);\"(.*)\\\\..*\";\"$1\";\"external\")"
        },
        {
          "columnName": "To",
          "columnId": "2",
          "columnIndex": 2,
          "formulaString": "=REGEX(JSON_ELEMENT(#Path;1);\"(.*)\\\\..*\";\"$1\";\"external\")"
        },
        {
          "columnName": "Occurrences",
          "columnId": "3",
          "columnIndex": 3,
          "formulaString": "=GROUPCOUNT()"
        },
        {
          "columnName": "Avg_Mins_On_Page",
          "columnId": "4",
          "columnIndex": 4,
          "formulaString": "=GROUPAVERAGE(#Moves!Dwell_Time_ms)/60000"
        }
      ],
      "columnStyles": [
        {
          "columnIndex": 0,
          "width": 100,
          "name": "Path",
          "thousandSeparator": true
        },
        {
          "columnIndex": 1,
          "width": 100,
          "name": "From",
          "thousandSeparator": true
        },
        {
          "columnIndex": 2,
          "width": 100,
          "name": "To",
          "thousandSeparator": true
        },
        {
          "columnIndex": 3,
          "width": 100,
          "name": "Occurrences",
          "thousandSeparator": true
        },
        {
          "columnIndex": 4,
          "width": 100,
          "name": "Avg_Mins_On_Page",
          "thousandSeparator": true
        }
      ],
      "nextColumnId": 5
    },
    {
      "name": "User_Stats",
      "sheetId": "User_Stats",
      "sheetType": "das.internal.FormulaSheetType",
      "keep": false,
      "formulas": [
        {
          "columnName": "User",
          "columnId": "0",
          "columnIndex": 0,
          "formulaString": "=GROUPBY(#Session_Stats!User)"
        },
        {
          "columnName": "Avg_Session_Mins",
          "columnId": "1",
          "columnIndex": 1,
          "formulaString": "=GROUPAVERAGE(#Session_Stats!Mins)"
        },
        {
          "columnName": "Avg_Clicks",
          "columnId": "2",
          "columnIndex": 2,
          "formulaString": "=GROUPAVERAGE(#Session_Stats!Clicks)"
        },
        {
          "columnName": "Median_Session_Mins",
          "columnId": "3",
          "columnIndex": 3,
          "formulaString": "=GROUPMEDIAN(#Session_Stats!Mins)"
        },
        {
          "columnName": "Num_Sessions",
          "columnId": "4",
          "columnIndex": 4,
          "formulaString": "=GROUPCOUNT()"
        },
        {
          "columnName": "Total_Clicks",
          "columnId": "5",
          "columnIndex": 5,
          "formulaString": "=GROUPSUM(#Session_Stats!Clicks)"
        }
      ],
      "columnStyles": [
        {
          "columnIndex": 0,
          "width": 100,
          "name": "User",
          "thousandSeparator": true
        },
        {
          "columnIndex": 1,
          "width": 100,
          "name": "Avg_Session_Mins",
          "thousandSeparator": true
        },
        {
          "columnIndex": 2,
          "width": 100,
          "name": "Avg_Clicks",
          "thousandSeparator": true
        },
        {
          "columnIndex": 3,
          "width": 100,
          "name": "Median_Session_Mins",
          "thousandSeparator": true
        },
        {
          "columnIndex": 4,
          "width": 100,
          "name": "Num_Sessions",
          "thousandSeparator": true
        },
        {
          "columnIndex": 5,
          "width": 100,
          "name": "Total_Clicks",
          "thousandSeparator": true
        }
      ],
      "nextColumnId": 6
    },
    {
      "name": "customers",
      "sheetId": "customers",
      "sheetType": "das.internal.DataSourceSheetType",
      "keep": false,
      "datasource": {
        "path": "/FileUploads/Web User Profile.upl",
        "uuid": "cc9fc852-9e16-4c81-ab97-ccfc9ee687b6"
      },
      "columnStyles": []
    },
    {
      "name": "Users_Enriched",
      "sheetId": "Users_Enriched",
      "sheetType": "das.internal.JoinedSheetType",
      "keep": true,
      "sheetDefinition": {
        "joinCategory": "TWO_MEMBER_JOIN",
        "joinPairs": [
          {
            "joinType": "INNER",
            "sheet1": "User_Stats",
            "sheet2": "customers",
            "joinColumn1": "User",
            "joinColumn2": "user"
          }
        ],
        "sheetToIncludeColumns": {
          "User_Stats": [
            "User",
            "Avg_Session_Mins",
            "Avg_Clicks",
            "Median_Session_Mins",
            "Num_Sessions",
            "Total_Clicks"
          ],
          "customers": [
            "Email",
            "Activated"
          ]
        }
      },
      "columnStyles": []
    },
    {
      "name": "Raw_ClickPaths",
      "sheetId": "Raw_ClickPaths",
      "sheetType": "das.internal.FormulaSheetType",
      "keep": false,
      "formulas": [
        {
          "columnName": "User",
          "columnId": "0",
          "columnIndex": 0,
          "formulaString": "=GROUPBY(#Sessionized_Clicks!User)"
        },
        {
          "columnName": "Session_Start",
          "columnId": "1",
          "columnIndex": 1,
          "formulaString": "=GROUPBY(#Sessionized_Clicks!Session_Start)"
        },
        {
          "columnName": "Path",
          "columnId": "2",
          "columnIndex": 2,
          "formulaString": "=TOJSON(GROUPCOMBIN(#Sessionized_Clicks!To))"
        }
      ],
      "columnStyles": [
        {
          "columnIndex": 0,
          "width": 100,
          "name": "User",
          "thousandSeparator": true
        },
        {
          "columnIndex": 1,
          "width": 100,
          "name": "Session_Start",
          "thousandSeparator": true
        },
        {
          "columnIndex": 2,
          "width": 100,
          "name": "Path",
          "thousandSeparator": true
        }
      ],
      "nextColumnId": 3
    },
    {
      "name": "Internal_Moves",
      "sheetId": "Internal_Moves",
      "sheetType": "das.internal.FilterSheetType",
      "keep": true,
      "sourceSheet": "Click_Pairs",
      "connector": "AND",
      "filterArguments": [
        {
          "column": "Path",
          "filterExpression": "NOT_CONTAINS",
          "value": "external",
          "valueStatic": true,
          "type": "CONSTANT"
        }
      ],
      "columnStyles": []
    },
    {
      "name": "ClickPaths_4_Plus",
      "sheetId": "ClickPaths_4_Plus",
      "sheetType": "das.internal.FilterSheetType",
      "keep": true,
      "sourceSheet": "Raw_ClickPaths",
      "connector": "AND",
      "filterArguments": [
        {
          "filterExpression": "FORMULA",
          "value": "NOT(ISNULL(JSON_ELEMENT(#Path;3)))",
          "valueStatic": false,
          "type": "ADVANCED_FORMULA"
        }
      ],
      "columnStyles": []
    },
    {
      "name": "Page_Pairs",
      "sheetId": "Page_Pairs",
      "sheetType": "das.internal.FormulaSheetType",
      "keep": true,
      "formulas": [
        {
          "columnName": "From",
          "columnId": "0",
          "columnIndex": 0,
          "formulaString": "=#Click_Pairs!From"
        },
        {
          "columnName": "To",
          "columnId": "1",
          "columnIndex": 1,
          "formulaString": "=#Click_Pairs!To"
        },
        {
          "columnName": "Occurrences",
          "columnId": "2",
          "columnIndex": 2,
          "formulaString": "=#Click_Pairs!Occurrences"
        },
        {
          "columnName": "Avg_Mins_On_Page",
          "columnId": "3",
          "columnIndex": 3,
          "formulaString": "=#Click_Pairs!Avg_Mins_On_Page"
        }
      ],
      "columnStyles": [
        {
          "columnIndex": 0,
          "width": 100,
          "name": "From",
          "thousandSeparator": true
        },
        {
          "columnIndex": 1,
          "width": 100,
          "name": "To",
          "thousandSeparator": true
        },
        {
          "columnIndex": 2,
          "width": 100,
          "name": "Occurrences",
          "thousandSeparator": true
        },
        {
          "columnIndex": 3,
          "width": 100,
          "name": "Avg_Mins_On_Page",
          "thousandSeparator": true
        }
      ],
      "nextColumnId": 4
    },
    {
      "name": "Entrances",
      "sheetId": "Entrances",
      "sheetType": "das.internal.FilterSheetType",
      "keep": true,
      "sourceSheet": "Sessionized_Clicks",
      "connector": "AND",
      "filterArguments": [
        {
          "column": "From",
          "filterExpression": "TEXT_EQUALS",
          "value": "external",
          "valueStatic": true,
          "type": "CONSTANT"
        }
      ],
      "columnStyles": []
    }
  ],
  "errorHandlingMode": "IGNORE"
}