ContosoRetailDW on Azure SQL Data Warehouse

Happy New Year! It as been a while my friends… here goes 😉

ContosoRetailDW on Azure SQL Data Warehouse

This post assumes you are familiar or have already been introduced to Azure SQL Data Warehouse. If not I strongly encourage to read and follow-up on the SQL Data Warehouse Documentation to get you going.

Optionally if you would like to have a local on premise copy of the ContosoRetailDW database – A fictitious retail demo dataset used for presenting Microsoft Business Intelligence products. You can download here -> https://www.microsoft.com/en-ca/download/details.aspx?id=18279

As much as we love AdventureWorks, it is also worthwhile to explore and work with bigger data volumes, to develop and test new concepts! Hence the entire ContosoRetailDW database contains more than 34M rows of records. So I decided to make it happen…ContosoRetailDW on Azure SQL Data Warehouse.

Before we start the scripts (code) for this post are available here on GitHub -> https://github.com/sfrechette/contosoretaildw-azuresqldw

First thing let’s start by creating a blank SQL Data Warehouse database named ContosoRetailDW Create a SQL Data Warehouse

azure_sqldw

Next we need to create a slightly redesigned and modified ContosoRetailDW schema for the data to be imported and used in Azure SQL Data Warehouse. I have removed the following 3 columns from each tables; ETLLoadID, LoadDate and UpdateDate and in addition columns of data type geography and geometry which are not supported in SQL Data Warehouse.

After successfully creating the SQL Data Warehouse database connect with Visual Studio Connect to SQL Data Warehouse with Visual Studio to your newly created blank database and run the following dsql script to create the schema for the ContosoRetailDW database.

For detailed information on table design -> Table design in SQL Data Warehouse

ContosoRetailDW DDL schema script – ContosoRetailDW_ddl.dsql

USE ContosoRetailDW;

CREATE TABLE [dbo].[DimAccount](
	[AccountKey] [int] NOT NULL,
	[ParentAccountKey] [int] NULL,
	[AccountLabel] [nvarchar](100) NULL,
	[AccountName] [nvarchar](50) NULL,
	[AccountDescription] [nvarchar](50) NULL,
	[AccountType] [nvarchar](50) NULL,
	[Operator] [nvarchar](50) NULL,
	[CustomMembers] [nvarchar](300) NULL,
	[ValueType] [nvarchar](50) NULL,
	[CustomMemberOptions] [nvarchar](200) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimChannel](
	[ChannelKey] [int] NOT NULL,
	[ChannelLabel] [nvarchar](100) NOT NULL,
	[ChannelName] [nvarchar](20) NULL,
	[ChannelDescription] [nvarchar](50) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimCurrency](
	[CurrencyKey] [int] NOT NULL,
	[CurrencyLabel] [nvarchar](10) NOT NULL,
	[CurrencyName] [nvarchar](20) NOT NULL,
	[CurrencyDescription] [nvarchar](50) NOT NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimCustomer](
	[CustomerKey] [int] NOT NULL,
	[GeographyKey] [int] NOT NULL,
	[CustomerLabel] [nvarchar](100) NOT NULL,
	[Title] [nvarchar](8) NULL,
	[FirstName] [nvarchar](50) NULL,
	[MiddleName] [nvarchar](50) NULL,
	[LastName] [nvarchar](50) NULL,
	[NameStyle] [bit] NULL,
	[BirthDate] [date] NULL,
	[MaritalStatus] [nchar](1) NULL,
	[Suffix] [nvarchar](10) NULL,
	[Gender] [nvarchar](1) NULL,
	[EmailAddress] [nvarchar](50) NULL,
	[YearlyIncome] [money] NULL,
	[TotalChildren] [tinyint] NULL,
	[NumberChildrenAtHome] [tinyint] NULL,
	[Education] [nvarchar](40) NULL,
	[Occupation] [nvarchar](100) NULL,
	[HouseOwnerFlag] [nchar](1) NULL,
	[NumberCarsOwned] [tinyint] NULL,
	[AddressLine1] [nvarchar](120) NULL,
	[AddressLine2] [nvarchar](120) NULL,
	[Phone] [nvarchar](20) NULL,
	[DateFirstPurchase] [date] NULL,
	[CustomerType] [nvarchar](15) NULL,
	[CompanyName] [nvarchar](100) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(CustomerKey)); 

CREATE TABLE [dbo].[DimDate](
	[Datekey] [datetime] NOT NULL,
	[FullDateLabel] [nvarchar](20) NOT NULL,
	[DateDescription] [nvarchar](20) NOT NULL,
	[CalendarYear] [int] NOT NULL,
	[CalendarYearLabel] [nvarchar](20) NOT NULL,
	[CalendarHalfYear] [int] NOT NULL,
	[CalendarHalfYearLabel] [nvarchar](20) NOT NULL,
	[CalendarQuarter] [int] NOT NULL,
	[CalendarQuarterLabel] [nvarchar](20) NULL,
	[CalendarMonth] [int] NOT NULL,
	[CalendarMonthLabel] [nvarchar](20) NOT NULL,
	[CalendarWeek] [int] NOT NULL,
	[CalendarWeekLabel] [nvarchar](20) NOT NULL,
	[CalendarDayOfWeek] [int] NOT NULL,
	[CalendarDayOfWeekLabel] [nvarchar](10) NOT NULL,
	[FiscalYear] [int] NOT NULL,
	[FiscalYearLabel] [nvarchar](20) NOT NULL,
	[FiscalHalfYear] [int] NOT NULL,
	[FiscalHalfYearLabel] [nvarchar](20) NOT NULL,
	[FiscalQuarter] [int] NOT NULL,
	[FiscalQuarterLabel] [nvarchar](20) NOT NULL,
	[FiscalMonth] [int] NOT NULL,
	[FiscalMonthLabel] [nvarchar](20) NOT NULL,
	[IsWorkDay] [nvarchar](20) NOT NULL,
	[IsHoliday] [int] NOT NULL,
	[HolidayName] [nvarchar](20) NOT NULL,
	[EuropeSeason] [nvarchar](50) NULL,
	[NorthAmericaSeason] [nvarchar](50) NULL,
	[AsiaSeason] [nvarchar](50) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimEmployee](
	[EmployeeKey] [int] NOT NULL,
	[ParentEmployeeKey] [int] NULL,
	[FirstName] [nvarchar](50) NOT NULL,
	[LastName] [nvarchar](50) NOT NULL,
	[MiddleName] [nvarchar](50) NULL,
	[Title] [nvarchar](50) NULL,
	[HireDate] [date] NULL,
	[BirthDate] [date] NULL,
	[EmailAddress] [nvarchar](50) NULL,
	[Phone] [nvarchar](25) NULL,
	[MaritalStatus] [nchar](1) NULL,
	[EmergencyContactName] [nvarchar](50) NULL,
	[EmergencyContactPhone] [nvarchar](25) NULL,
	[SalariedFlag] [bit] NULL,
	[Gender] [nchar](1) NULL,
	[PayFrequency] [tinyint] NULL,
	[BaseRate] [money] NULL,
	[VacationHours] [smallint] NULL,
	[CurrentFlag] [bit] NOT NULL,
	[SalesPersonFlag] [bit] NOT NULL,
	[DepartmentName] [nvarchar](50) NULL,
	[StartDate] [date] NULL,
	[EndDate] [date] NULL,
	[Status] [nvarchar](50) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimEntity](
	[EntityKey] [int] NOT NULL,
	[EntityLabel] [nvarchar](100) NULL,
	[ParentEntityKey] [int] NULL,
	[ParentEntityLabel] [nvarchar](100) NULL,
	[EntityName] [nvarchar](50) NULL,
	[EntityDescription] [nvarchar](100) NULL,
	[EntityType] [nvarchar](100) NULL,
	[StartDate] [datetime] NULL,
	[EndDate] [datetime] NULL,
	[Status] [nvarchar](50) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimGeography](
	[GeographyKey] [int] NOT NULL,
	[GeographyType] [nvarchar](50) NOT NULL,
	[ContinentName] [nvarchar](50) NOT NULL,
	[CityName] [nvarchar](100) NULL,
	[StateProvinceName] [nvarchar](100) NULL,
	[RegionCountryName] [nvarchar](100) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimMachine](
	[MachineKey] [int] NOT NULL,
	[MachineLabel] [nvarchar](100) NULL,
	[StoreKey] [int] NOT NULL,
	[MachineType] [nvarchar](50) NOT NULL,
	[MachineName] [nvarchar](100) NOT NULL,
	[MachineDescription] [nvarchar](200) NOT NULL,
	[VendorName] [nvarchar](50) NOT NULL,
	[MachineOS] [nvarchar](50) NOT NULL,
	[MachineSource] [nvarchar](100) NOT NULL,
	[MachineHardware] [nvarchar](100) NULL,
	[MachineSoftware] [nvarchar](100) NOT NULL,
	[Status] [nvarchar](50) NOT NULL,
	[ServiceStartDate] [datetime] NOT NULL,
	[DecommissionDate] [datetime] NULL,
	[LastModifiedDate] [datetime] NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimOutage](
	[OutageKey] [int] NOT NULL,
	[OutageLabel] [nvarchar](100) NOT NULL,
	[OutageName] [nvarchar](50) NOT NULL,
	[OutageDescription] [nvarchar](200) NOT NULL,
	[OutageType] [nvarchar](50) NOT NULL,
	[OutageTypeDescription] [nvarchar](200) NOT NULL,
	[OutageSubType] [nvarchar](50) NOT NULL,
	[OutageSubTypeDescription] [nvarchar](200) NOT NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimProduct](
	[ProductKey] [int] NOT NULL,
	[ProductLabel] [nvarchar](255) NULL,
	[ProductName] [nvarchar](500) NULL,
	[ProductDescription] [nvarchar](400) NULL,
	[ProductSubcategoryKey] [int] NULL,
	[Manufacturer] [nvarchar](50) NULL,
	[BrandName] [nvarchar](50) NULL,
	[ClassID] [nvarchar](10) NULL,
	[ClassName] [nvarchar](20) NULL,
	[StyleID] [nvarchar](10) NULL,
	[StyleName] [nvarchar](20) NULL,
	[ColorID] [nvarchar](10) NULL,
	[ColorName] [nvarchar](20) NOT NULL,
	[Size] [nvarchar](50) NULL,
	[SizeRange] [nvarchar](50) NULL,
	[SizeUnitMeasureID] [nvarchar](20) NULL,
	[Weight] [float] NULL,
	[WeightUnitMeasureID] [nvarchar](20) NULL,
	[UnitOfMeasureID] [nvarchar](10) NULL,
	[UnitOfMeasureName] [nvarchar](40) NULL,
	[StockTypeID] [nvarchar](10) NULL,
	[StockTypeName] [nvarchar](40) NULL,
	[UnitCost] [money] NULL,
	[UnitPrice] [money] NULL,
	[AvailableForSaleDate] [datetime] NULL,
	[StopSaleDate] [datetime] NULL,
	[Status] [nvarchar](7) NULL,
	[ImageURL] [nvarchar](150) NULL,
	[ProductURL] [nvarchar](150) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(ProductKey));

CREATE TABLE [dbo].[DimProductCategory](
	[ProductCategoryKey] [int] NOT NULL,
	[ProductCategoryLabel] [nvarchar](100) NULL,
	[ProductCategoryName] [nvarchar](30) NOT NULL,
	[ProductCategoryDescription] [nvarchar](50) NOT NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimProductSubcategory](
	[ProductSubcategoryKey] [int] NOT NULL,
	[ProductSubcategoryLabel] [nvarchar](100) NULL,
	[ProductSubcategoryName] [nvarchar](50) NOT NULL,
	[ProductSubcategoryDescription] [nvarchar](100) NULL,
	[ProductCategoryKey] [int] NULL
) WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimPromotion](
	[PromotionKey] [int] NOT NULL,
	[PromotionLabel] [nvarchar](100) NULL,
	[PromotionName] [nvarchar](100) NULL,
	[PromotionDescription] [nvarchar](255) NULL,
	[DiscountPercent] [float] NULL,
	[PromotionType] [nvarchar](50) NULL,
	[PromotionCategory] [nvarchar](50) NULL,
	[StartDate] [datetime] NOT NULL,
	[EndDate] [datetime] NULL,
	[MinQuantity] [int] NULL,
	[MaxQuantity] [int] NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimSalesTerritory](
	[SalesTerritoryKey] [int] NOT NULL,
	[GeographyKey] [int] NOT NULL,
	[SalesTerritoryLabel] [nvarchar](100) NULL,
	[SalesTerritoryName] [nvarchar](50) NOT NULL,
	[SalesTerritoryRegion] [nvarchar](50) NOT NULL,
	[SalesTerritoryCountry] [nvarchar](50) NOT NULL,
	[SalesTerritoryGroup] [nvarchar](50) NULL,
	[SalesTerritoryLevel] [nvarchar](10) NULL,
	[SalesTerritoryManager] [int] NULL,
	[StartDate] [datetime] NULL,
	[EndDate] [datetime] NULL,
	[Status] [nvarchar](50) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimScenario](
	[ScenarioKey] [int] NOT NULL,
	[ScenarioLabel] [nvarchar](100) NOT NULL,
	[ScenarioName] [nvarchar](20) NULL,
	[ScenarioDescription] [nvarchar](50) NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[DimStore](
	[StoreKey] [int] NOT NULL,
	[GeographyKey] [int] NOT NULL,
	[StoreManager] [int] NULL,
	[StoreType] [nvarchar](15) NULL,
	[StoreName] [nvarchar](100) NOT NULL,
	[StoreDescription] [nvarchar](300) NOT NULL,
	[Status] [nvarchar](20) NOT NULL,
	[OpenDate] [datetime] NOT NULL,
	[CloseDate] [datetime] NULL,
	[EntityKey] [int] NULL,
	[ZipCode] [nvarchar](20) NULL,
	[ZipCodeExtension] [nvarchar](10) NULL,
	[StorePhone] [nvarchar](15) NULL,
	[StoreFax] [nvarchar](14) NULL,
	[AddressLine1] [nvarchar](100) NULL,
	[AddressLine2] [nvarchar](100) NULL,
	[CloseReason] [nvarchar](20) NULL,
	[EmployeeCount] [int] NULL,
	[SellingAreaSize] [float] NULL,
	[LastRemodelDate] [datetime] NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[FactExchangeRate](
	[ExchangeRateKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[AverageRate] [float] NOT NULL,
	[EndOfDayRate] [float] NOT NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[FactInventory](
	[InventoryKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[ProductKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[OnHandQuantity] [int] NOT NULL,
	[OnOrderQuantity] [int] NOT NULL,
	[SafetyStockQuantity] [int] NULL,
	[UnitCost] [money] NOT NULL,
	[DaysInStock] [int] NULL,
	[MinDayInStock] [int] NULL,
	[MaxDayInStock] [int] NULL,
	[Aging] [int] NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(InventoryKey));

CREATE TABLE [dbo].[FactITMachine](
	[ITMachinekey] [int] NOT NULL,
	[MachineKey] [int] NOT NULL,
	[Datekey] [datetime] NOT NULL,
	[CostAmount] [money] NULL,
	[CostType] [nvarchar](200) NOT NULL
) 
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[FactITSLA](
	[ITSLAkey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[MachineKey] [int] NOT NULL,
	[OutageKey] [int] NOT NULL,
	[OutageStartTime] [datetime] NOT NULL,
	[OutageEndTime] [datetime] NOT NULL,
	[DownTime] [int] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);

CREATE TABLE [dbo].[FactOnlineSales](
	[OnlineSalesKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[ProductKey] [int] NOT NULL,
	[PromotionKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[CustomerKey] [int] NOT NULL,
	[SalesOrderNumber] [nvarchar](20) NOT NULL,
	[SalesOrderLineNumber] [int] NULL,
	[SalesQuantity] [int] NOT NULL,
	[SalesAmount] [money] NOT NULL,
	[ReturnQuantity] [int] NOT NULL,
	[ReturnAmount] [money] NULL,
	[DiscountQuantity] [int] NULL,
	[DiscountAmount] [money] NULL,
	[TotalCost] [money] NOT NULL,
	[UnitCost] [money] NULL,
	[UnitPrice] [money] NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(OnlineSalesKey),
PARTITION(DateKey RANGE RIGHT FOR VALUES (	'2005-01-01','2006-01-01','2007-01-01','2008-01-01','2009-01-01',
											'2010-01-01','2011-01-01','2012-01-01','2013-01-01','2014-01-01',
											'2015-01-01','2016-01-01','2017-01-01','2018-01-01','2019-01-01','2020-01-01')));

CREATE TABLE [dbo].[FactSales](
	[SalesKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[channelKey] [int] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[ProductKey] [int] NOT NULL,
	[PromotionKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[UnitCost] [money] NOT NULL,
	[UnitPrice] [money] NOT NULL,
	[SalesQuantity] [int] NOT NULL,
	[ReturnQuantity] [int] NOT NULL,
	[ReturnAmount] [money] NULL,
	[DiscountQuantity] [int] NULL,
	[DiscountAmount] [money] NULL,
	[TotalCost] [money] NOT NULL,
	[SalesAmount] [money] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(SalesKey));

CREATE TABLE [dbo].[FactSalesQuota](
	[SalesQuotaKey] [int] NOT NULL,
	[ChannelKey] [int] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[ProductKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[ScenarioKey] [int] NOT NULL,
	[SalesQuantityQuota] [money] NOT NULL,
	[SalesAmountQuota] [money] NOT NULL,
	[GrossMarginQuota] [money] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(SalesQuotaKey));

CREATE TABLE [dbo].[FactStrategyPlan](
	[StrategyPlanKey] [int] NOT NULL,
	[Datekey] [datetime] NOT NULL,
	[EntityKey] [int] NOT NULL,
	[ScenarioKey] [int] NOT NULL,
	[AccountKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[ProductCategoryKey] [int] NULL,
	[Amount] [money] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(StrategyPlanKey));

Got Data!
We need to load some data! The GZip data files, 25 of them totalling approx. 413MB are located here on OneDrive -> https://1drv.ms/f/s!Al_yetnJltJSmUP0M0MBPTUSNOqa. You will need to load these files to an Azure blob storage. James Serra has a great post to help you get started: Getting data into Azure Blob Storage. For the purpose of this exercise I created a blob storage container name ‘contosoretaildw’ with a ‘data’ folder and imported the GZip files using the AzCopy Command-Line Utility Transfer data with the AzCopy Command-Line Utility.

blob_contoso

Will be loading data from blob storage to SQL Data Warehouse using Polybase Load data with PolyBase in SQL Data Warehouse. The following script assumes we already have defined and created a schema for the ContosoRetailDW database. You could also load data using the CTAS (Create Table As Select) feature only if you have not yet defined and created the ContosoRetailDW schema. The script for this scenario PolybaseLoadCTAS.dsql is available on GitHub.

Loading data from Azure Blog Storage – PolybaseLoadInsert.dsql

--Create Schema for External Tables 
CREATE SCHEMA ext;

--Create Master Key. Required to encrypt the credential secret in the next step.
CREATE MASTER KEY;

--Create Database Scoped Credential. Provide the Azure storage account key. 
--The identity name does not affect authentication to Azure storage.
CREATE DATABASE SCOPED CREDENTIAL AzureStorageCredential 
WITH IDENTITY = 'user', 
SECRET = 'azure_storage_account_key_goes_here';

--Create External Data Source. Specify location and credential to access your Azure blob storage.
CREATE EXTERNAL DATA SOURCE AzureStorage 
WITH (  
        TYPE = Hadoop, 
        LOCATION = 'wasbs://contosoretaildw@stephdefaultstorage.blob.core.windows.net/',
        CREDENTIAL = AzureStorageCredential
); 

--Create External File format. Specify the layout of data stored in Azure storage blobs. 
CREATE EXTERNAL FILE FORMAT TextFileFormat 
WITH 
(   
    FORMAT_TYPE = DELIMITEDTEXT, 
    FORMAT_OPTIONS  
    (
        FIELD_TERMINATOR ='|',
        USE_TYPE_DEFAULT = FALSE
    ),
	DATA_COMPRESSION = 'org.apache.hadoop.io.compress.GzipCodec'
);

--DimAccount
CREATE EXTERNAL TABLE ext.DimAccount (
	[AccountKey] [int] NOT NULL,
	[ParentAccountKey] [int] NULL,
	[AccountLabel] [nvarchar](100) NULL,
	[AccountName] [nvarchar](50) NULL,
	[AccountDescription] [nvarchar](50) NULL,
	[AccountType] [nvarchar](50) NULL,
	[Operator] [nvarchar](50) NULL,
	[CustomMembers] [nvarchar](300) NULL,
	[ValueType] [nvarchar](50) NULL,
	[CustomMemberOptions] [nvarchar](200) NULL
)
WITH (
        LOCATION='data/DimAccount.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimAccount
	SELECT * FROM ext.DimAccount;

--DimChannel
CREATE EXTERNAL TABLE ext.DimChannel (
	[ChannelKey] [int] NOT NULL,
	[ChannelLabel] [nvarchar](100) NOT NULL,
	[ChannelName] [nvarchar](20) NULL,
	[ChannelDescription] [nvarchar](50) NULL
)
WITH (
        LOCATION='data/DimChannel.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimChannel
	SELECT * FROM ext.DimChannel;

--DimCurrency
CREATE EXTERNAL TABLE ext.DimCurrency (
	[CurrencyKey] [int] NOT NULL,
	[CurrencyLabel] [nvarchar](10) NOT NULL,
	[CurrencyName] [nvarchar](20) NOT NULL,
	[CurrencyDescription] [nvarchar](50) NOT NULL
)
WITH (
        LOCATION='data/DimCurrency.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimCurrency
	SELECT * FROM ext.DimCurrency;

--DimCustomer
CREATE EXTERNAL TABLE ext.DimCustomer (
	[CustomerKey] [int] NOT NULL,
	[GeographyKey] [int] NOT NULL,
	[CustomerLabel] [nvarchar](100) NOT NULL,
	[Title] [nvarchar](8) NULL,
	[FirstName] [nvarchar](50) NULL,
	[MiddleName] [nvarchar](50) NULL,
	[LastName] [nvarchar](50) NULL,
	[NameStyle] [bit] NULL,
	[BirthDate] [date] NULL,
	[MaritalStatus] [nchar](1) NULL,
	[Suffix] [nvarchar](10) NULL,
	[Gender] [nvarchar](1) NULL,
	[EmailAddress] [nvarchar](50) NULL,
	[YearlyIncome] [money] NULL,
	[TotalChildren] [tinyint] NULL,
	[NumberChildrenAtHome] [tinyint] NULL,
	[Education] [nvarchar](40) NULL,
	[Occupation] [nvarchar](100) NULL,
	[HouseOwnerFlag] [nchar](1) NULL,
	[NumberCarsOwned] [tinyint] NULL,
	[AddressLine1] [nvarchar](120) NULL,
	[AddressLine2] [nvarchar](120) NULL,
	[Phone] [nvarchar](20) NULL,
	[DateFirstPurchase] [date] NULL,
	[CustomerType] [nvarchar](15) NULL,
	[CompanyName] [nvarchar](100) NULL
)
WITH (
        LOCATION='data/DimCustomer.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimCustomer
	SELECT * FROM ext.DimCustomer;

--DimDate
CREATE EXTERNAL TABLE ext.DimDate (
	[Datekey] [datetime] NOT NULL,
	[FullDateLabel] [nvarchar](20) NOT NULL,
	[DateDescription] [nvarchar](20) NOT NULL,
	[CalendarYear] [int] NOT NULL,
	[CalendarYearLabel] [nvarchar](20) NOT NULL,
	[CalendarHalfYear] [int] NOT NULL,
	[CalendarHalfYearLabel] [nvarchar](20) NOT NULL,
	[CalendarQuarter] [int] NOT NULL,
	[CalendarQuarterLabel] [nvarchar](20) NULL,
	[CalendarMonth] [int] NOT NULL,
	[CalendarMonthLabel] [nvarchar](20) NOT NULL,
	[CalendarWeek] [int] NOT NULL,
	[CalendarWeekLabel] [nvarchar](20) NOT NULL,
	[CalendarDayOfWeek] [int] NOT NULL,
	[CalendarDayOfWeekLabel] [nvarchar](10) NOT NULL,
	[FiscalYear] [int] NOT NULL,
	[FiscalYearLabel] [nvarchar](20) NOT NULL,
	[FiscalHalfYear] [int] NOT NULL,
	[FiscalHalfYearLabel] [nvarchar](20) NOT NULL,
	[FiscalQuarter] [int] NOT NULL,
	[FiscalQuarterLabel] [nvarchar](20) NOT NULL,
	[FiscalMonth] [int] NOT NULL,
	[FiscalMonthLabel] [nvarchar](20) NOT NULL,
	[IsWorkDay] [nvarchar](20) NOT NULL,
	[IsHoliday] [int] NOT NULL,
	[HolidayName] [nvarchar](20) NOT NULL,
	[EuropeSeason] [nvarchar](50) NULL,
	[NorthAmericaSeason] [nvarchar](50) NULL,
	[AsiaSeason] [nvarchar](50) NULL
)
WITH (
        LOCATION='data/DimDate.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimDate
	SELECT * FROM ext.DimDate;

--DimEmployee
CREATE EXTERNAL TABLE ext.DimEmployee (
	[EmployeeKey] [int] NOT NULL,
	[ParentEmployeeKey] [int] NULL,
	[FirstName] [nvarchar](50) NOT NULL,
	[LastName] [nvarchar](50) NOT NULL,
	[MiddleName] [nvarchar](50) NULL,
	[Title] [nvarchar](50) NULL,
	[HireDate] [date] NULL,
	[BirthDate] [date] NULL,
	[EmailAddress] [nvarchar](50) NULL,
	[Phone] [nvarchar](25) NULL,
	[MaritalStatus] [nchar](1) NULL,
	[EmergencyContactName] [nvarchar](50) NULL,
	[EmergencyContactPhone] [nvarchar](25) NULL,
	[SalariedFlag] [bit] NULL,
	[Gender] [nchar](1) NULL,
	[PayFrequency] [tinyint] NULL,
	[BaseRate] [money] NULL,
	[VacationHours] [smallint] NULL,
	[CurrentFlag] [bit] NOT NULL,
	[SalesPersonFlag] [bit] NOT NULL,
	[DepartmentName] [nvarchar](50) NULL,
	[StartDate] [date] NULL,
	[EndDate] [date] NULL,
	[Status] [nvarchar](50) NULL
)
WITH (
        LOCATION='data/DimEmployee.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimEmployee
	SELECT * FROM ext.DimEmployee;

--DimEntity
CREATE EXTERNAL TABLE ext.DimEntity (
	[EntityKey] [int] NOT NULL,
	[EntityLabel] [nvarchar](100) NULL,
	[ParentEntityKey] [int] NULL,
	[ParentEntityLabel] [nvarchar](100) NULL,
	[EntityName] [nvarchar](50) NULL,
	[EntityDescription] [nvarchar](100) NULL,
	[EntityType] [nvarchar](100) NULL,
	[StartDate] [datetime] NULL,
	[EndDate] [datetime] NULL,
	[Status] [nvarchar](50) NULL
)
WITH (
        LOCATION='data/DimEntity.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimEntity
	SELECT * FROM ext.DimEntity;

--DimGeography
CREATE EXTERNAL TABLE ext.DimGeography (
	[GeographyKey] [int] NOT NULL,
	[GeographyType] [nvarchar](50) NOT NULL,
	[ContinentName] [nvarchar](50) NOT NULL,
	[CityName] [nvarchar](100) NULL,
	[StateProvinceName] [nvarchar](100) NULL,
	[RegionCountryName] [nvarchar](100) NULL
)
WITH (
        LOCATION='data/DimGeography.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimGeography
	SELECT * FROM ext.DimGeography;

--DimMachine
CREATE EXTERNAL TABLE ext.DimMachine (
	[MachineKey] [int] NOT NULL,
	[MachineLabel] [nvarchar](100) NULL,
	[StoreKey] [int] NOT NULL,
	[MachineType] [nvarchar](50) NOT NULL,
	[MachineName] [nvarchar](100) NOT NULL,
	[MachineDescription] [nvarchar](200) NOT NULL,
	[VendorName] [nvarchar](50) NOT NULL,
	[MachineOS] [nvarchar](50) NOT NULL,
	[MachineSource] [nvarchar](100) NOT NULL,
	[MachineHardware] [nvarchar](100) NULL,
	[MachineSoftware] [nvarchar](100) NOT NULL,
	[Status] [nvarchar](50) NOT NULL,
	[ServiceStartDate] [datetime] NOT NULL,
	[DecommissionDate] [datetime] NULL,
	[LastModifiedDate] [datetime] NULL
)
WITH (
        LOCATION='data/DimMachine.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimMachine
	SELECT * FROM ext.DimMachine;

--DimOutage
CREATE EXTERNAL TABLE ext.DimOutage (
	[OutageKey] [int] NOT NULL,
	[OutageLabel] [nvarchar](100) NOT NULL,
	[OutageName] [nvarchar](50) NOT NULL,
	[OutageDescription] [nvarchar](200) NOT NULL,
	[OutageType] [nvarchar](50) NOT NULL,
	[OutageTypeDescription] [nvarchar](200) NOT NULL,
	[OutageSubType] [nvarchar](50) NOT NULL,
	[OutageSubTypeDescription] [nvarchar](200) NOT NULL
)
WITH (
        LOCATION='data/DimOutage.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimOutage
	SELECT * FROM ext.DimOutage;

--DimProduct
CREATE EXTERNAL TABLE ext.DimProduct (
	[ProductKey] [int] NOT NULL,
	[ProductLabel] [nvarchar](255) NULL,
	[ProductName] [nvarchar](500) NULL,
	[ProductDescription] [nvarchar](400) NULL,
	[ProductSubcategoryKey] [int] NULL,
	[Manufacturer] [nvarchar](50) NULL,
	[BrandName] [nvarchar](50) NULL,
	[ClassID] [nvarchar](10) NULL,
	[ClassName] [nvarchar](20) NULL,
	[StyleID] [nvarchar](10) NULL,
	[StyleName] [nvarchar](20) NULL,
	[ColorID] [nvarchar](10) NULL,
	[ColorName] [nvarchar](20) NOT NULL,
	[Size] [nvarchar](50) NULL,
	[SizeRange] [nvarchar](50) NULL,
	[SizeUnitMeasureID] [nvarchar](20) NULL,
	[Weight] [float] NULL,
	[WeightUnitMeasureID] [nvarchar](20) NULL,
	[UnitOfMeasureID] [nvarchar](10) NULL,
	[UnitOfMeasureName] [nvarchar](40) NULL,
	[StockTypeID] [nvarchar](10) NULL,
	[StockTypeName] [nvarchar](40) NULL,
	[UnitCost] [money] NULL,
	[UnitPrice] [money] NULL,
	[AvailableForSaleDate] [datetime] NULL,
	[StopSaleDate] [datetime] NULL,
	[Status] [nvarchar](7) NULL,
	[ImageURL] [nvarchar](150) NULL,
	[ProductURL] [nvarchar](150) NULL
)
WITH (
        LOCATION='data/DimProduct.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimProduct
	SELECT * FROM ext.DimProduct;

--DimProductCategory
CREATE EXTERNAL TABLE ext.DimProductCategory (
	[ProductCategoryKey] [int] NOT NULL,
	[ProductCategoryLabel] [nvarchar](100) NULL,
	[ProductCategoryName] [nvarchar](30) NOT NULL,
	[ProductCategoryDescription] [nvarchar](50) NOT NULL
)
WITH (
        LOCATION='data/DimProductCategory.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimProductCategory
	SELECT * FROM ext.DimProductCategory;

--DimProductSubcategory
CREATE EXTERNAL TABLE ext.DimProductSubcategory (
	[ProductSubcategoryKey] [int] NOT NULL,
	[ProductSubcategoryLabel] [nvarchar](100) NULL,
	[ProductSubcategoryName] [nvarchar](50) NOT NULL,
	[ProductSubcategoryDescription] [nvarchar](100) NULL,
	[ProductCategoryKey] [int] NULL
)
WITH (
        LOCATION='data/DimProductSubcategory.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimProductSubcategory
	SELECT * FROM ext.DimProductSubcategory;

--DimPromotion
CREATE EXTERNAL TABLE ext.DimPromotion (
	[PromotionKey] [int] NOT NULL,
	[PromotionLabel] [nvarchar](100) NULL,
	[PromotionName] [nvarchar](100) NULL,
	[PromotionDescription] [nvarchar](255) NULL,
	[DiscountPercent] [float] NULL,
	[PromotionType] [nvarchar](50) NULL,
	[PromotionCategory] [nvarchar](50) NULL,
	[StartDate] [datetime] NOT NULL,
	[EndDate] [datetime] NULL,
	[MinQuantity] [int] NULL,
	[MaxQuantity] [int] NULL
)
WITH (
        LOCATION='data/DimPromotion.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimPromotion
	SELECT * FROM ext.DimPromotion;

--DimSalesTerritory
CREATE EXTERNAL TABLE ext.DimSalesTerritory (
	[SalesTerritoryKey] [int] NOT NULL,
	[GeographyKey] [int] NOT NULL,
	[SalesTerritoryLabel] [nvarchar](100) NULL,
	[SalesTerritoryName] [nvarchar](50) NOT NULL,
	[SalesTerritoryRegion] [nvarchar](50) NOT NULL,
	[SalesTerritoryCountry] [nvarchar](50) NOT NULL,
	[SalesTerritoryGroup] [nvarchar](50) NULL,
	[SalesTerritoryLevel] [nvarchar](10) NULL,
	[SalesTerritoryManager] [int] NULL,
	[StartDate] [datetime] NULL,
	[EndDate] [datetime] NULL,
	[Status] [nvarchar](50) NULL
)
WITH (
        LOCATION='data/DimSalesTerritory.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimSalesTerritory
	SELECT * FROM ext.DimSalesTerritory;

--DimScenario
CREATE EXTERNAL TABLE ext.DimScenario (
	[ScenarioKey] [int] NOT NULL,
	[ScenarioLabel] [nvarchar](100) NOT NULL,
	[ScenarioName] [nvarchar](20) NULL,
	[ScenarioDescription] [nvarchar](50) NULL
)
WITH (
        LOCATION='data/DimScenario.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimScenario
	SELECT * FROM ext.DimScenario;

--DimStore
CREATE EXTERNAL TABLE ext.DimStore (
	[StoreKey] [int] NOT NULL,
	[GeographyKey] [int] NOT NULL,
	[StoreManager] [int] NULL,
	[StoreType] [nvarchar](15) NULL,
	[StoreName] [nvarchar](100) NOT NULL,
	[StoreDescription] [nvarchar](300) NOT NULL,
	[Status] [nvarchar](20) NOT NULL,
	[OpenDate] [datetime] NOT NULL,
	[CloseDate] [datetime] NULL,
	[EntityKey] [int] NULL,
	[ZipCode] [nvarchar](20) NULL,
	[ZipCodeExtension] [nvarchar](10) NULL,
	[StorePhone] [nvarchar](15) NULL,
	[StoreFax] [nvarchar](14) NULL,
	[AddressLine1] [nvarchar](100) NULL,
	[AddressLine2] [nvarchar](100) NULL,
	[CloseReason] [nvarchar](20) NULL,
	[EmployeeCount] [int] NULL,
	[SellingAreaSize] [float] NULL,
	[LastRemodelDate] [datetime] NULL
)
WITH (
        LOCATION='data/DimStore.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.DimStore
	SELECT * FROM ext.DimStore;

--FactExchangeRate
CREATE EXTERNAL TABLE ext.FactExchangeRate (
	[ExchangeRateKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[AverageRate] [float] NOT NULL,
	[EndOfDayRate] [float] NOT NULL
)
WITH (
        LOCATION='data/FactExchangeRate.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.FactExchangeRate
	SELECT * FROM ext.FactExchangeRate;

--FactInventory
CREATE EXTERNAL TABLE ext.FactInventory (
	[InventoryKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[ProductKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[OnHandQuantity] [int] NOT NULL,
	[OnOrderQuantity] [int] NOT NULL,
	[SafetyStockQuantity] [int] NULL,
	[UnitCost] [money] NOT NULL,
	[DaysInStock] [int] NULL,
	[MinDayInStock] [int] NULL,
	[MaxDayInStock] [int] NULL,
	[Aging] [int] NULL
)
WITH (
        LOCATION='data/FactInventory.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.FactInventory
	SELECT * FROM ext.FactInventory;

--FactITMachine
CREATE EXTERNAL TABLE ext.FactITMachine (
	[ITMachinekey] [int] NOT NULL,
	[MachineKey] [int] NOT NULL,
	[Datekey] [datetime] NOT NULL,
	[CostAmount] [money] NULL,
	[CostType] [nvarchar](200) NOT NULL
)
WITH (
        LOCATION='data/FactITMachine.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.FactITMachine
	SELECT * FROM ext.FactITMachine;

--FactITSLA
CREATE EXTERNAL TABLE ext.FactITSLA (
	[ITSLAkey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[MachineKey] [int] NOT NULL,
	[OutageKey] [int] NOT NULL,
	[OutageStartTime] [datetime] NOT NULL,
	[OutageEndTime] [datetime] NOT NULL,
	[DownTime] [int] NOT NULL
)
WITH (
        LOCATION='data/FactITSLA.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.FactITSLA
	SELECT * FROM ext.FactITSLA;

--FactOnlineSales
CREATE EXTERNAL TABLE ext.FactOnlineSales (
	[OnlineSalesKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[ProductKey] [int] NOT NULL,
	[PromotionKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[CustomerKey] [int] NOT NULL,
	[SalesOrderNumber] [nvarchar](20) NOT NULL,
	[SalesOrderLineNumber] [int] NULL,
	[SalesQuantity] [int] NOT NULL,
	[SalesAmount] [money] NOT NULL,
	[ReturnQuantity] [int] NOT NULL,
	[ReturnAmount] [money] NULL,
	[DiscountQuantity] [int] NULL,
	[DiscountAmount] [money] NULL,
	[TotalCost] [money] NOT NULL,
	[UnitCost] [money] NULL,
	[UnitPrice] [money] NULL
)
WITH (
        LOCATION='data/FactOnlineSales.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.FactOnlineSales
	SELECT * FROM ext.FactOnlineSales;

--FactSales
CREATE EXTERNAL TABLE ext.FactSales (
	[SalesKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[channelKey] [int] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[ProductKey] [int] NOT NULL,
	[PromotionKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[UnitCost] [money] NOT NULL,
	[UnitPrice] [money] NOT NULL,
	[SalesQuantity] [int] NOT NULL,
	[ReturnQuantity] [int] NOT NULL,
	[ReturnAmount] [money] NULL,
	[DiscountQuantity] [int] NULL,
	[DiscountAmount] [money] NULL,
	[TotalCost] [money] NOT NULL,
	[SalesAmount] [money] NOT NULL
)
WITH (
        LOCATION='data/FactSales.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.FactSales
	SELECT * FROM ext.FactSales;

--FactSalesQuota
CREATE EXTERNAL TABLE ext.FactSalesQuota (
	[SalesQuotaKey] [int] NOT NULL,
	[ChannelKey] [int] NOT NULL,
	[StoreKey] [int] NOT NULL,
	[ProductKey] [int] NOT NULL,
	[DateKey] [datetime] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[ScenarioKey] [int] NOT NULL,
	[SalesQuantityQuota] [money] NOT NULL,
	[SalesAmountQuota] [money] NOT NULL,
	[GrossMarginQuota] [money] NOT NULL
)
WITH (
        LOCATION='data/FactSalesQuota.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.FactSalesQuota
	SELECT * FROM ext.FactSalesQuota;

--FactStrategyPlan
CREATE EXTERNAL TABLE ext.FactStrategyPlan (
	[StrategyPlanKey] [int] NOT NULL,
	[Datekey] [datetime] NOT NULL,
	[EntityKey] [int] NOT NULL,
	[ScenarioKey] [int] NOT NULL,
	[AccountKey] [int] NOT NULL,
	[CurrencyKey] [int] NOT NULL,
	[ProductCategoryKey] [int] NULL,
	[Amount] [money] NOT NULL
)
WITH (
        LOCATION='data/FactStrategyPlan.txt.gz', 
        DATA_SOURCE=AzureStorage, 
        FILE_FORMAT=TextFileFormat
);

INSERT INTO dbo.FactStrategyPlan
	SELECT * FROM ext.FactStrategyPlan;

To conclude we now need to update statistics on our ContosoRetailDW SQL Data Warehouse. Without proper statistics, you will not get the performance that SQL Data Warehouse is designed to provide. Tables and columns do not have statistics automatically generated by SQL Data Warehouse and so you need to create them yourself. Will be using a stored procedure named dbo.prc_sqldw_create_stats to create statistics on all columns in a database. Manage statistics in SQL Data Warehouse.

To create statistics on all columns in the table with this procedure, simply call the procedure:
exec prc_sqldw_create_stats;

dbo.prc_sqldw_create_stats

CREATE PROCEDURE    [dbo].[prc_sqldw_create_stats]
(   @create_type    tinyint -- 1 default 2 Fullscan 3 Sample
,   @sample_pct     tinyint
)
AS

IF @create_type NOT IN (1,2,3)
BEGIN
    THROW 151000,'Invalid value for @stats_type parameter. Valid range 1 (default), 2 (fullscan) or 3 (sample).',1;
END;

IF @sample_pct IS NULL
BEGIN;
    SET @sample_pct = 20;
END;

IF OBJECT_ID('tempdb..#stats_ddl') IS NOT NULL
BEGIN;
    DROP TABLE #stats_ddl;
END;

CREATE TABLE #stats_ddl
WITH    (   DISTRIBUTION    = HASH([seq_nmbr])
        ,   LOCATION        = USER_DB
        )
AS
WITH T
AS
(
SELECT      t.[name]                        AS [table_name]
,           s.[name]                        AS [table_schema_name]
,           c.[name]                        AS [column_name]
,           c.[column_id]                   AS [column_id]
,           t.[object_id]                   AS [object_id]
,           ROW_NUMBER()
            OVER(ORDER BY (SELECT NULL))    AS [seq_nmbr]
FROM        sys.[tables] t
JOIN        sys.[schemas] s         ON  t.[schema_id]       = s.[schema_id]
JOIN        sys.[columns] c         ON  t.[object_id]       = c.[object_id]
LEFT JOIN   sys.[stats_columns] l   ON  l.[object_id]       = c.[object_id]
                                    AND l.[column_id]       = c.[column_id]
                                    AND l.[stats_column_id] = 1
LEFT JOIN   sys.[external_tables] e ON  e.[object_id]       = t.[object_id]
WHERE       l.[object_id] IS NULL
AND         e.[object_id] IS NULL -- not an external table
)
SELECT  [table_schema_name]
,       [table_name]
,       [column_name]
,       [column_id]
,       [object_id]
,       [seq_nmbr]
,       CASE @create_type
        WHEN 1
        THEN    CAST('CREATE STATISTICS '+QUOTENAME('stat_'+table_schema_name+ '_' + table_name + '_'+column_name)+' ON '+QUOTENAME(table_schema_name)+'.'+QUOTENAME(table_name)+'('+QUOTENAME(column_name)+')' AS VARCHAR(8000))
        WHEN 2
        THEN    CAST('CREATE STATISTICS '+QUOTENAME('stat_'+table_schema_name+ '_' + table_name + '_'+column_name)+' ON '+QUOTENAME(table_schema_name)+'.'+QUOTENAME(table_name)+'('+QUOTENAME(column_name)+') WITH FULLSCAN' AS VARCHAR(8000))
        WHEN 3
        THEN    CAST('CREATE STATISTICS '+QUOTENAME('stat_'+table_schema_name+ '_' + table_name + '_'+column_name)+' ON '+QUOTENAME(table_schema_name)+'.'+QUOTENAME(table_name)+'('+QUOTENAME(column_name)+') WITH SAMPLE '+@sample_pct+'PERCENT' AS VARCHAR(8000))
        END AS create_stat_ddl
FROM T
;

DECLARE @i INT              = 1
,       @t INT              = (SELECT COUNT(*) FROM #stats_ddl)
,       @s NVARCHAR(4000)   = N''
;

WHILE @i < = @t
BEGIN
    SET @s=(SELECT create_stat_ddl FROM #stats_ddl WHERE seq_nmbr = @i);

    PRINT @s
    EXEC sp_executesql @s
    SET @i+=1;
END

DROP TABLE #stats_ddl;

We are done! Explore, write some queries, scale compute resources up and down, connect with PowerBI and create awesome data visualizations. Just remember to pause your SQL Data Warehouse when not using it.
Enjoy!

comments powered by Disqus