ContosoRetailDW on Azure SQL Data Warehouse
Happy New Year! It as been a while my friends… here goes 😉
ContosoRetailDW on Azure SQL Data Warehouse
This post assumes you are familiar or have already been introduced to Azure SQL Data Warehouse. If not I strongly encourage to read and follow-up on the SQL Data Warehouse Documentation to get you going.
Optionally if you would like to have a local on premise copy of the ContosoRetailDW database – A fictitious retail demo dataset used for presenting Microsoft Business Intelligence products. You can download here -> https://www.microsoft.com/en-ca/download/details.aspx?id=18279
As much as we love AdventureWorks, it is also worthwhile to explore and work with bigger data volumes, to develop and test new concepts! Hence the entire ContosoRetailDW database contains more than 34M rows of records. So I decided to make it happen…ContosoRetailDW on Azure SQL Data Warehouse.
Before we start the scripts (code) for this post are available here on GitHub -> https://github.com/sfrechette/contosoretaildw-azuresqldw
First thing let’s start by creating a blank SQL Data Warehouse database named ContosoRetailDW Create a SQL Data Warehouse
Next we need to create a slightly redesigned and modified ContosoRetailDW schema for the data to be imported and used in Azure SQL Data Warehouse. I have removed the following 3 columns from each tables; ETLLoadID, LoadDate and UpdateDate and in addition columns of data type geography and geometry which are not supported in SQL Data Warehouse.
After successfully creating the SQL Data Warehouse database connect with Visual Studio Connect to SQL Data Warehouse with Visual Studio to your newly created blank database and run the following dsql script to create the schema for the ContosoRetailDW database.
For detailed information on table design -> Table design in SQL Data Warehouse
ContosoRetailDW DDL schema script – ContosoRetailDW_ddl.dsql
USE ContosoRetailDW;
CREATE TABLE [dbo].[DimAccount](
[AccountKey] [int] NOT NULL,
[ParentAccountKey] [int] NULL,
[AccountLabel] [nvarchar](100) NULL,
[AccountName] [nvarchar](50) NULL,
[AccountDescription] [nvarchar](50) NULL,
[AccountType] [nvarchar](50) NULL,
[Operator] [nvarchar](50) NULL,
[CustomMembers] [nvarchar](300) NULL,
[ValueType] [nvarchar](50) NULL,
[CustomMemberOptions] [nvarchar](200) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimChannel](
[ChannelKey] [int] NOT NULL,
[ChannelLabel] [nvarchar](100) NOT NULL,
[ChannelName] [nvarchar](20) NULL,
[ChannelDescription] [nvarchar](50) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimCurrency](
[CurrencyKey] [int] NOT NULL,
[CurrencyLabel] [nvarchar](10) NOT NULL,
[CurrencyName] [nvarchar](20) NOT NULL,
[CurrencyDescription] [nvarchar](50) NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimCustomer](
[CustomerKey] [int] NOT NULL,
[GeographyKey] [int] NOT NULL,
[CustomerLabel] [nvarchar](100) NOT NULL,
[Title] [nvarchar](8) NULL,
[FirstName] [nvarchar](50) NULL,
[MiddleName] [nvarchar](50) NULL,
[LastName] [nvarchar](50) NULL,
[NameStyle] [bit] NULL,
[BirthDate] [date] NULL,
[MaritalStatus] [nchar](1) NULL,
[Suffix] [nvarchar](10) NULL,
[Gender] [nvarchar](1) NULL,
[EmailAddress] [nvarchar](50) NULL,
[YearlyIncome] [money] NULL,
[TotalChildren] [tinyint] NULL,
[NumberChildrenAtHome] [tinyint] NULL,
[Education] [nvarchar](40) NULL,
[Occupation] [nvarchar](100) NULL,
[HouseOwnerFlag] [nchar](1) NULL,
[NumberCarsOwned] [tinyint] NULL,
[AddressLine1] [nvarchar](120) NULL,
[AddressLine2] [nvarchar](120) NULL,
[Phone] [nvarchar](20) NULL,
[DateFirstPurchase] [date] NULL,
[CustomerType] [nvarchar](15) NULL,
[CompanyName] [nvarchar](100) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(CustomerKey));
CREATE TABLE [dbo].[DimDate](
[Datekey] [datetime] NOT NULL,
[FullDateLabel] [nvarchar](20) NOT NULL,
[DateDescription] [nvarchar](20) NOT NULL,
[CalendarYear] [int] NOT NULL,
[CalendarYearLabel] [nvarchar](20) NOT NULL,
[CalendarHalfYear] [int] NOT NULL,
[CalendarHalfYearLabel] [nvarchar](20) NOT NULL,
[CalendarQuarter] [int] NOT NULL,
[CalendarQuarterLabel] [nvarchar](20) NULL,
[CalendarMonth] [int] NOT NULL,
[CalendarMonthLabel] [nvarchar](20) NOT NULL,
[CalendarWeek] [int] NOT NULL,
[CalendarWeekLabel] [nvarchar](20) NOT NULL,
[CalendarDayOfWeek] [int] NOT NULL,
[CalendarDayOfWeekLabel] [nvarchar](10) NOT NULL,
[FiscalYear] [int] NOT NULL,
[FiscalYearLabel] [nvarchar](20) NOT NULL,
[FiscalHalfYear] [int] NOT NULL,
[FiscalHalfYearLabel] [nvarchar](20) NOT NULL,
[FiscalQuarter] [int] NOT NULL,
[FiscalQuarterLabel] [nvarchar](20) NOT NULL,
[FiscalMonth] [int] NOT NULL,
[FiscalMonthLabel] [nvarchar](20) NOT NULL,
[IsWorkDay] [nvarchar](20) NOT NULL,
[IsHoliday] [int] NOT NULL,
[HolidayName] [nvarchar](20) NOT NULL,
[EuropeSeason] [nvarchar](50) NULL,
[NorthAmericaSeason] [nvarchar](50) NULL,
[AsiaSeason] [nvarchar](50) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimEmployee](
[EmployeeKey] [int] NOT NULL,
[ParentEmployeeKey] [int] NULL,
[FirstName] [nvarchar](50) NOT NULL,
[LastName] [nvarchar](50) NOT NULL,
[MiddleName] [nvarchar](50) NULL,
[Title] [nvarchar](50) NULL,
[HireDate] [date] NULL,
[BirthDate] [date] NULL,
[EmailAddress] [nvarchar](50) NULL,
[Phone] [nvarchar](25) NULL,
[MaritalStatus] [nchar](1) NULL,
[EmergencyContactName] [nvarchar](50) NULL,
[EmergencyContactPhone] [nvarchar](25) NULL,
[SalariedFlag] [bit] NULL,
[Gender] [nchar](1) NULL,
[PayFrequency] [tinyint] NULL,
[BaseRate] [money] NULL,
[VacationHours] [smallint] NULL,
[CurrentFlag] [bit] NOT NULL,
[SalesPersonFlag] [bit] NOT NULL,
[DepartmentName] [nvarchar](50) NULL,
[StartDate] [date] NULL,
[EndDate] [date] NULL,
[Status] [nvarchar](50) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimEntity](
[EntityKey] [int] NOT NULL,
[EntityLabel] [nvarchar](100) NULL,
[ParentEntityKey] [int] NULL,
[ParentEntityLabel] [nvarchar](100) NULL,
[EntityName] [nvarchar](50) NULL,
[EntityDescription] [nvarchar](100) NULL,
[EntityType] [nvarchar](100) NULL,
[StartDate] [datetime] NULL,
[EndDate] [datetime] NULL,
[Status] [nvarchar](50) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimGeography](
[GeographyKey] [int] NOT NULL,
[GeographyType] [nvarchar](50) NOT NULL,
[ContinentName] [nvarchar](50) NOT NULL,
[CityName] [nvarchar](100) NULL,
[StateProvinceName] [nvarchar](100) NULL,
[RegionCountryName] [nvarchar](100) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimMachine](
[MachineKey] [int] NOT NULL,
[MachineLabel] [nvarchar](100) NULL,
[StoreKey] [int] NOT NULL,
[MachineType] [nvarchar](50) NOT NULL,
[MachineName] [nvarchar](100) NOT NULL,
[MachineDescription] [nvarchar](200) NOT NULL,
[VendorName] [nvarchar](50) NOT NULL,
[MachineOS] [nvarchar](50) NOT NULL,
[MachineSource] [nvarchar](100) NOT NULL,
[MachineHardware] [nvarchar](100) NULL,
[MachineSoftware] [nvarchar](100) NOT NULL,
[Status] [nvarchar](50) NOT NULL,
[ServiceStartDate] [datetime] NOT NULL,
[DecommissionDate] [datetime] NULL,
[LastModifiedDate] [datetime] NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimOutage](
[OutageKey] [int] NOT NULL,
[OutageLabel] [nvarchar](100) NOT NULL,
[OutageName] [nvarchar](50) NOT NULL,
[OutageDescription] [nvarchar](200) NOT NULL,
[OutageType] [nvarchar](50) NOT NULL,
[OutageTypeDescription] [nvarchar](200) NOT NULL,
[OutageSubType] [nvarchar](50) NOT NULL,
[OutageSubTypeDescription] [nvarchar](200) NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimProduct](
[ProductKey] [int] NOT NULL,
[ProductLabel] [nvarchar](255) NULL,
[ProductName] [nvarchar](500) NULL,
[ProductDescription] [nvarchar](400) NULL,
[ProductSubcategoryKey] [int] NULL,
[Manufacturer] [nvarchar](50) NULL,
[BrandName] [nvarchar](50) NULL,
[ClassID] [nvarchar](10) NULL,
[ClassName] [nvarchar](20) NULL,
[StyleID] [nvarchar](10) NULL,
[StyleName] [nvarchar](20) NULL,
[ColorID] [nvarchar](10) NULL,
[ColorName] [nvarchar](20) NOT NULL,
[Size] [nvarchar](50) NULL,
[SizeRange] [nvarchar](50) NULL,
[SizeUnitMeasureID] [nvarchar](20) NULL,
[Weight] [float] NULL,
[WeightUnitMeasureID] [nvarchar](20) NULL,
[UnitOfMeasureID] [nvarchar](10) NULL,
[UnitOfMeasureName] [nvarchar](40) NULL,
[StockTypeID] [nvarchar](10) NULL,
[StockTypeName] [nvarchar](40) NULL,
[UnitCost] [money] NULL,
[UnitPrice] [money] NULL,
[AvailableForSaleDate] [datetime] NULL,
[StopSaleDate] [datetime] NULL,
[Status] [nvarchar](7) NULL,
[ImageURL] [nvarchar](150) NULL,
[ProductURL] [nvarchar](150) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(ProductKey));
CREATE TABLE [dbo].[DimProductCategory](
[ProductCategoryKey] [int] NOT NULL,
[ProductCategoryLabel] [nvarchar](100) NULL,
[ProductCategoryName] [nvarchar](30) NOT NULL,
[ProductCategoryDescription] [nvarchar](50) NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimProductSubcategory](
[ProductSubcategoryKey] [int] NOT NULL,
[ProductSubcategoryLabel] [nvarchar](100) NULL,
[ProductSubcategoryName] [nvarchar](50) NOT NULL,
[ProductSubcategoryDescription] [nvarchar](100) NULL,
[ProductCategoryKey] [int] NULL
) WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimPromotion](
[PromotionKey] [int] NOT NULL,
[PromotionLabel] [nvarchar](100) NULL,
[PromotionName] [nvarchar](100) NULL,
[PromotionDescription] [nvarchar](255) NULL,
[DiscountPercent] [float] NULL,
[PromotionType] [nvarchar](50) NULL,
[PromotionCategory] [nvarchar](50) NULL,
[StartDate] [datetime] NOT NULL,
[EndDate] [datetime] NULL,
[MinQuantity] [int] NULL,
[MaxQuantity] [int] NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimSalesTerritory](
[SalesTerritoryKey] [int] NOT NULL,
[GeographyKey] [int] NOT NULL,
[SalesTerritoryLabel] [nvarchar](100) NULL,
[SalesTerritoryName] [nvarchar](50) NOT NULL,
[SalesTerritoryRegion] [nvarchar](50) NOT NULL,
[SalesTerritoryCountry] [nvarchar](50) NOT NULL,
[SalesTerritoryGroup] [nvarchar](50) NULL,
[SalesTerritoryLevel] [nvarchar](10) NULL,
[SalesTerritoryManager] [int] NULL,
[StartDate] [datetime] NULL,
[EndDate] [datetime] NULL,
[Status] [nvarchar](50) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimScenario](
[ScenarioKey] [int] NOT NULL,
[ScenarioLabel] [nvarchar](100) NOT NULL,
[ScenarioName] [nvarchar](20) NULL,
[ScenarioDescription] [nvarchar](50) NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[DimStore](
[StoreKey] [int] NOT NULL,
[GeographyKey] [int] NOT NULL,
[StoreManager] [int] NULL,
[StoreType] [nvarchar](15) NULL,
[StoreName] [nvarchar](100) NOT NULL,
[StoreDescription] [nvarchar](300) NOT NULL,
[Status] [nvarchar](20) NOT NULL,
[OpenDate] [datetime] NOT NULL,
[CloseDate] [datetime] NULL,
[EntityKey] [int] NULL,
[ZipCode] [nvarchar](20) NULL,
[ZipCodeExtension] [nvarchar](10) NULL,
[StorePhone] [nvarchar](15) NULL,
[StoreFax] [nvarchar](14) NULL,
[AddressLine1] [nvarchar](100) NULL,
[AddressLine2] [nvarchar](100) NULL,
[CloseReason] [nvarchar](20) NULL,
[EmployeeCount] [int] NULL,
[SellingAreaSize] [float] NULL,
[LastRemodelDate] [datetime] NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[FactExchangeRate](
[ExchangeRateKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[AverageRate] [float] NOT NULL,
[EndOfDayRate] [float] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[FactInventory](
[InventoryKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[StoreKey] [int] NOT NULL,
[ProductKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[OnHandQuantity] [int] NOT NULL,
[OnOrderQuantity] [int] NOT NULL,
[SafetyStockQuantity] [int] NULL,
[UnitCost] [money] NOT NULL,
[DaysInStock] [int] NULL,
[MinDayInStock] [int] NULL,
[MaxDayInStock] [int] NULL,
[Aging] [int] NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(InventoryKey));
CREATE TABLE [dbo].[FactITMachine](
[ITMachinekey] [int] NOT NULL,
[MachineKey] [int] NOT NULL,
[Datekey] [datetime] NOT NULL,
[CostAmount] [money] NULL,
[CostType] [nvarchar](200) NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[FactITSLA](
[ITSLAkey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[StoreKey] [int] NOT NULL,
[MachineKey] [int] NOT NULL,
[OutageKey] [int] NOT NULL,
[OutageStartTime] [datetime] NOT NULL,
[OutageEndTime] [datetime] NOT NULL,
[DownTime] [int] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX);
CREATE TABLE [dbo].[FactOnlineSales](
[OnlineSalesKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[StoreKey] [int] NOT NULL,
[ProductKey] [int] NOT NULL,
[PromotionKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[CustomerKey] [int] NOT NULL,
[SalesOrderNumber] [nvarchar](20) NOT NULL,
[SalesOrderLineNumber] [int] NULL,
[SalesQuantity] [int] NOT NULL,
[SalesAmount] [money] NOT NULL,
[ReturnQuantity] [int] NOT NULL,
[ReturnAmount] [money] NULL,
[DiscountQuantity] [int] NULL,
[DiscountAmount] [money] NULL,
[TotalCost] [money] NOT NULL,
[UnitCost] [money] NULL,
[UnitPrice] [money] NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(OnlineSalesKey),
PARTITION(DateKey RANGE RIGHT FOR VALUES ( '2005-01-01','2006-01-01','2007-01-01','2008-01-01','2009-01-01',
'2010-01-01','2011-01-01','2012-01-01','2013-01-01','2014-01-01',
'2015-01-01','2016-01-01','2017-01-01','2018-01-01','2019-01-01','2020-01-01')));
CREATE TABLE [dbo].[FactSales](
[SalesKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[channelKey] [int] NOT NULL,
[StoreKey] [int] NOT NULL,
[ProductKey] [int] NOT NULL,
[PromotionKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[UnitCost] [money] NOT NULL,
[UnitPrice] [money] NOT NULL,
[SalesQuantity] [int] NOT NULL,
[ReturnQuantity] [int] NOT NULL,
[ReturnAmount] [money] NULL,
[DiscountQuantity] [int] NULL,
[DiscountAmount] [money] NULL,
[TotalCost] [money] NOT NULL,
[SalesAmount] [money] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(SalesKey));
CREATE TABLE [dbo].[FactSalesQuota](
[SalesQuotaKey] [int] NOT NULL,
[ChannelKey] [int] NOT NULL,
[StoreKey] [int] NOT NULL,
[ProductKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[ScenarioKey] [int] NOT NULL,
[SalesQuantityQuota] [money] NOT NULL,
[SalesAmountQuota] [money] NOT NULL,
[GrossMarginQuota] [money] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(SalesQuotaKey));
CREATE TABLE [dbo].[FactStrategyPlan](
[StrategyPlanKey] [int] NOT NULL,
[Datekey] [datetime] NOT NULL,
[EntityKey] [int] NOT NULL,
[ScenarioKey] [int] NOT NULL,
[AccountKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[ProductCategoryKey] [int] NULL,
[Amount] [money] NOT NULL
)
WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH(StrategyPlanKey));
Got Data!
We need to load some data! The GZip data files, 25 of them totalling approx. 413MB are located here on OneDrive -> https://1drv.ms/f/s!Al_yetnJltJSmUP0M0MBPTUSNOqa. You will need to load these files to an Azure blob storage. James Serra has a great post to help you get started: Getting data into Azure Blob Storage. For the purpose of this exercise I created a blob storage container name ‘contosoretaildw’ with a ‘data’ folder and imported the GZip files using the AzCopy Command-Line Utility Transfer data with the AzCopy Command-Line Utility.
Will be loading data from blob storage to SQL Data Warehouse using Polybase Load data with PolyBase in SQL Data Warehouse. The following script assumes we already have defined and created a schema for the ContosoRetailDW database. You could also load data using the CTAS (Create Table As Select) feature only if you have not yet defined and created the ContosoRetailDW schema. The script for this scenario PolybaseLoadCTAS.dsql is available on GitHub.
Loading data from Azure Blog Storage – PolybaseLoadInsert.dsql
--Create Schema for External Tables
CREATE SCHEMA ext;
--Create Master Key. Required to encrypt the credential secret in the next step.
CREATE MASTER KEY;
--Create Database Scoped Credential. Provide the Azure storage account key.
--The identity name does not affect authentication to Azure storage.
CREATE DATABASE SCOPED CREDENTIAL AzureStorageCredential
WITH IDENTITY = 'user',
SECRET = 'azure_storage_account_key_goes_here';
--Create External Data Source. Specify location and credential to access your Azure blob storage.
CREATE EXTERNAL DATA SOURCE AzureStorage
WITH (
TYPE = Hadoop,
LOCATION = 'wasbs://contosoretaildw@stephdefaultstorage.blob.core.windows.net/',
CREDENTIAL = AzureStorageCredential
);
--Create External File format. Specify the layout of data stored in Azure storage blobs.
CREATE EXTERNAL FILE FORMAT TextFileFormat
WITH
(
FORMAT_TYPE = DELIMITEDTEXT,
FORMAT_OPTIONS
(
FIELD_TERMINATOR ='|',
USE_TYPE_DEFAULT = FALSE
),
DATA_COMPRESSION = 'org.apache.hadoop.io.compress.GzipCodec'
);
--DimAccount
CREATE EXTERNAL TABLE ext.DimAccount (
[AccountKey] [int] NOT NULL,
[ParentAccountKey] [int] NULL,
[AccountLabel] [nvarchar](100) NULL,
[AccountName] [nvarchar](50) NULL,
[AccountDescription] [nvarchar](50) NULL,
[AccountType] [nvarchar](50) NULL,
[Operator] [nvarchar](50) NULL,
[CustomMembers] [nvarchar](300) NULL,
[ValueType] [nvarchar](50) NULL,
[CustomMemberOptions] [nvarchar](200) NULL
)
WITH (
LOCATION='data/DimAccount.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimAccount
SELECT * FROM ext.DimAccount;
--DimChannel
CREATE EXTERNAL TABLE ext.DimChannel (
[ChannelKey] [int] NOT NULL,
[ChannelLabel] [nvarchar](100) NOT NULL,
[ChannelName] [nvarchar](20) NULL,
[ChannelDescription] [nvarchar](50) NULL
)
WITH (
LOCATION='data/DimChannel.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimChannel
SELECT * FROM ext.DimChannel;
--DimCurrency
CREATE EXTERNAL TABLE ext.DimCurrency (
[CurrencyKey] [int] NOT NULL,
[CurrencyLabel] [nvarchar](10) NOT NULL,
[CurrencyName] [nvarchar](20) NOT NULL,
[CurrencyDescription] [nvarchar](50) NOT NULL
)
WITH (
LOCATION='data/DimCurrency.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimCurrency
SELECT * FROM ext.DimCurrency;
--DimCustomer
CREATE EXTERNAL TABLE ext.DimCustomer (
[CustomerKey] [int] NOT NULL,
[GeographyKey] [int] NOT NULL,
[CustomerLabel] [nvarchar](100) NOT NULL,
[Title] [nvarchar](8) NULL,
[FirstName] [nvarchar](50) NULL,
[MiddleName] [nvarchar](50) NULL,
[LastName] [nvarchar](50) NULL,
[NameStyle] [bit] NULL,
[BirthDate] [date] NULL,
[MaritalStatus] [nchar](1) NULL,
[Suffix] [nvarchar](10) NULL,
[Gender] [nvarchar](1) NULL,
[EmailAddress] [nvarchar](50) NULL,
[YearlyIncome] [money] NULL,
[TotalChildren] [tinyint] NULL,
[NumberChildrenAtHome] [tinyint] NULL,
[Education] [nvarchar](40) NULL,
[Occupation] [nvarchar](100) NULL,
[HouseOwnerFlag] [nchar](1) NULL,
[NumberCarsOwned] [tinyint] NULL,
[AddressLine1] [nvarchar](120) NULL,
[AddressLine2] [nvarchar](120) NULL,
[Phone] [nvarchar](20) NULL,
[DateFirstPurchase] [date] NULL,
[CustomerType] [nvarchar](15) NULL,
[CompanyName] [nvarchar](100) NULL
)
WITH (
LOCATION='data/DimCustomer.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimCustomer
SELECT * FROM ext.DimCustomer;
--DimDate
CREATE EXTERNAL TABLE ext.DimDate (
[Datekey] [datetime] NOT NULL,
[FullDateLabel] [nvarchar](20) NOT NULL,
[DateDescription] [nvarchar](20) NOT NULL,
[CalendarYear] [int] NOT NULL,
[CalendarYearLabel] [nvarchar](20) NOT NULL,
[CalendarHalfYear] [int] NOT NULL,
[CalendarHalfYearLabel] [nvarchar](20) NOT NULL,
[CalendarQuarter] [int] NOT NULL,
[CalendarQuarterLabel] [nvarchar](20) NULL,
[CalendarMonth] [int] NOT NULL,
[CalendarMonthLabel] [nvarchar](20) NOT NULL,
[CalendarWeek] [int] NOT NULL,
[CalendarWeekLabel] [nvarchar](20) NOT NULL,
[CalendarDayOfWeek] [int] NOT NULL,
[CalendarDayOfWeekLabel] [nvarchar](10) NOT NULL,
[FiscalYear] [int] NOT NULL,
[FiscalYearLabel] [nvarchar](20) NOT NULL,
[FiscalHalfYear] [int] NOT NULL,
[FiscalHalfYearLabel] [nvarchar](20) NOT NULL,
[FiscalQuarter] [int] NOT NULL,
[FiscalQuarterLabel] [nvarchar](20) NOT NULL,
[FiscalMonth] [int] NOT NULL,
[FiscalMonthLabel] [nvarchar](20) NOT NULL,
[IsWorkDay] [nvarchar](20) NOT NULL,
[IsHoliday] [int] NOT NULL,
[HolidayName] [nvarchar](20) NOT NULL,
[EuropeSeason] [nvarchar](50) NULL,
[NorthAmericaSeason] [nvarchar](50) NULL,
[AsiaSeason] [nvarchar](50) NULL
)
WITH (
LOCATION='data/DimDate.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimDate
SELECT * FROM ext.DimDate;
--DimEmployee
CREATE EXTERNAL TABLE ext.DimEmployee (
[EmployeeKey] [int] NOT NULL,
[ParentEmployeeKey] [int] NULL,
[FirstName] [nvarchar](50) NOT NULL,
[LastName] [nvarchar](50) NOT NULL,
[MiddleName] [nvarchar](50) NULL,
[Title] [nvarchar](50) NULL,
[HireDate] [date] NULL,
[BirthDate] [date] NULL,
[EmailAddress] [nvarchar](50) NULL,
[Phone] [nvarchar](25) NULL,
[MaritalStatus] [nchar](1) NULL,
[EmergencyContactName] [nvarchar](50) NULL,
[EmergencyContactPhone] [nvarchar](25) NULL,
[SalariedFlag] [bit] NULL,
[Gender] [nchar](1) NULL,
[PayFrequency] [tinyint] NULL,
[BaseRate] [money] NULL,
[VacationHours] [smallint] NULL,
[CurrentFlag] [bit] NOT NULL,
[SalesPersonFlag] [bit] NOT NULL,
[DepartmentName] [nvarchar](50) NULL,
[StartDate] [date] NULL,
[EndDate] [date] NULL,
[Status] [nvarchar](50) NULL
)
WITH (
LOCATION='data/DimEmployee.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimEmployee
SELECT * FROM ext.DimEmployee;
--DimEntity
CREATE EXTERNAL TABLE ext.DimEntity (
[EntityKey] [int] NOT NULL,
[EntityLabel] [nvarchar](100) NULL,
[ParentEntityKey] [int] NULL,
[ParentEntityLabel] [nvarchar](100) NULL,
[EntityName] [nvarchar](50) NULL,
[EntityDescription] [nvarchar](100) NULL,
[EntityType] [nvarchar](100) NULL,
[StartDate] [datetime] NULL,
[EndDate] [datetime] NULL,
[Status] [nvarchar](50) NULL
)
WITH (
LOCATION='data/DimEntity.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimEntity
SELECT * FROM ext.DimEntity;
--DimGeography
CREATE EXTERNAL TABLE ext.DimGeography (
[GeographyKey] [int] NOT NULL,
[GeographyType] [nvarchar](50) NOT NULL,
[ContinentName] [nvarchar](50) NOT NULL,
[CityName] [nvarchar](100) NULL,
[StateProvinceName] [nvarchar](100) NULL,
[RegionCountryName] [nvarchar](100) NULL
)
WITH (
LOCATION='data/DimGeography.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimGeography
SELECT * FROM ext.DimGeography;
--DimMachine
CREATE EXTERNAL TABLE ext.DimMachine (
[MachineKey] [int] NOT NULL,
[MachineLabel] [nvarchar](100) NULL,
[StoreKey] [int] NOT NULL,
[MachineType] [nvarchar](50) NOT NULL,
[MachineName] [nvarchar](100) NOT NULL,
[MachineDescription] [nvarchar](200) NOT NULL,
[VendorName] [nvarchar](50) NOT NULL,
[MachineOS] [nvarchar](50) NOT NULL,
[MachineSource] [nvarchar](100) NOT NULL,
[MachineHardware] [nvarchar](100) NULL,
[MachineSoftware] [nvarchar](100) NOT NULL,
[Status] [nvarchar](50) NOT NULL,
[ServiceStartDate] [datetime] NOT NULL,
[DecommissionDate] [datetime] NULL,
[LastModifiedDate] [datetime] NULL
)
WITH (
LOCATION='data/DimMachine.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimMachine
SELECT * FROM ext.DimMachine;
--DimOutage
CREATE EXTERNAL TABLE ext.DimOutage (
[OutageKey] [int] NOT NULL,
[OutageLabel] [nvarchar](100) NOT NULL,
[OutageName] [nvarchar](50) NOT NULL,
[OutageDescription] [nvarchar](200) NOT NULL,
[OutageType] [nvarchar](50) NOT NULL,
[OutageTypeDescription] [nvarchar](200) NOT NULL,
[OutageSubType] [nvarchar](50) NOT NULL,
[OutageSubTypeDescription] [nvarchar](200) NOT NULL
)
WITH (
LOCATION='data/DimOutage.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimOutage
SELECT * FROM ext.DimOutage;
--DimProduct
CREATE EXTERNAL TABLE ext.DimProduct (
[ProductKey] [int] NOT NULL,
[ProductLabel] [nvarchar](255) NULL,
[ProductName] [nvarchar](500) NULL,
[ProductDescription] [nvarchar](400) NULL,
[ProductSubcategoryKey] [int] NULL,
[Manufacturer] [nvarchar](50) NULL,
[BrandName] [nvarchar](50) NULL,
[ClassID] [nvarchar](10) NULL,
[ClassName] [nvarchar](20) NULL,
[StyleID] [nvarchar](10) NULL,
[StyleName] [nvarchar](20) NULL,
[ColorID] [nvarchar](10) NULL,
[ColorName] [nvarchar](20) NOT NULL,
[Size] [nvarchar](50) NULL,
[SizeRange] [nvarchar](50) NULL,
[SizeUnitMeasureID] [nvarchar](20) NULL,
[Weight] [float] NULL,
[WeightUnitMeasureID] [nvarchar](20) NULL,
[UnitOfMeasureID] [nvarchar](10) NULL,
[UnitOfMeasureName] [nvarchar](40) NULL,
[StockTypeID] [nvarchar](10) NULL,
[StockTypeName] [nvarchar](40) NULL,
[UnitCost] [money] NULL,
[UnitPrice] [money] NULL,
[AvailableForSaleDate] [datetime] NULL,
[StopSaleDate] [datetime] NULL,
[Status] [nvarchar](7) NULL,
[ImageURL] [nvarchar](150) NULL,
[ProductURL] [nvarchar](150) NULL
)
WITH (
LOCATION='data/DimProduct.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimProduct
SELECT * FROM ext.DimProduct;
--DimProductCategory
CREATE EXTERNAL TABLE ext.DimProductCategory (
[ProductCategoryKey] [int] NOT NULL,
[ProductCategoryLabel] [nvarchar](100) NULL,
[ProductCategoryName] [nvarchar](30) NOT NULL,
[ProductCategoryDescription] [nvarchar](50) NOT NULL
)
WITH (
LOCATION='data/DimProductCategory.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimProductCategory
SELECT * FROM ext.DimProductCategory;
--DimProductSubcategory
CREATE EXTERNAL TABLE ext.DimProductSubcategory (
[ProductSubcategoryKey] [int] NOT NULL,
[ProductSubcategoryLabel] [nvarchar](100) NULL,
[ProductSubcategoryName] [nvarchar](50) NOT NULL,
[ProductSubcategoryDescription] [nvarchar](100) NULL,
[ProductCategoryKey] [int] NULL
)
WITH (
LOCATION='data/DimProductSubcategory.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimProductSubcategory
SELECT * FROM ext.DimProductSubcategory;
--DimPromotion
CREATE EXTERNAL TABLE ext.DimPromotion (
[PromotionKey] [int] NOT NULL,
[PromotionLabel] [nvarchar](100) NULL,
[PromotionName] [nvarchar](100) NULL,
[PromotionDescription] [nvarchar](255) NULL,
[DiscountPercent] [float] NULL,
[PromotionType] [nvarchar](50) NULL,
[PromotionCategory] [nvarchar](50) NULL,
[StartDate] [datetime] NOT NULL,
[EndDate] [datetime] NULL,
[MinQuantity] [int] NULL,
[MaxQuantity] [int] NULL
)
WITH (
LOCATION='data/DimPromotion.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimPromotion
SELECT * FROM ext.DimPromotion;
--DimSalesTerritory
CREATE EXTERNAL TABLE ext.DimSalesTerritory (
[SalesTerritoryKey] [int] NOT NULL,
[GeographyKey] [int] NOT NULL,
[SalesTerritoryLabel] [nvarchar](100) NULL,
[SalesTerritoryName] [nvarchar](50) NOT NULL,
[SalesTerritoryRegion] [nvarchar](50) NOT NULL,
[SalesTerritoryCountry] [nvarchar](50) NOT NULL,
[SalesTerritoryGroup] [nvarchar](50) NULL,
[SalesTerritoryLevel] [nvarchar](10) NULL,
[SalesTerritoryManager] [int] NULL,
[StartDate] [datetime] NULL,
[EndDate] [datetime] NULL,
[Status] [nvarchar](50) NULL
)
WITH (
LOCATION='data/DimSalesTerritory.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimSalesTerritory
SELECT * FROM ext.DimSalesTerritory;
--DimScenario
CREATE EXTERNAL TABLE ext.DimScenario (
[ScenarioKey] [int] NOT NULL,
[ScenarioLabel] [nvarchar](100) NOT NULL,
[ScenarioName] [nvarchar](20) NULL,
[ScenarioDescription] [nvarchar](50) NULL
)
WITH (
LOCATION='data/DimScenario.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimScenario
SELECT * FROM ext.DimScenario;
--DimStore
CREATE EXTERNAL TABLE ext.DimStore (
[StoreKey] [int] NOT NULL,
[GeographyKey] [int] NOT NULL,
[StoreManager] [int] NULL,
[StoreType] [nvarchar](15) NULL,
[StoreName] [nvarchar](100) NOT NULL,
[StoreDescription] [nvarchar](300) NOT NULL,
[Status] [nvarchar](20) NOT NULL,
[OpenDate] [datetime] NOT NULL,
[CloseDate] [datetime] NULL,
[EntityKey] [int] NULL,
[ZipCode] [nvarchar](20) NULL,
[ZipCodeExtension] [nvarchar](10) NULL,
[StorePhone] [nvarchar](15) NULL,
[StoreFax] [nvarchar](14) NULL,
[AddressLine1] [nvarchar](100) NULL,
[AddressLine2] [nvarchar](100) NULL,
[CloseReason] [nvarchar](20) NULL,
[EmployeeCount] [int] NULL,
[SellingAreaSize] [float] NULL,
[LastRemodelDate] [datetime] NULL
)
WITH (
LOCATION='data/DimStore.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.DimStore
SELECT * FROM ext.DimStore;
--FactExchangeRate
CREATE EXTERNAL TABLE ext.FactExchangeRate (
[ExchangeRateKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[AverageRate] [float] NOT NULL,
[EndOfDayRate] [float] NOT NULL
)
WITH (
LOCATION='data/FactExchangeRate.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.FactExchangeRate
SELECT * FROM ext.FactExchangeRate;
--FactInventory
CREATE EXTERNAL TABLE ext.FactInventory (
[InventoryKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[StoreKey] [int] NOT NULL,
[ProductKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[OnHandQuantity] [int] NOT NULL,
[OnOrderQuantity] [int] NOT NULL,
[SafetyStockQuantity] [int] NULL,
[UnitCost] [money] NOT NULL,
[DaysInStock] [int] NULL,
[MinDayInStock] [int] NULL,
[MaxDayInStock] [int] NULL,
[Aging] [int] NULL
)
WITH (
LOCATION='data/FactInventory.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.FactInventory
SELECT * FROM ext.FactInventory;
--FactITMachine
CREATE EXTERNAL TABLE ext.FactITMachine (
[ITMachinekey] [int] NOT NULL,
[MachineKey] [int] NOT NULL,
[Datekey] [datetime] NOT NULL,
[CostAmount] [money] NULL,
[CostType] [nvarchar](200) NOT NULL
)
WITH (
LOCATION='data/FactITMachine.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.FactITMachine
SELECT * FROM ext.FactITMachine;
--FactITSLA
CREATE EXTERNAL TABLE ext.FactITSLA (
[ITSLAkey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[StoreKey] [int] NOT NULL,
[MachineKey] [int] NOT NULL,
[OutageKey] [int] NOT NULL,
[OutageStartTime] [datetime] NOT NULL,
[OutageEndTime] [datetime] NOT NULL,
[DownTime] [int] NOT NULL
)
WITH (
LOCATION='data/FactITSLA.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.FactITSLA
SELECT * FROM ext.FactITSLA;
--FactOnlineSales
CREATE EXTERNAL TABLE ext.FactOnlineSales (
[OnlineSalesKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[StoreKey] [int] NOT NULL,
[ProductKey] [int] NOT NULL,
[PromotionKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[CustomerKey] [int] NOT NULL,
[SalesOrderNumber] [nvarchar](20) NOT NULL,
[SalesOrderLineNumber] [int] NULL,
[SalesQuantity] [int] NOT NULL,
[SalesAmount] [money] NOT NULL,
[ReturnQuantity] [int] NOT NULL,
[ReturnAmount] [money] NULL,
[DiscountQuantity] [int] NULL,
[DiscountAmount] [money] NULL,
[TotalCost] [money] NOT NULL,
[UnitCost] [money] NULL,
[UnitPrice] [money] NULL
)
WITH (
LOCATION='data/FactOnlineSales.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.FactOnlineSales
SELECT * FROM ext.FactOnlineSales;
--FactSales
CREATE EXTERNAL TABLE ext.FactSales (
[SalesKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[channelKey] [int] NOT NULL,
[StoreKey] [int] NOT NULL,
[ProductKey] [int] NOT NULL,
[PromotionKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[UnitCost] [money] NOT NULL,
[UnitPrice] [money] NOT NULL,
[SalesQuantity] [int] NOT NULL,
[ReturnQuantity] [int] NOT NULL,
[ReturnAmount] [money] NULL,
[DiscountQuantity] [int] NULL,
[DiscountAmount] [money] NULL,
[TotalCost] [money] NOT NULL,
[SalesAmount] [money] NOT NULL
)
WITH (
LOCATION='data/FactSales.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.FactSales
SELECT * FROM ext.FactSales;
--FactSalesQuota
CREATE EXTERNAL TABLE ext.FactSalesQuota (
[SalesQuotaKey] [int] NOT NULL,
[ChannelKey] [int] NOT NULL,
[StoreKey] [int] NOT NULL,
[ProductKey] [int] NOT NULL,
[DateKey] [datetime] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[ScenarioKey] [int] NOT NULL,
[SalesQuantityQuota] [money] NOT NULL,
[SalesAmountQuota] [money] NOT NULL,
[GrossMarginQuota] [money] NOT NULL
)
WITH (
LOCATION='data/FactSalesQuota.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.FactSalesQuota
SELECT * FROM ext.FactSalesQuota;
--FactStrategyPlan
CREATE EXTERNAL TABLE ext.FactStrategyPlan (
[StrategyPlanKey] [int] NOT NULL,
[Datekey] [datetime] NOT NULL,
[EntityKey] [int] NOT NULL,
[ScenarioKey] [int] NOT NULL,
[AccountKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[ProductCategoryKey] [int] NULL,
[Amount] [money] NOT NULL
)
WITH (
LOCATION='data/FactStrategyPlan.txt.gz',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFileFormat
);
INSERT INTO dbo.FactStrategyPlan
SELECT * FROM ext.FactStrategyPlan;
To conclude we now need to update statistics on our ContosoRetailDW SQL Data Warehouse. Without proper statistics, you will not get the performance that SQL Data Warehouse is designed to provide. Tables and columns do not have statistics automatically generated by SQL Data Warehouse and so you need to create them yourself. Will be using a stored procedure named dbo.prc_sqldw_create_stats to create statistics on all columns in a database. Manage statistics in SQL Data Warehouse.
To create statistics on all columns in the table with this procedure, simply call the procedure:
exec prc_sqldw_create_stats;
dbo.prc_sqldw_create_stats
CREATE PROCEDURE [dbo].[prc_sqldw_create_stats]
( @create_type tinyint -- 1 default 2 Fullscan 3 Sample
, @sample_pct tinyint
)
AS
IF @create_type NOT IN (1,2,3)
BEGIN
THROW 151000,'Invalid value for @stats_type parameter. Valid range 1 (default), 2 (fullscan) or 3 (sample).',1;
END;
IF @sample_pct IS NULL
BEGIN;
SET @sample_pct = 20;
END;
IF OBJECT_ID('tempdb..#stats_ddl') IS NOT NULL
BEGIN;
DROP TABLE #stats_ddl;
END;
CREATE TABLE #stats_ddl
WITH ( DISTRIBUTION = HASH([seq_nmbr])
, LOCATION = USER_DB
)
AS
WITH T
AS
(
SELECT t.[name] AS [table_name]
, s.[name] AS [table_schema_name]
, c.[name] AS [column_name]
, c.[column_id] AS [column_id]
, t.[object_id] AS [object_id]
, ROW_NUMBER()
OVER(ORDER BY (SELECT NULL)) AS [seq_nmbr]
FROM sys.[tables] t
JOIN sys.[schemas] s ON t.[schema_id] = s.[schema_id]
JOIN sys.[columns] c ON t.[object_id] = c.[object_id]
LEFT JOIN sys.[stats_columns] l ON l.[object_id] = c.[object_id]
AND l.[column_id] = c.[column_id]
AND l.[stats_column_id] = 1
LEFT JOIN sys.[external_tables] e ON e.[object_id] = t.[object_id]
WHERE l.[object_id] IS NULL
AND e.[object_id] IS NULL -- not an external table
)
SELECT [table_schema_name]
, [table_name]
, [column_name]
, [column_id]
, [object_id]
, [seq_nmbr]
, CASE @create_type
WHEN 1
THEN CAST('CREATE STATISTICS '+QUOTENAME('stat_'+table_schema_name+ '_' + table_name + '_'+column_name)+' ON '+QUOTENAME(table_schema_name)+'.'+QUOTENAME(table_name)+'('+QUOTENAME(column_name)+')' AS VARCHAR(8000))
WHEN 2
THEN CAST('CREATE STATISTICS '+QUOTENAME('stat_'+table_schema_name+ '_' + table_name + '_'+column_name)+' ON '+QUOTENAME(table_schema_name)+'.'+QUOTENAME(table_name)+'('+QUOTENAME(column_name)+') WITH FULLSCAN' AS VARCHAR(8000))
WHEN 3
THEN CAST('CREATE STATISTICS '+QUOTENAME('stat_'+table_schema_name+ '_' + table_name + '_'+column_name)+' ON '+QUOTENAME(table_schema_name)+'.'+QUOTENAME(table_name)+'('+QUOTENAME(column_name)+') WITH SAMPLE '+@sample_pct+'PERCENT' AS VARCHAR(8000))
END AS create_stat_ddl
FROM T
;
DECLARE @i INT = 1
, @t INT = (SELECT COUNT(*) FROM #stats_ddl)
, @s NVARCHAR(4000) = N''
;
WHILE @i < = @t
BEGIN
SET @s=(SELECT create_stat_ddl FROM #stats_ddl WHERE seq_nmbr = @i);
PRINT @s
EXEC sp_executesql @s
SET @i+=1;
END
DROP TABLE #stats_ddl;
We are done! Explore, write some queries, scale compute resources up and down, connect with PowerBI and create awesome data visualizations. Just remember to pause your SQL Data Warehouse when not using it.
Enjoy!