To predict the sales prices, I had set up on my SQL 2017 instance to work with R.
Once SQL had been set up, I could load the data using my Fact table that I have in Data Warehouse.
--// This is the table schema from my Fact table:
--==========================================================
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [Fact].[VehicleSales](
[ReferenceOwnerListing] [int] NOT NULL,
[VehicleDataHash] [uniqueidentifier] NOT NULL,
[Source] [varchar](60) NOT NULL,
[NroRelisted] [int] NULL,
[ReferenceSourceCode] [int] NULL,
[ReferenceDate] [datetime] NULL,
[StartListingDate] [datetime] NULL,
[StartListingDateID] [int] NULL,
[Make] [nvarchar](60) NULL,
[Model] [nvarchar](60) NULL,
[YearModel] [int] NULL,
[BodyStyle] [nvarchar](60) NULL,
[Odometer] [int] NULL,
[Transmission] [nvarchar](60) NULL,
[Fuel] [nvarchar](60) NULL,
[IsOnRoadCostIncluded] [int] NULL,
[IsNZNew] [int] NULL,
[IsDealer] [int] NULL,
[Is4WD] [int] NULL,
[Is7Seats] [int] NULL,
[FirstStartPrice] [money] NULL,
[LastStartPrice] [money] NULL,
[Region] [nvarchar](120) NULL,
[Suburb] [nvarchar](120) NULL,
[FirstBuyNowPrice] [money] NULL,
[LastBuyNowPrice] [money] NULL,
[SoldPrice] [money] NULL,
[PredictedSoldPrice] [money] NULL,
[VehicleID] [int] NULL,
[VehicleBodyStyleID] [int] NULL,
[CreatedOn] [datetime] NULL,
[ModifiedOn] [datetime] NULL
) ON [PRIMARY]
GO
I'm training on this statement a car model called RAV4 from Toyota as an example:
--//========================================================
--//
--// Tests 90% of Vehicle Base
--// Train with 10% of Vehicle Base
--// Variables 1: Open vehicle Ages
--// Variables 2: Odometer Rounded
--//
--//========================================================
--//========================================================
-- Select 10% of the rows into a new test table
--==========================================================
select top 90 percent
Make_Toyota = Case When Make = 'Toyota' Then 1 Else 0 End,
Model_Rav = Case When Model like '%RAV%' Then 1 Else 0 End,
FuelGroup = Case When Fuel like '%Petrol%' Then 1
When Fuel like '%Diesel%' Then 2
When Fuel is null Then 1
Else 0 End,
VehicleAgeGroup = (year(getdate()) - YearModel),
OdometerGroup = (Odometer / 10000)*10000,
TransmissionGroup = Case When Transmission = 'Manual' Then 1
Else 2 End,
RankSoldPricePerc = ((Count(SoldPrice) over (partition by Make, Model, YearModel,SoldPrice))*1.0)/((Count(SoldPrice) over (partition by Make, Model, YearModel))*1.0)*100,
BodyStyleGroup = 1,
*
Into VehicleSales_Test_Toyota_Rav
from Fact.VehicleSales
where make = 'Toyota' and model like '%RAV%' and referenceDate > '2018-01-01' and SoldPrice > 1000 and odometer > 0
order by VehicleDataHash
--// Removed Inconsistent Sold Prices
Delete VehicleSales_Test_Toyota_Rav where RankSoldPricePerc < 1
--// Removed Inconsistent Odometers
Delete VehicleSales_Test_Toyota_Rav where OdometerGroup = 0 and VehicleAgeGroup > 0
-- Place the remaining 10% of the rows into a train table
--==========================================================
select
Make_Toyota = Case When Make = 'Toyota' Then 1 Else 0 End,
Model_Rav = Case When Model like '%RAV%' Then 1 Else 0 End,
FuelGroup = Case When Fuel like '%Petrol%' Then 1
When Fuel like '%Diesel%' Then 2 Else 0 End,
VehicleAgeGroup = (year(getdate()) - YearModel),
OdometerGroup = (Odometer / 10000)*10000,
TransmissionGroup = Case When Transmission = 'Manual' Then 1
Else 0 End,
RankSoldPricePerc = ((Count(SoldPrice) over (partition by Make, Model, YearModel,SoldPrice))*1.0)/((Count(SoldPrice) over (partition by Make, Model, YearModel))*1.0)*100,
BodyStyleGroup = 1,
*
into VehicleSales_train_Toyota_Rav
from Fact.VehicleSales
where make = 'Toyota' and model like '%RAV%' and referenceDate > '2018-01-01' and SoldPrice > 1000 and Fuel in ('Petrol','Diesel') and odometer > 0
Except
Select * from VehicleSales_Test_Toyota_Rav
--// Remove Inconsistent Sold Price
Delete VehicleSales_train_Toyota_Rav where RankSoldPricePerc < 1
-- Create a table to hold our model
--==========================================================
CREATE TABLE VehicleSalesModels
(
model_name nvarchar(100) not null,
model_version nvarchar(100) not null,
model_object varbinary(max) not null
)
GO
-- Create a table to hold our model
--==========================================================
DECLARE @model VARBINARY(MAX)
EXEC sp_execute_external_script
@language = N'R',
@script = N'
automobiles.linmod <- rxLinMod(SoldPrice ~ Make_Toyota + Model_Rav + FuelGroup + VehicleAgeGroup + OdometerGroup + TransmissionGroup + BodyStyleGroup + IsDealer + Is4WD + Is7Seats, data = automobiles)
model <- rxSerializeModel(automobiles.linmod, realtimeScoringOnly = FALSE)',
@input_data_1 = N'
SELECT *
FROM VehicleSales_train_Toyota_Rav where RankSoldPricePerc > 1',
@input_data_1_name = N'automobiles',
@params = N'@model varbinary(max) OUTPUT',
@model = @model OUTPUT
INSERT VehicleSalesModels
(
model_name,
model_version,
model_object
)
VALUES
(
'automobiles.linmod',
'v1',
@model
)
-- Predicting Sold Price
--==========================================================
DECLARE @lin_model_raw VARBINARY(MAX) = (SELECT model_object FROM VehicleSalesModels WHERE model_name = 'automobiles.linmod')
EXECUTE sp_execute_external_script
@language = N'R',
@script = N'
model = rxUnserializeModel(lin_model);
automobiles_prediction = rxPredict(model, VehicleSales_Test_Toyota_Rav)
automobiles_pred_results <- cbind(VehicleSales_Test_Toyota_Rav, automobiles_prediction)',
@input_data_1 = N'
SELECT
Distinct
Make_Toyota,
Model_Rav,
FuelGroup,
VehicleAgeGroup,
OdometerGroup,
BodyStyleGroup,
TransmissionGroup,
IsDealer,
Is4WD,
Is7Seats,
soldprice
FROM VehicleSales_Test_Toyota_Rav where RankSoldPricePerc > 1',
@input_data_1_name = N'VehicleSales_Test_Toyota_Rav',
@output_data_1_name = N'automobiles_pred_results',
@params = N'@lin_model varbinary(max)',
@lin_model = @lin_model_raw
WITH RESULT SETS (("Make_Toyota" int,"Model_Rav" int,"FuelGroup" int,"VehicleAgeGroup" int,"OdometerGroup" int, "BodyStyleGroup" int, "TransmissionGroup" int, "IsDealer" int, "Is4WD" int, "Is7Seats" int, "SoldPrice" money, "PredictedSoldPrice" money))
--// This is the result set:
--==========================================================