Machine Learning with SQL 2017 and R for Sales Price Predictions

To predict the sales prices, I had set up on my SQL 2017 instance to work with R.
 
Once SQL had been set up, I could load the data using my Fact table that I have in Data Warehouse. 
 
 
--// This is the table schema from my Fact table:
--==========================================================
 
SET ANSI_NULLS ON
GO
 
SET QUOTED_IDENTIFIER ON
GO
 
CREATE TABLE [Fact].[VehicleSales](
[ReferenceOwnerListing] [int] NOT NULL,
[VehicleDataHash] [uniqueidentifier] NOT NULL,
[Source] [varchar](60) NOT NULL,
[NroRelisted] [int] NULL,
[ReferenceSourceCode] [int] NULL,
[ReferenceDate] [datetime] NULL,
[StartListingDate] [datetime] NULL,
[StartListingDateID] [int] NULL,
[Make] [nvarchar](60) NULL,
[Model] [nvarchar](60) NULL,
[YearModel] [int] NULL,
[BodyStyle] [nvarchar](60) NULL,
[Odometer] [int] NULL,
[Transmission] [nvarchar](60) NULL,
[Fuel] [nvarchar](60) NULL,
[IsOnRoadCostIncluded] [int] NULL,
[IsNZNew] [int] NULL,
[IsDealer] [int] NULL,
[Is4WD] [int] NULL,
[Is7Seats] [int] NULL,
[FirstStartPrice] [money] NULL,
[LastStartPrice] [money] NULL,
[Region] [nvarchar](120) NULL,
[Suburb] [nvarchar](120) NULL,
[FirstBuyNowPrice] [money] NULL,
[LastBuyNowPrice] [money] NULL,
[SoldPrice] [money] NULL,
[PredictedSoldPrice] [money] NULL,
[VehicleID] [int] NULL,
[VehicleBodyStyleID] [int] NULL,
[CreatedOn] [datetime] NULL,
[ModifiedOn] [datetime] NULL
) ON [PRIMARY]
GO
 
I'm training on this statement a car model called RAV4 from Toyota as an example:
 
 
 
--//========================================================
--//
--// Tests 90% of Vehicle Base 
--// Train with 10% of Vehicle Base  
--// Variables 1: Open vehicle Ages
--// Variables 2: Odometer Rounded
--// 
--//========================================================
 
--//========================================================
-- Select 10% of the rows into a new test table
--==========================================================
 
 
select top 90 percent
Make_Toyota = Case When Make = 'Toyota' Then 1 Else 0 End,
Model_Rav = Case When Model like '%RAV%' Then 1 Else 0 End,
FuelGroup = Case When Fuel like '%Petrol%' Then 1 
When Fuel like '%Diesel%' Then 2 
When Fuel is null Then 1 
Else 0 End,
VehicleAgeGroup = (year(getdate()) - YearModel),
OdometerGroup = (Odometer / 10000)*10000,
TransmissionGroup = Case When Transmission = 'Manual' Then 1
Else 2 End,
RankSoldPricePerc = ((Count(SoldPrice) over (partition by Make, Model, YearModel,SoldPrice))*1.0)/((Count(SoldPrice) over (partition by Make, Model, YearModel))*1.0)*100,
BodyStyleGroup =  1,
*
Into VehicleSales_Test_Toyota_Rav 
from Fact.VehicleSales 
where make = 'Toyota' and model like '%RAV%' and referenceDate > '2018-01-01' and SoldPrice > 1000  and odometer > 0 
order by VehicleDataHash
 
--// Removed Inconsistent Sold Prices
Delete VehicleSales_Test_Toyota_Rav where RankSoldPricePerc < 1
 
--// Removed Inconsistent Odometers
Delete VehicleSales_Test_Toyota_Rav where OdometerGroup = 0 and VehicleAgeGroup > 0 
 
-- Place the remaining 10% of the rows into a train table
--==========================================================
select 
Make_Toyota = Case When Make = 'Toyota' Then 1 Else 0 End,
Model_Rav = Case When Model like '%RAV%' Then 1 Else 0 End,
FuelGroup = Case When Fuel like '%Petrol%' Then 1 
When Fuel like '%Diesel%' Then 2 Else 0 End,
VehicleAgeGroup = (year(getdate()) - YearModel),
OdometerGroup = (Odometer / 10000)*10000,
TransmissionGroup = Case When Transmission = 'Manual' Then 1
Else 0 End,
RankSoldPricePerc = ((Count(SoldPrice) over (partition by Make, Model, YearModel,SoldPrice))*1.0)/((Count(SoldPrice) over (partition by Make, Model, YearModel))*1.0)*100,
BodyStyleGroup =  1,
*
into VehicleSales_train_Toyota_Rav
from Fact.VehicleSales
where make = 'Toyota' and model like '%RAV%' and referenceDate > '2018-01-01'  and SoldPrice > 1000 and Fuel in ('Petrol','Diesel') and odometer > 0 
Except 
Select * from VehicleSales_Test_Toyota_Rav
 
 
--// Remove Inconsistent Sold Price
Delete VehicleSales_train_Toyota_Rav where RankSoldPricePerc < 1
 
-- Create a table to hold our model
 --==========================================================
 CREATE TABLE VehicleSalesModels 
               ( 
               model_name nvarchar(100) not null,
               model_version nvarchar(100) not null,
               model_object varbinary(max) not null
               )
 GO
 
 -- Create a table to hold our model
 --==========================================================
 DECLARE @model VARBINARY(MAX)
 
 EXEC sp_execute_external_script  
               @language = N'R', 
               @script = N'  
               automobiles.linmod <- rxLinMod(SoldPrice ~ Make_Toyota + Model_Rav + FuelGroup + VehicleAgeGroup + OdometerGroup + TransmissionGroup + BodyStyleGroup + IsDealer + Is4WD + Is7Seats, data = automobiles)             
               model <- rxSerializeModel(automobiles.linmod, realtimeScoringOnly = FALSE)',
               @input_data_1 = N'  
                                 SELECT *  
                                 FROM VehicleSales_train_Toyota_Rav where RankSoldPricePerc > 1',
               @input_data_1_name = N'automobiles',
               @params = N'@model varbinary(max) OUTPUT',
               @model = @model OUTPUT
 
 
 
INSERT VehicleSalesModels
   (
   model_name, 
   model_version, 
   model_object
   )
 VALUES
   (
   'automobiles.linmod',
   'v1', 
   @model
   )
 
 
 
 
-- Predicting Sold Price
--==========================================================
 
DECLARE @lin_model_raw VARBINARY(MAX) = (SELECT model_object FROM VehicleSalesModels WHERE model_name = 'automobiles.linmod')
EXECUTE sp_execute_external_script
               @language = N'R',
               @script = N'
                          model = rxUnserializeModel(lin_model);
                          automobiles_prediction = rxPredict(model, VehicleSales_Test_Toyota_Rav)
                          automobiles_pred_results <- cbind(VehicleSales_Test_Toyota_Rav, automobiles_prediction)',
               @input_data_1 = N'  
                                 SELECT
   Distinct
   Make_Toyota, 
   Model_Rav, 
   FuelGroup, 
   VehicleAgeGroup, 
   OdometerGroup, 
   BodyStyleGroup, 
   TransmissionGroup,
   IsDealer, 
   Is4WD, 
   Is7Seats,
   soldprice
                                 FROM VehicleSales_Test_Toyota_Rav where RankSoldPricePerc > 1',
               @input_data_1_name = N'VehicleSales_Test_Toyota_Rav',
               @output_data_1_name = N'automobiles_pred_results',
               @params = N'@lin_model varbinary(max)',
               @lin_model = @lin_model_raw
 WITH RESULT SETS (("Make_Toyota" int,"Model_Rav" int,"FuelGroup" int,"VehicleAgeGroup" int,"OdometerGroup" int, "BodyStyleGroup" int, "TransmissionGroup" int, "IsDealer" int, "Is4WD" int, "Is7Seats" int, "SoldPrice" money, "PredictedSoldPrice" money))
 
 
--// This is the result set:
--==========================================================