# Load package
library(leaf)
# When backend_available() reports FALSE, point the user at the installer.
# Only a message is emitted; the script carries on either way.
if (!backend_available()) {
  message("Install backend with leaf::install_leaf()")
}

# Initialize the symbolic regressor
# Build the regressor object that the search, fit, and evaluation steps
# below call methods on. `<-` is used for assignment to match the rest of
# the file, and string arguments use double quotes throughout.
regressor <- leaf::SymbolicRegressor$new(
  engine = "rsrm",
  loss = "PoissonDeviance",
  num_iterations = 3,
  base = list(verbose = FALSE)
)

# Load data
# Fetch the "GDM" dataset through leaf_data(); `<-` matches the assignment
# style used by the later stages of this file.
train_data <- leaf::leaf_data("GDM")
# Preview the first rows of the table.
head(train_data)
#> Archipelago species A T H Dm Di Do y
#> 1 Canary Islands Spiders 278 1.2 2423 381 61 61 85
#> 2 Canary Islands Spiders 729 2.0 2425 414 57 57 98
#> 3 Canary Islands Spiders 378 11.0 1484 333 28 254 137
#> 4 Canary Islands Spiders 2058 12.0 3711 286 28 28 229
#> 5 Canary Islands Spiders 1532 15.0 1950 195 61 61 159
#> 6 Canary Islands Spiders 1725 23.0 807 95 11 95 86

# Stage 1: Generate subsets
# Build the fold definition from the `species` column; the result is handed
# to search_equations() through its `folds` argument.
# NOTE(review): "logo" presumably means leave-one-group-out -- confirm in the
# leaf documentation.
folds <- leaf::generate_group_subsets(
  train_data,
  group_cols = "species",
  n_splits = "logo"
)

# Stage 2: Discover equation skeletons
# Run the equation search on the training table with the folds built above.
search_results <- regressor$search_equations(
  data = train_data,
  formula = "y ~ f(A, T | species)",
  normalization = "divide_by_gmd",
  folds = folds
)
#> 1. Processing data for equation search based on formula...
#> 2. Running engine 'rsrm' over 5 folds using up to 1 processes...
#> -- FINAL RESULTS --
#> Episode: 3/3
#> time: 59.85s
#> loss: 18.63331198471096
#> form: X1+X2**3+1+X1*X2**3+F
#> HOF:
#> equation complexity loss
#> 0 0 0 999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1 36.4917 1 96.17
#> 2 67.6781*X1 + 1 2 65.94
#> 3 42.0845*X1**0.5 3 35.03
#> 4 12.9398*X1**1.2886 4 34.70
#> 5 39.7451*X1**0.5 - 4.7380*X2 5 32.45
#> 6 77.5044*X1**0.5 + 5.4535*X2 - 8.1433 6 32.45
#> 7 27.7485*X1**0.5 + 0.0751/X2**2 7 30.95
#> 8 ((78.8156*X1**0.5 - 6.3010)*(X2 - 0.1690) + 0.1148)/(X2 - 0.1690) 8 22.56
#> 9 ((X1*X2 - 0.6460)*(69.7163*X1 + 7.6966*X2 + 2.1710) - 2.2056)/(X1*X2 - 0.6460) 10 20.24
#> 10 31.0360*X1**0.5 + 6.4929 + 1.0065/(X2 + 0.0105*exp(-154.1501*X1*X2**2)) 13 20.11
#> 11 75.4312*X1**0.5 - 1.1332*X2**0.25 - 5.4296/(X2 + 0.5335*exp(-5.5694*X2**2)) 14 18.63
#> ---
#>
#> task:dataset_93df7a40-b8c5-41c1-827b-9b00049c4968 expr:75.43118432679253*X1**0.5 + -1.133214801020806*X2**0.25 + -5.429620123418192/(0.5334978123305567*exp(-5.569384925113638*X2**2) + X2) Loss_PoissonDeviance:18.63 Test 0/1.
#> final result:
#> success rate : 0%
#> average discovery time is 59.858 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]: [[2808, 0, 2808]]
#> -- FINAL RESULTS --
#> Episode: 3/3
#> time: 59.38s
#> loss: 17.59757848151243
#> form: X1*X2+X2+X1+1/(X2 + 1)+F
#> HOF:
#> equation complexity loss
#> 0 0 0 999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1 36.4460 1 96.17
#> 2 36.5029 - 0.0141*X1 3 36.30
#> 3 48.7534*X1**0.7881 4 34.70
#> 4 8.8407*exp(0.8668*X1) - 1.7462 5 33.96
#> 5 0.1729*X1**0.5 + 42.9840*X2 + 7.0218 6 32.45
#> 6 ((82.3106*X1 + 5.0661)*(X2 - 0.6089) + 2.4446)/(X2 - 0.6089) 7 29.88
#> 7 ((77.8477*X1**0.5 - 5.6270)*(X2 - 1.7642) - 4.7431)/(X2 - 1.7642) 8 24.16
#> 8 (7.1006*X2 + (42.3987*X1**0.5 - 3.7818)*(X2 - 0.7305))/(X2 - 0.7305) 9 22.80
#> 9 44.5543*X1**0.5 - 2.0770/(X2 + 0.1104) + 0.0850/X1 10 22.03
#> 10 ((X2 - 2.1283)*(22.4709*X1*X2 + 17.4438*X1 - 0.8406*X2) - 0.9144)/(X2 - 2.1283) 11 19.68
#> 11 40.1253*X1**0.5 - 5.9888*exp(-1.3785*X2) + 4.1379/(1 - 0.6646/X2) 12 17.60
#> ---
#>
#> task:dataset_f0546bc5-399f-4a77-9cfe-f6ca2e85cef6 expr:40.1252851669824*X1**0.5 + -5.988838918042952*exp(-1.378456644669992*X2) + -4.137867123616743/(0.6645899121484922/X2 - 1) Loss_PoissonDeviance:17.60 Test 0/1.
#> final result:
#> success rate : 0%
#> average discovery time is 59.387 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]: [[2622, 0, 2622]]
#> -- FINAL RESULTS --
#> Episode: 3/3
#> time: 58.05s
#> loss: 5.962939720605339
#> form: exp(C*X1**2)*exp(C*X2)+F
#> HOF:
#> equation complexity loss
#> 0 0 0 999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1 36.5019 1 28.81
#> 2 70.6407*X2 + 16.8314 3 26.82
#> 3 4.0612*X2**3 + 16.4502 4 25.46
#> 4 3.9899*X1 + 12.5641*X2 + 12.9638 5 18.22
#> 5 39.0775*X1 + 1.0661*X2**1.5 + 1.9261 6 16.05
#> 6 35.3703*X1 - 18.9153*X2*log(X2) + 14.4395 7 15.47
#> 7 ((38.5484*X1 + 2.6461)*(X1*X2 + 1.7735) + 0.1864)/(X1*X2 + 1.7735) 8 10.50
#> 8 (7.2087*X1*X2 + (36.3870*X1 + 0.0429)*(X1*X2 + 0.0587))/(X1*X2 + 0.0587) 10 9.31
#> 9 67.1542*X1 - 10.2438*X2/(X2 - 0.6284/X1**2) + 5.5596 11 9.07
#> 10 58.3751*X1 - 1.7373*X2 + 4.1695*exp(-1.4453*X1**2 + 2.0496*X2) 12 8.01
#> 11 30.6219*X1 + 7.3247*X2 - 0.0773*exp(1.8572*X1**2*X2 - 53102.3205*X2**2) 14 5.96
#> ---
#>
#> task:dataset_6c36e797-aad1-4cd4-95c6-270def6e9c44 expr:30.621939528772103*X1 + 7.324680913287321*X2 + -0.07725981565640147*exp(-53102.32045940429*X2**2)*exp(1.8572477531564031*X1**2*X2) Loss_PoissonDeviance:5.96 Test 0/1.
#> final result:
#> success rate : 0%
#> average discovery time is 58.057 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]: [[2504, 0, 2504]]
#> -- FINAL RESULTS --
#> Episode: 3/3
#> time: 108.04s
#> loss: 19.50435371631782
#> form: X1**0.5+F
#> HOF:
#> equation complexity loss
#> 0 0 0 999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1 67.2097 1 96.17
#> 2 34.2120 2 96.17
#> 3 38.7829*X1 + 14.0659 3 36.30
#> 4 77.0625*X1**0.5 - 4.6835 4 35.01
#> 5 33.4054 + 1.3282*exp(-0.5158*X1) 5 33.95
#> 6 (33.6519*X1*(0.3080*X2 - 1) - 1.2838)/(0.3080*X2 - 1) 6 30.02
#> 7 ((72.1724*X1 + 7.6670)*(1.7186*X2 - 1) + 0.8432)/(1.7186*X2 - 1) 7 27.44
#> 8 ((43.5431*X1 + 10.5063)*(X2**0.5 - 0.3359) - 0.2592)/(X2**0.5 - 0.3359) 8 26.13
#> 9 ((X1**0.5 + 0.6660*X2)*(35.2524*X1 + 16.7830) + 1.9087)/(X1**0.5 + 0.6660*X2) 9 24.65
#> 10 2.2859*X1*X2**3 + 31.7147*X1 - 2.8622*X2**3 + 16.9945 10 24.57
#> 11 69.5182*X1 + 7.5648 - 1.6803/(X1**(-0.5) - 1.5438*X2) 11 23.53
#> 12 38.4328*X1**0.5 + 8.0335 - 10.8348/(X1**(-0.5) - 0.1255*X2) 12 19.95
#> 13 53.1144*X1**0.5/(0.5510*X2 + 1) + 0.3749*X1/(0.2771*X2 - 1) - 1.2407/(0.0908*X2 - 1) 15 19.50
#> ---
#>
#> task:dataset_3913e139-bfed-404b-90a1-36cc9bc4f774 expr:53.11439018348418*X1**0.5/(--0.5509822956642375*X2 + 1) + -0.3748504829813716*X1/(-0.27709108049022535*X2 + 1) + 1.2406776184902666/(-0.09082438278008342*X2 + 1) Loss_PoissonDeviance:19.50 Test 0/1.
#> final result:
#> success rate : 0%
#> average discovery time is 108.041 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]: [[2542, 0, 2542]]
#> -- FINAL RESULTS --
#> Episode: 3/3
#> time: 56.66s
#> loss: 18.762297920666533
#> form: 1+X1+F
#> HOF:
#> equation complexity loss
#> 0 1 0 999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1 36.5557 1 96.17
#> 2 33.8333*X1 + 15.7211 3 36.30
#> 3 10.9228*X1**1.4121 4 34.70
#> 4 44.6580*X1 + 5.2839*X2 + 10.4877 5 34.02
#> 5 34.9336*X1**0.5 - 3.9694*X2 + 11.9954 6 32.45
#> 6 44.0426*X1**0.5 - 0.1832 - 0.6801/X2 7 31.41
#> 7 39.8360*X1 - 0.7837*X2**4 + 8.0852*X2 + 11.7166 8 28.08
#> 8 31.3763*X1 - 0.4998*X2**4 - 0.4158*X2**2 + 9.4675 9 26.03
#> 9 0.1881*X1*X2**3 + 28.2664*X1 - 1.9153*X2**3 + 19.3747 10 24.57
#> 10 11.3111*X1**2 + 0.2430*X1 + 1.4187*X1/log(X2) + 3.8953*X2 + 8.7231 13 19.21
#> 11 3.7521*X1**3 - 11.1122*X1 + 9.0340*X1/log(1/X2) + 5.3617*X2 + 21.6463 14 18.76
#> ---
#>
#> task:dataset_9ebaecd3-4c1d-405a-bc9c-86afabe3bbf4 expr:--3.7520887270100056*X1**3 + -11.11221617132173*X1 + 9.03397575247341*X1/log(1/X2) + 5.36166614199925*X2 + 21.64631742931612 Loss_PoissonDeviance:18.76 Test 0/1.
#> final result:
#> success rate : 0%
#> average discovery time is 56.657 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]: [[2515, 0, 2515]]
#> 3. Found 62 raw skeletons. Deduplicating...
# Print a banner, then preview the first rows of the search results.
print("=== Search results ===")
#> [1] "=== Search results ==="
head(search_results)
#> Equation Complexity
#> 0 u1 1
#> 1 u1⋅A + u2 3
#> 2 u1⋅A^0.5 3
#> 3 u1⋅A^0.79 3
#> 4 u1⋅A^1.29 3
#> 5 u1⋅A^1.41 3

# Stage 3: Fit parameters and compute loss
# Fit the parameters on the training table and keep the returned table.
fit_results <- regressor$fit(data = train_data)
#> Fitting parameters for 48 equations...
#> Parameter fitting complete.
# cat() writes the leading newline as a real line break; print() would show
# the string quoted with the newline escaped as "\n".
cat("\n=== Fit ===\n")
#>
#> === Fit ===
# Preview the first rows of the fit results.
head(fit_results)
#> Equation Complexity Loss
#> 0 u1 1 33.52978
#> 1 u1⋅A + u2 3 16.13773
#> 2 u1⋅A^0.5 3 22.30063
#> 3 u1⋅A^0.79 3 34.60230
#> 4 u1⋅A^1.29 3 78.06473
#> 5 u1⋅A^1.41 3 92.01513

# Stage 4: Evaluate additional metrics
# Request the two extra metrics; the returned table is stored in eval_table
# but not used again in the visible part of this script.
eval_table <- regressor$evaluate(
  metrics = c("PseudoR2", "Elbow")
)

# Show results: fetch the Pareto front from the regressor and auto-print it.
pf <- regressor$get_pareto_front()
pf
#> Equation Complexity
#> 0 u1 1
#> 7 u2⋅-1⋅A + u1 3
#> 8 u1⋅A^0.5 + -1⋅u2 4
#> 11 u1⋅A + u2⋅T + u3 5
#> 15 u1⋅A^0.5 + -1⋅u2⋅T + u3 6
#> 21 u1⋅A + -1⋅u2⋅T^4 + u3⋅T + u4 8
#> 22 (u4 + (T + -1⋅u3)⋅(u1⋅A + u2))⋅(T + -1⋅u5)^-1 9
#> 27 u1⋅A + u2 + -1⋅u3⋅(u5⋅-1⋅T + A^-1⋅u4)^-1 10
#> 36 u1⋅A^0.5 + u2 + -1⋅u3⋅(u5⋅-1⋅T + A^-1⋅u4)^-1 11
#> 37 (-1⋅u5 + (A⋅T + -1⋅u1)⋅(u2⋅A + u3⋅T + u4))⋅(A⋅T + -1⋅u6)^-1 13
#> Loss PseudoR2 Elbow
#> 0 33.529782 0.0000000 0.10618305
#> 7 16.137732 0.5187045 0.67001869
#> 8 15.710403 0.5314493 0.30059450
#> 11 13.468675 0.5983071 0.46352610
#> 15 13.018381 0.6117368 0.11457889
#> 21 11.148259 0.6675117 -0.10550209
#> 22 9.841585 0.7064823 -0.05861504
#> 27 8.250629 0.7539313 0.22047479
#> 36 6.658312 0.8014210 0.41404096
#> 37 5.906289 0.8238495 0.42228590