From 876dd49d76a1602ab3bccd357b428b0dbab1497a Mon Sep 17 00:00:00 2001
From: salbert83
Date: Sun, 15 Jan 2023 09:07:33 -0500
Subject: [PATCH 1/2] Multithreaded support for apply_forest_proba

---
 src/classification/main.jl | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/src/classification/main.jl b/src/classification/main.jl
index 9bbf82b6..0de9dec6 100644
--- a/src/classification/main.jl
+++ b/src/classification/main.jl
@@ -23,12 +23,19 @@ end
 
 # Applies `row_fun(X_row)::AbstractVector` to each row in X
 # and returns a matrix containing the resulting vectors, stacked vertically
-function stack_function_results(row_fun::Function, X::AbstractMatrix)
+function stack_function_results(row_fun::Function, X::AbstractMatrix;
+                                use_multithreading = false)
     N = size(X, 1)
     N_cols = length(row_fun(X[1, :])) # gets the number of columns
     out = Array{Float64}(undef, N, N_cols)
-    for i in 1:N
-        out[i, :] = row_fun(X[i, :])
+    if use_multithreading
+        Threads.@threads for i in 1:N # each iteration writes only row i of out
+            out[i, :] = row_fun(X[i, :])
+        end
+    else
+        for i in 1:N
+            out[i, :] = row_fun(X[i, :])
+        end
     end
     return out
 end
@@ -329,10 +336,10 @@ function apply_tree_proba(
         return apply_tree_proba(tree.right, features, labels)
     end
 end
-apply_tree_proba(tree::Root{S, T}, features::AbstractMatrix{S}, labels) where {S, T} =
-    apply_tree_proba(tree.node, features, labels)
-apply_tree_proba(tree::LeafOrNode{S, T}, features::AbstractMatrix{S}, labels) where {S, T} =
-    stack_function_results(row->apply_tree_proba(tree, row, labels), features)
+apply_tree_proba(tree::Root{S, T}, features::AbstractMatrix{S}, labels; use_multithreading = false) where {S, T} =
+    apply_tree_proba(tree.node, features, labels, use_multithreading = use_multithreading)
+apply_tree_proba(tree::LeafOrNode{S, T}, features::AbstractMatrix{S}, labels; use_multithreading = false) where {S, T} =
+    stack_function_results(row->apply_tree_proba(tree, row, labels), features, use_multithreading = use_multithreading)
 
 function build_forest(
     labels :: AbstractVector{T},
@@ -488,10 +495,12 @@ end
 apply_forest_proba(
     forest::Ensemble{S, T},
     features::AbstractMatrix{S},
-    labels
+    labels;
+    use_multithreading = false
 ) where {S, T} =
     stack_function_results(row->apply_forest_proba(forest, row, labels),
-                           features)
+                           features,
+                           use_multithreading = use_multithreading)
 
 function build_adaboost_stumps(
     labels :: AbstractVector{T},
@@ -597,10 +606,12 @@ function apply_adaboost_stumps_proba(
     stumps::Ensemble{S, T},
     coeffs::AbstractVector{Float64},
     features::AbstractMatrix{S},
-    labels::AbstractVector{T}
+    labels::AbstractVector{T};
+    use_multithreading = false
 ) where {S, T}
     stack_function_results(
         row->apply_adaboost_stumps_proba(stumps, coeffs, row, labels),
-        features
+        features,
+        use_multithreading = use_multithreading
     )
 end

From 686a44b0f5c8d4c21f63daf6e525758424df41f4 Mon Sep 17 00:00:00 2001
From: salbert83
Date: Sun, 15 Jan 2023 09:08:58 -0500
Subject: [PATCH 2/2] Unit tests for multithreaded support for
 apply_forest_proba

---
 test/classification/iris.jl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/classification/iris.jl b/test/classification/iris.jl
index a4eb0b05..7bd3baa9 100644
--- a/test/classification/iris.jl
+++ b/test/classification/iris.jl
@@ -16,6 +16,8 @@ cm = confusion_matrix(labels, preds)
 @test depth(model) == 1
 probs = apply_tree_proba(model, features, classes)
 @test reshape(sum(probs, dims=2), n) ≈ ones(n)
+probs_m = apply_tree_proba(model, features, classes, use_multithreading=true)
+@test reshape(sum(probs_m, dims=2), n) ≈ ones(n)
 
 # train full-tree classifier (over-fit)
 model = build_tree(labels, features)
@@ -28,6 +30,8 @@ cm = confusion_matrix(labels, preds)
 print_tree(model)
 probs = apply_tree_proba(model, features, classes)
 @test reshape(sum(probs, dims=2), n) ≈ ones(n)
+probs_m = apply_tree_proba(model, features, classes, use_multithreading = true)
+@test reshape(sum(probs_m, dims=2), n) ≈ ones(n)
 
 i1 = impurity_importance(model)
 s1 = split_importance(model)
@@ -52,6 +56,8 @@ cm = confusion_matrix(labels, preds)
 @test 0.95 < cm.accuracy < 1.0
 probs = apply_tree_proba(model, features, classes)
 @test reshape(sum(probs, dims=2), n) ≈ ones(n)
+probs_m = apply_tree_proba(model, features, classes, use_multithreading = true)
+@test reshape(sum(probs_m, dims=2), n) ≈ ones(n)
 
 # prune tree to a stump, 2 leaves
 pruning_purity = 0.5