-
Notifications
You must be signed in to change notification settings - Fork 239
Added new api and fixed type errors in cuStateVec #2728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Conversation
fix the type error in custatevecComputeExpectationBatched
Add new API for expectationBatched and measureBatched!
fixed format
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/lib/custatevec/src/libcustatevec.jl b/lib/custatevec/src/libcustatevec.jl
index 62b72e654..a7037cb29 100644
--- a/lib/custatevec/src/libcustatevec.jl
+++ b/lib/custatevec/src/libcustatevec.jl
@@ -1000,7 +1000,7 @@ end
nIndexBits::UInt32,
nSVs::UInt32,
svStride::custatevecIndex_t,
- matrices::PtrOrCuPtr{Cvoid},
+ matrices::PtrOrCuPtr{Cvoid},
matrixDataType::cudaDataType_t,
layout::custatevecMatrixLayout_t,
nMatrices::UInt32,
@@ -1018,20 +1018,20 @@ end
extraWorkspaceSizeInBytes)
initialize_context()
@gcsafe_ccall libcustatevec.custatevecComputeExpectationBatched(handle::custatevecHandle_t,
- batchedSv::CuPtr{Cvoid},
+ batchedSv::CuPtr{Cvoid},
svDataType::cudaDataType_t,
nIndexBits::UInt32,
nSVs::UInt32,
svStride::custatevecIndex_t,
- expectationValues::PtrOrCuPtr{ComplexF64},
- matrices::PtrOrCuPtr{Cvoid},
+ expectationValues::PtrOrCuPtr{ComplexF64},
+ matrices::PtrOrCuPtr{Cvoid},
matrixDataType::cudaDataType_t,
layout::custatevecMatrixLayout_t,
nMatrices::UInt32,
basisBits::Ptr{Int32},
nBasisBits::UInt32,
computeType::custatevecComputeType_t,
- extraWorkspace::CuPtr{Cvoid},
+ extraWorkspace::CuPtr{Cvoid},
extraWorkspaceSizeInBytes::Csize_t)::custatevecStatus_t
end
diff --git a/lib/custatevec/src/statevec.jl b/lib/custatevec/src/statevec.jl
index 722680d8e..c51fd6ad8 100644
--- a/lib/custatevec/src/statevec.jl
+++ b/lib/custatevec/src/statevec.jl
@@ -107,10 +107,10 @@ function batchMeasure!(sv::CuStateVec, bitordering::Vector{<:Integer}, randnum::
return sv, bitstring
end
-function measureBatched!(sv::CuStateVec, n_svs::Int, bitordering::Vector{<:Integer},randnums::Vector{<:Integer}, collapse::custatevecCollapseOp_t=CUSTATEVEC_COLLAPSE_NONE)
+function measureBatched!(sv::CuStateVec, n_svs::Int, bitordering::Vector{<:Integer}, randnums::Vector{<:Integer}, collapse::custatevecCollapseOp_t = CUSTATEVEC_COLLAPSE_NONE)
all(0.0 .<= randnums .< 1.0) && length(randnums) == n_svs || throw(ArgumentError("randnums must have length $nsvs and all elements must be in the interval [0, 1)."))
- bitStrings = zeros(Int32, length(bitordering)*n_svs)
- sv_stride = div(length(sv.data), n_svs)
+ bitStrings = zeros(Int32, length(bitordering) * n_svs)
+ sv_stride = div(length(sv.data), n_svs)
n_index_bits = Int(log2(div(length(sv.data), n_svs)))
custatevecMeasureBatched(handle(), sv.data, eltype(sv), n_index_bits, n_svs, sv_stride, bitStrings, convert(Vector{Int32}, bitordering), length(bitstring), randnums, collapse)
return sv, bitstrings
@@ -138,14 +138,14 @@ function expectation(sv::CuStateVec, matrix::Union{Matrix, CuMatrix}, basis_bits
end
function expectationBatched(sv::CuStateVec, n_svs::Int, matrix::Union{CuVector, Vector}, n_matrices::Int, basis_bits::Vector{<:Integer})
- sv_stride = div(length(sv.data), n_svs)
+ sv_stride = div(length(sv.data), n_svs)
n_index_bits = Int(log2(div(length(sv.data), n_svs)))
function bufferSize()
out = Ref{Csize_t}()
custatevecComputeExpectationBatchedGetWorkspaceSize(handle(), eltype(sv), n_index_bits, n_svs, sv_stride, matrix, eltype(matrix), CUSTATEVEC_MATRIX_LAYOUT_COL, n_matrices, length(basis_bits), compute_type(eltype(sv), eltype(matrix)), out)
- out[]
+ return out[]
end
- expVals = zeros(ComplexF64, n_matrices*n_svs)
+ expVals = zeros(ComplexF64, n_matrices * n_svs)
with_workspace(handle().cache, bufferSize) do buffer
custatevecComputeExpectationBatched(handle(), sv.data, eltype(sv), n_index_bits, n_svs, sv_stride, expVals, matrix, eltype(matrix), CUSTATEVEC_MATRIX_LAYOUT_COL, n_matrices, convert(Vector{Int32}, basis_bits), length(basis_bits), compute_type(eltype(sv), eltype(matrix)), buffer, sizeof(buffer))
end |
Codecov Report
Attention: Patch coverage is
Additional details and impacted files
@@ Coverage Diff @@
## master #2728 +/- ##
=======================================
Coverage 88.62% 88.63%
=======================================
Files 153 153
Lines 13156 13174 +18
=======================================
+ Hits 11660 11677 +17
- Misses 1496 1497 +1
☔ View full report in Codecov by Sentry.
🚀 New features to boost your workflow:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CUDA.jl Benchmarks
Benchmark suite | Current: 1207c36 | Previous: 430b7d6 | Ratio |
---|---|---|---|
latency/precompile |
45868224655.5 ns |
45833846958 ns |
1.00 |
latency/ttfp |
6631324465.5 ns |
6614791754 ns |
1.00 |
latency/import |
3171755571 ns |
3161117471.5 ns |
1.00 |
integration/volumerhs |
9622766 ns |
9611888 ns |
1.00 |
integration/byval/slices=1 |
147117 ns |
146919 ns |
1.00 |
integration/byval/slices=3 |
425071 ns |
425277.5 ns |
1.00 |
integration/byval/reference |
144896 ns |
145019 ns |
1.00 |
integration/byval/slices=2 |
286006 ns |
286025 ns |
1.00 |
integration/cudadevrt |
103444.5 ns |
103516 ns |
1.00 |
kernel/indexing |
14184.5 ns |
14168.5 ns |
1.00 |
kernel/indexing_checked |
14834 ns |
15018 ns |
0.99 |
kernel/occupancy |
715.0915492957746 ns |
746.9731543624162 ns |
0.96 |
kernel/launch |
2131.2 ns |
2173.4444444444443 ns |
0.98 |
kernel/rand |
14790 ns |
14748 ns |
1.00 |
array/reverse/1d |
19865 ns |
20211 ns |
0.98 |
array/reverse/2d |
25633 ns |
25621 ns |
1.00 |
array/reverse/1d_inplace |
10667 ns |
11311 ns |
0.94 |
array/reverse/2d_inplace |
12172 ns |
13122 ns |
0.93 |
array/copy |
21321 ns |
21481 ns |
0.99 |
array/iteration/findall/int |
159149 ns |
159934.5 ns |
1.00 |
array/iteration/findall/bool |
139797 ns |
140354 ns |
1.00 |
array/iteration/findfirst/int |
153977 ns |
162145 ns |
0.95 |
array/iteration/findfirst/bool |
154701 ns |
163586 ns |
0.95 |
array/iteration/scalar |
71235.5 ns |
74124 ns |
0.96 |
array/iteration/logical |
216741 ns |
218118.5 ns |
0.99 |
array/iteration/findmin/1d |
41778 ns |
42265 ns |
0.99 |
array/iteration/findmin/2d |
94170 ns |
94863 ns |
0.99 |
array/reductions/reduce/1d |
36252 ns |
36895 ns |
0.98 |
array/reductions/reduce/2d |
40764 ns |
41510 ns |
0.98 |
array/reductions/mapreduce/1d |
34041 ns |
34847 ns |
0.98 |
array/reductions/mapreduce/2d |
40801 ns |
50343 ns |
0.81 |
array/broadcast |
20916 ns |
21159 ns |
0.99 |
array/copyto!/gpu_to_gpu |
13588 ns |
11835 ns |
1.15 |
array/copyto!/cpu_to_gpu |
210253 ns |
210538 ns |
1.00 |
array/copyto!/gpu_to_cpu |
243469 ns |
243720 ns |
1.00 |
array/accumulate/1d |
109533 ns |
109687 ns |
1.00 |
array/accumulate/2d |
79832 ns |
80651 ns |
0.99 |
array/construct |
1274.8 ns |
1283.7 ns |
0.99 |
array/random/randn/Float32 |
49820.5 ns |
45696 ns |
1.09 |
array/random/randn!/Float32 |
26745 ns |
27045 ns |
0.99 |
array/random/rand!/Int64 |
27053 ns |
27352 ns |
0.99 |
array/random/rand!/Float32 |
8759 ns |
8850 ns |
0.99 |
array/random/rand/Int64 |
38262 ns |
30500 ns |
1.25 |
array/random/rand/Float32 |
13158 ns |
13331 ns |
0.99 |
array/permutedims/4d |
62107 ns |
62035 ns |
1.00 |
array/permutedims/2d |
55838 ns |
55848 ns |
1.00 |
array/permutedims/3d |
56424 ns |
56692 ns |
1.00 |
array/sorting/1d |
2776270 ns |
2758536 ns |
1.01 |
array/sorting/by |
3368691.5 ns |
3369076 ns |
1.00 |
array/sorting/2d |
1085447.5 ns |
1078977 ns |
1.01 |
cuda/synchronization/stream/auto |
1066.7272727272727 ns |
1033.4 ns |
1.03 |
cuda/synchronization/stream/nonblocking |
6479.6 ns |
6533.8 ns |
0.99 |
cuda/synchronization/stream/blocking |
842 ns |
857.2446808510638 ns |
0.98 |
cuda/synchronization/context/auto |
1204.6 ns |
1184 ns |
1.02 |
cuda/synchronization/context/nonblocking |
6709 ns |
6691.700000000001 ns |
1.00 |
cuda/synchronization/context/blocking |
935.3333333333334 ns |
945.1481481481482 ns |
0.99 |
This comment was automatically generated by workflow using github-action-benchmark.
svDataType::cudaDataType_t, | ||
nIndexBits::UInt32, | ||
nSVs::UInt32, | ||
svStride::custatevecIndex_t, | ||
expectationValues::Ptr{ComplexF64}, | ||
matrices::Ptr{Cvoid}, | ||
expectationValues::PtrOrCuPtr{ComplexF64}, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These changes need to be replicated in the res/wrap logic too.
@@ -128,6 +137,21 @@ function expectation(sv::CuStateVec, matrix::Union{Matrix, CuMatrix}, basis_bits | |||
return expVal[], residualNorm[] | |||
end | |||
|
|||
function expectationBatched(sv::CuStateVec, n_svs::Int, matrix::Union{CuVector, Vector}, n_matrices::Int, basis_bits::Vector{<:Integer}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add some tests for these new functions?
Added new api and fixed type errors in cuStateVec