水水水水水水水水水水水水水水

前端

352.74KB

10 需要积分: 1

立即下载

资源介绍:

水水水水水水水水水水水水水水

// Copyright (c) 2021 Horizon Robotics.All Rights Reserved. // // The material in this file is confidential and contains trade secrets // of Horizon Robotics Inc. This is proprietary information owned by // Horizon Robotics Inc. No part of this work may be disclosed, // reproduced, copied, transmitted, or used in any way for any purpose, // without the express written permission of Horizon Robotics Inc. #include "averagepool.h" #include #include "layer_common.h" #include "pooling_common.h" #include "util/common.h" namespace hobot { namespace dnn { DEFINE_AND_REGISTER_LAYER_CREATOR(AveragePool) static inline int32_t AveragePool3DOffset(TShape const &shape, int32_t i0, int32_t i1, int32_t i2, int32_t i3, int32_t i4) { return (((i0 * shape[1] + i1) * shape[2] + i2) * shape[3] + i3) * shape[4] + i4; } template static void AveragePool3D_float32(DType const *input_data, DType *output_data, //static inline void AveragePool3D(DType const *input_data, DType *output_data, TShape const &ishape, TShape const &oshape, std::vector &kernel_shape, std::vector &strides, std::vector &pads, bool count_include_pad) { if (ishape[kDim0] != oshape[kDim0]) { DNN_LOGE(TAG_LAYER, "input[0] is not equal output[0]"); } if (ishape[kDim1] != oshape[kDim1]) { DNN_LOGE(TAG_LAYER, "input[1] is not equal output[1]"); } if (pads[0] != pads[3]) { DNN_LOGE(TAG_LAYER, "pads[0] is not equal pads[3]"); } if (pads[1] != pads[4]) { DNN_LOGE(TAG_LAYER, "pads[1] is not equal pads[4]"); } if (pads[2] != pads[5]) { DNN_LOGE(TAG_LAYER, "pads[2] is not equal pads[5]"); } const int32_t batches = static_cast(ishape[kDim0]); const int32_t channels = static_cast(ishape[kDim1]); const int32_t in_spatial_dim_1{static_cast(ishape[kDim2])}; const int32_t in_spatial_dim_2{static_cast(ishape[kDim3])}; const int32_t in_spatial_dim_3{static_cast(ishape[kDim4])}; const int32_t out_spatial_dim_1{static_cast(oshape[kDim2])}; const int32_t out_spatial_dim_2{static_cast(oshape[kDim3])}; const int32_t out_spatial_dim_3{static_cast(oshape[kDim4])}; const int32_t stride_spatial_dim_1{strides[0]}; const int32_t stride_spatial_dim_2{strides[1]}; const int32_t stride_spatial_dim_3{strides[2]}; const int32_t filter_spatial_dim_1{kernel_shape[0]}; const int32_t filter_spatial_dim_2{kernel_shape[1]}; const int32_t filter_spatial_dim_3{kernel_shape[2]}; const int32_t padding_spatial_dim_1{pads[0]}; const int32_t padding_spatial_dim_2{pads[1]}; const int32_t padding_spatial_dim_3{pads[2]}; //RVV if (((padding_spatial_dim_1 == 0) && (padding_spatial_dim_2 == 0)) && (padding_spatial_dim_3 == 0)) { if (((filter_spatial_dim_1 == 2) && (filter_spatial_dim_1 == 2)) && (filter_spatial_dim_1 == 2)) { // in_spatial_dim_1 % 2 == 0 && in_spatial_dim_2 % 2 == 0 && in_spatial_dim_3 % 2 == 0 (also != 0) if (((stride_spatial_dim_1 == filter_spatial_dim_1) && (stride_spatial_dim_2 == filter_spatial_dim_2)) && (stride_spatial_dim_3 == filter_spatial_dim_3)) { size_t vl; float_t mid_data_list[batches][channels][in_spatial_dim_1][out_spatial_dim_2][out_spatial_dim_3]; float_t *mid_data = (float *)mid_data_list; for (int32_t batch = 0; batch < batches; ++batch) { for (int32_t channel = 0; channel < channels; ++channel) { for (int32_t in_dim1{0}; in_dim1 < in_spatial_dim_1; in_dim1++) { float_t *out_ptr = mid_data + in_dim1 * out_spatial_dim_2 * out_spatial_dim_3; for (int32_t out_dim2{0}; out_dim2 < out_spatial_dim_2; out_dim2++) { const float_t *line0 = input_data + in_dim1 * in_spatial_dim_2 * in_spatial_dim_3 + out_dim2 * in_spatial_dim_3 * 2; const float_t *line1 = line0 + in_spatial_dim_3; int32_t w = out_spatial_dim_3; while (w > 0) { vl = vsetvl_e32m2(w); vfloat32m2_t vline0_seg1, vline0_seg2; vfloat32m2_t vline1_seg1, vline1_seg2; vlseg2e32_v_f32m2(&vline0_seg1, &vline0_seg2, line0, vl); vlseg2e32_v_f32m2(&vline1_seg1, &vline1_seg2, line1, vl); vfloat32m2_t vsum0 = vfadd_vv_f32m2(vline0_seg1, vline0_seg2, vl); vfloat32m2_t vsum1 = vfadd_vv_f32m2(vline1_seg1, vline1_seg2, vl); vfloat32m2_t vsum = vfadd_vv_f32m2(vsum0, vsum1, vl); vfloat32m2_t vavg = vfmul_vf_f32m2(vsum, 0.25f, vl); vse32_v_f32m2(out_ptr, vavg, vl); w -= vl; out_ptr += vl; line0 += 2 * vl; line1 += 2 * vl; } //line0 += in_spatial_dim_3; //line1 += in_spatial_dim_3; } } input_data += in_spatial_dim_1 * in_spatial_dim_2 * in_spatial_dim_3; int32_t hw = out_spatial_dim_2 * out_spatial_dim_3; for (int32_t out_dim1{0}; out_dim1 < out_spatial_dim_1; out_dim1++) { const float_t *line0 = mid_data + out_dim1 * out_spatial_dim_2 * out_spatial_dim_3 * 2; const float_t *line1 = line0 + out_spatial_dim_2 * out_spatial_dim_3; for (int32_t i{0}; i < hw; i += vl) { vl = vsetvl_e32m2(hw - i); vfloat32m2_t vdim1_0 = vle32_v_f32m2(line0 + i, vl); vfloat32m2_t vdim1_1 = vle32_v_f32m2(line1 + i, vl); vfloat32m2_t vsum = vfadd_vv_f32m2(vdim1_0, vdim1_1, vl); vfloat32m2_t vavg = vfmul_vf_f32m2(vsum, 0.5f, vl); vse32_v_f32m2(output_data + i, vavg, vl); } //line0 += 2 * out_spatial_dim_2 * out_spatial_dim_3; //line1 += 2 * out_spatial_dim_2 * out_spatial_dim_3; output_data += out_spatial_dim_2 * out_spatial_dim_3; } mid_data += in_spatial_dim_1 * out_spatial_dim_2 * out_spatial_dim_3; } // channel loop // input_data += channels * in_spatial_dim_1 * in_spatial_dim_2 * in_spatial_dim_3; // mid_data += channels * in_spatial_dim_1 * out_spatial_dim_2 * out_spatial_dim_3; } // batch loop } // stride == kernel loop else { // kernel = 2 && stride = 1 size_t vl; float_t mid_data_list[batches][channels][in_spatial_dim_1][out_spatial_dim_2][out_spatial_dim_3]; float_t *mid_data = (float *)mid_data_list; for (int32_t batch = 0; batch < batches; ++batch) { for (int32_t channel = 0; channel < channels; ++channel) { for (int32_t in_dim1{0}; in_dim1 < in_spatial_dim_1; in_dim1++) { const float_t *line0 = input_data + in_dim1 * in_spatial_dim_2 * in_spatial_dim_3; const float_t *line1 = line0 + in_spatial_dim_3; float_t *out_ptr = mid_data + in_dim1 * out_spatial_dim_2 * out_spatial_dim_3; for (int32_t out_dim2{0}; out_dim2 < out_spatial_dim_2; out_dim2++) { int32_t w = out_spatial_dim_3; for (int32_t i{0}; i < w; i += vl) { vl = vsetvl_e32m2(w - i); vfloat32m2_t vline0_seg1 = vle32_v_f32m2(line0 + i, vl); vfloat32m2_t vline0_seg2 = vle32_v_f32m2(line0 + i + 1, vl); vfloat32m2_t vline1_seg1 = vle32_v_f32m2(line1 + i, vl); vfloat32m2_t vline1_seg2 = vle32_v_f32m2(line1 + i + 1, vl); ; vfloat32m2_t vsum0 = vfadd_vv_f32m2(vline0_seg1, vline0_seg2, vl); vfloat32m2_t vsum1 = vfadd_vv_f32m2(vline1_seg1, vline1_seg2, vl); vfloat32m2_t vsum = vfadd_vv_f32m2(vsum0, vsum1, vl); vfloat32m2_t vavg = vfmul_vf_f32m2(vsum, 0.25f, vl); vse32_v_f32m2(out_ptr + i, vavg, vl); } line0 += in_spatial_dim_3; line1 += in_spatial_dim_3; out_ptr += out_spatial_dim_3; } } input_data += in_spatial_dim_1 * in_spatial_dim_2 * in_spatial_dim_3;

资源文件列表:

gtest_op (2).zip 大约有236个文件

gtest_op/
gtest_op/layer/
gtest_op/layer/pooling_common.h 591.03KB
gtest_op/layer/normalize.cpp 12.56KB
gtest_op/layer/hz_rsqrt.cpp 3.24KB
gtest_op/layer/abs.cpp 1.12KB
gtest_op/layer/abs.h 1.29KB
gtest_op/layer/argmin.cpp.bk 21.37KB
gtest_op/layer/argmax.h 1.43KB
gtest_op/layer/argmin.h 1.44KB
gtest_op/layer/averagepool.cpp 226.08KB
gtest_op/layer/crelu.cpp 3.07KB
gtest_op/layer/averagepool.h 4.04KB
gtest_op/layer/axpy.cpp 2.36KB
gtest_op/layer/axpy.h 1.37KB
gtest_op/layer/batchnormalization.cpp 3.99KB
gtest_op/layer/batchnormalization.h 3.79KB
gtest_op/layer/bbox_decode.cpp 9.06KB
gtest_op/layer/bbox_decode.h 2.47KB
gtest_op/layer/bbox_to_roi.cpp 1.81KB
gtest_op/layer/bbox_to_roi.h 1.21KB
gtest_op/layer/cast.cpp 15.89KB
gtest_op/layer/cast.h 3.03KB
gtest_op/layer/ceil.cpp 2.13KB
gtest_op/layer/ceil.h 1.3KB
gtest_op/layer/clip.cpp 6.49KB
gtest_op/layer/clip.h 1.68KB
gtest_op/layer/concat.cpp 4.14KB
gtest_op/layer/concat.h 1.75KB
gtest_op/layer/constant.cpp 2.59KB
gtest_op/layer/constant.h 1.47KB
gtest_op/layer/const_of_shape.cpp 2.86KB
gtest_op/layer/const_of_shape.h 1.51KB
gtest_op/layer/conv.cpp.bk 21.12KB
gtest_op/layer/conv.h 3.29KB
gtest_op/layer/global_average_pool.cpp 3.36KB
gtest_op/layer/crelu.h 1.46KB
gtest_op/layer/crop.cpp 3.88KB
gtest_op/layer/crop.h 1.86KB
gtest_op/layer/cumsum.cpp 4.67KB
gtest_op/layer/cumsum.h 2.17KB
gtest_op/layer/deconvolution.cpp.bk 13.73KB
gtest_op/layer/deconvolution.h 3.4KB
gtest_op/layer/depth_to_space.cpp 4.93KB
gtest_op/layer/depth_to_space.h 1.77KB
gtest_op/layer/dequantize.cpp 23.48KB
gtest_op/layer/dequantize.h 2.41KB
gtest_op/layer/dequantize_linear.cpp 6.82KB
gtest_op/layer/dequantize_linear.h 1.59KB
gtest_op/layer/dropout.cpp 1.01KB
gtest_op/layer/dropout.h 1.34KB
gtest_op/layer/elementwise_binary_broadcast.cpp 41.1KB
gtest_op/layer/elementwise_binary_broadcast.h 2.32KB
gtest_op/layer/eltwise.cpp 28.6KB
gtest_op/layer/eltwise.h 2.03KB
gtest_op/layer/elu.cpp 1.62KB
gtest_op/layer/elu.h 1.44KB
gtest_op/layer/equal.cpp 13.32KB
gtest_op/layer/equal.h 1.38KB
gtest_op/layer/erf.cpp 2.02KB
gtest_op/layer/erf.h 1.28KB
gtest_op/layer/exp.cpp 1.36KB
gtest_op/layer/exp.h 1.29KB
gtest_op/layer/expand.cpp 6.95KB
gtest_op/layer/expand.h 1.41KB
gtest_op/layer/eyelike.cpp 4.24KB
gtest_op/layer/eyelike.h 1.49KB
gtest_op/layer/flatten.cpp 1.17KB
gtest_op/layer/flatten.h 1.45KB
gtest_op/layer/floor.cpp 1.16KB
gtest_op/layer/floor.h 1.31KB
gtest_op/layer/gather.cpp 4.81KB
gtest_op/layer/gather.h 1.52KB
gtest_op/layer/gather_elements.cpp 7.26KB
gtest_op/layer/gather_elements.h 1.81KB
gtest_op/layer/gather_nd.cpp 3.12KB
gtest_op/layer/gather_nd.h 1.42KB
gtest_op/layer/gemm.cpp.bk 4.89KB
gtest_op/layer/gemm.h 1.78KB
gtest_op/layer/global_lp_pool.cpp 6.96KB
gtest_op/layer/global_average_pool.h 1.46KB
gtest_op/layer/hz_channel_shuffle.cpp 6.09KB
gtest_op/layer/global_lp_pool.h 1.49KB
gtest_op/layer/global_max_pool.cpp 1.94KB
gtest_op/layer/global_max_pool.h 1.41KB
gtest_op/layer/graph.h 3.01KB
gtest_op/layer/grid_sample.cpp 8.09KB
gtest_op/layer/grid_sample.h 1.62KB
gtest_op/layer/gru.cpp.bk 18.47KB
gtest_op/layer/gru.h 4.82KB
gtest_op/layer/hardsigmoid.cpp 1.67KB
gtest_op/layer/hardsigmoid.h 1.59KB
gtest_op/layer/hardswish.cpp 1.28KB
gtest_op/layer/hardswish.h 1.33KB
gtest_op/layer/hb_dnn_ndarray.cpp 11.8KB
gtest_op/layer/horizon_layer.cpp 995B
gtest_op/layer/horizon_layer.h 1.62KB
gtest_op/layer/hz_channel_shuffle.h 1.57KB
gtest_op/layer/hz_resize.cpp 9.86KB
gtest_op/layer/hz_resize.h 1.77KB
gtest_op/layer/instance_normalization.cpp 6.66KB
gtest_op/layer/hz_rsqrt.h 1.57KB
gtest_op/layer/hz_softmax.cpp 3.89KB
gtest_op/layer/hz_softmax.h 1.48KB
gtest_op/layer/identity.cpp 1.05KB
gtest_op/layer/identity.h 1.35KB
gtest_op/layer/neg.cpp 8.71KB
gtest_op/layer/instance_normalization.h 2.39KB
gtest_op/layer/layer_common.cpp 1.12KB
gtest_op/layer/layer_common.h 2.96KB
gtest_op/layer/layer_normalization.cpp 4.83KB
gtest_op/layer/layer_normalization.h 1.65KB
gtest_op/layer/leakyrelu.cpp 1.43KB
gtest_op/layer/leakyrelu.h 1.49KB
gtest_op/layer/log.cpp 1.15KB
gtest_op/layer/log.h 1.29KB
gtest_op/layer/log_softmax.cpp 2.75KB
gtest_op/layer/log_softmax.h 1.49KB
gtest_op/layer/lp_normalization.cpp 3.57KB
gtest_op/layer/lp_normalization.h 1.52KB
gtest_op/layer/lp_pool.cpp 3.12KB
gtest_op/layer/lp_pool.h 1.51KB
gtest_op/layer/lrn.cpp 5.92KB
gtest_op/layer/lrn.h 2.48KB
gtest_op/layer/lstm.cpp.bk 13.66KB
gtest_op/layer/lstm.h 3.71KB
gtest_op/layer/matmul.cpp.bk 2.46KB
gtest_op/layer/matmul.h 1.38KB
gtest_op/layer/matmul_helper.h 14.29KB
gtest_op/layer/maxpool.cpp 10.66KB
gtest_op/layer/maxpool.h 1.85KB
gtest_op/layer/maxunpool.cpp 2.32KB
gtest_op/layer/maxunpool.h 1.87KB
gtest_op/layer/mvn.cpp 3.22KB
gtest_op/layer/mvn.h 1.51KB
gtest_op/layer/neg.h 1.29KB
gtest_op/layer/nms.cpp 6.21KB
gtest_op/layer/nms.h 1.63KB
gtest_op/layer/nonzero.cpp 3KB
gtest_op/layer/nonzero.h 1.31KB
gtest_op/layer/normalize.h 2.06KB
gtest_op/layer/onehot.cpp 8.2KB
gtest_op/layer/onehot.h 1.47KB
gtest_op/layer/pad.cpp 15.63KB
gtest_op/layer/pad.h 2.35KB
gtest_op/layer/power.cpp 1.37KB
gtest_op/layer/power.h 1.41KB
gtest_op/layer/prelu.cpp 8.93KB
gtest_op/layer/prelu.h 1.41KB
gtest_op/layer/psroi_pooling.cpp 7.01KB
gtest_op/layer/psroi_pooling.h 2.57KB
gtest_op/layer/quantize_linear.cpp 7.71KB
gtest_op/layer/quantize_linear.h 1.57KB
gtest_op/layer/randomuniform.cpp 3.49KB
gtest_op/layer/randomuniform.h 1.59KB
gtest_op/layer/randomuniformlike.cpp 3.71KB
gtest_op/layer/randomuniformlike.h 1.67KB
gtest_op/layer/range.cpp 2.92KB
gtest_op/layer/range.h 1.38KB
gtest_op/layer/reducel1.cpp 7.53KB
gtest_op/layer/reducel1.h 1.46KB
gtest_op/layer/reducel2.cpp 7.78KB
gtest_op/layer/reducel2.h 1.46KB
gtest_op/layer/reducelogsumexp.cpp 7.03KB
gtest_op/layer/reducelogsumexp.h 1.54KB
gtest_op/layer/reduction.cpp 34.95KB
gtest_op/layer/reduction.h 1.65KB
gtest_op/layer/relu.cpp 1.19KB
gtest_op/layer/relu.h 1.28KB
gtest_op/layer/relux.cpp 1.46KB
gtest_op/layer/relux.h 1.45KB
gtest_op/layer/reshape.cpp 1.36KB
gtest_op/layer/reshape.h 1.28KB
gtest_op/layer/reverse_sequence.cpp 9.93KB
gtest_op/layer/reverse_sequence.h 1.66KB
gtest_op/layer/rnn.cpp 6.4KB
gtest_op/layer/rnn.h 2.6KB
gtest_op/layer/roialign.cpp 13.66KB
gtest_op/layer/roialign.h 1.69KB
gtest_op/layer/roi_decode.cpp 6.29KB
gtest_op/layer/roi_decode.h 2.17KB
gtest_op/layer/roi_pooling.cpp 5.37KB
gtest_op/layer/roi_pooling.h 1.94KB
gtest_op/layer/selu.cpp 1.63KB
gtest_op/layer/selu.h 1.42KB
gtest_op/layer/shape.cpp 1.13KB
gtest_op/layer/shape.h 1.29KB
gtest_op/layer/sigmoid.cpp 1.2KB
gtest_op/layer/sigmoid.h 1.31KB
gtest_op/layer/sign.cpp 1.23KB
gtest_op/layer/sign.h 1.28KB
gtest_op/layer/slice.cpp 17.52KB
gtest_op/layer/slice.h 1.35KB
gtest_op/layer/softmax.cpp 2.28KB
gtest_op/layer/softmax.h 1.45KB
gtest_op/layer/softplus.cpp 1.47KB
gtest_op/layer/softplus.h 1.34KB
gtest_op/layer/softsign.cpp 1.18KB
gtest_op/layer/softsign.h 1.32KB
gtest_op/layer/space_to_depth.cpp 2.65KB
gtest_op/layer/space_to_depth.h 1.56KB
gtest_op/layer/split.cpp 4.09KB
gtest_op/layer/split.h 1.86KB
gtest_op/layer/squeeze.cpp 1.04KB
gtest_op/layer/squeeze.h 1.33KB
gtest_op/layer/tanh.cpp 1.46KB
gtest_op/layer/tanh.h 1.27KB
gtest_op/layer/thresholdedrelu.cpp 1.46KB
gtest_op/layer/thresholdedrelu.h 1.54KB
gtest_op/layer/tile.cpp 7.57KB
gtest_op/layer/tile.h 1.4KB
gtest_op/layer/topk.cpp 6.69KB
gtest_op/layer/topk.h 4.21KB
gtest_op/layer/transpose.cpp 16.77KB
gtest_op/layer/transpose.h 1.61KB
gtest_op/layer/unsqueeze.cpp 1.06KB
gtest_op/layer/unsqueeze.h 1.35KB
gtest_op/layer/upsample.cpp 23.96KB
gtest_op/layer/upsample.h 3.8KB
gtest_op/layer/where.cpp 8.74KB
gtest_op/layer/where.h 1.39KB
gtest_op/layer/argmax.cpp.bk 21.84KB
gtest_op/test_op/
gtest_op/test_op/test_averagepool.cpp 74.04KB
gtest_op/test_op/test_global_lp_pool.cpp 10.62KB
gtest_op/test_op/test_normalize.cpp 3.21KB
gtest_op/test_op/test_hz_rsqrt.cpp 7.49KB
gtest_op/test_op/test_crelu.cpp 5.34KB
gtest_op/test_op/test_hz_channel_shuffle.cpp 4.93KB
gtest_op/test_op/test_global_average_pool.cpp 2.99KB
gtest_op/test_op/test_neg.cpp 14.4KB
gtest_op/test_op/test_instance_normalization.cpp 3.68KB
gtest_op/test_op/main.cpp 599B
gtest_op/test_op/test_op.h 804B
gtest_op/test_op/test_argmax.cpp.bk 9.68KB
gtest_op/test_op/test_argmin.cpp.bk 9.64KB