Flame Graph
Reset Zoom
Search
ic
mkl_blas_avx512_sgemm_kernel_nocopy_NT_b0 (261 samples, 0.01%)
__mprotect (1,368 samples, 0.05%)
perf_iterate_sb (422 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
__do_sys_clone (35,308 samples, 1.41%)
entry_SYSCALL_64_after_hwframe (346 samples, 0.01%)
pthread_create@@GLIBC_2.2.5 (2,454 samples, 0.10%)
exc_page_fault (942 samples, 0.04%)
do_syscall_64 (220 samples, 0.01%)
[unknown] (14,134 samples, 0.57%)
native_flush_tlb_others (3,969 samples, 0.16%)
[unknown] (20,922 samples, 0.84%)
__alloc_pages_nodemask (424 samples, 0.02%)
__x64_sys_futex (332 samples, 0.01%)
do_user_addr_fault (732 samples, 0.03%)
flush_tlb_mm_range (37,357 samples, 1.50%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__vm_munmap (1,670 samples, 0.07%)
__handle_mm_fault (1,198 samples, 0.05%)
[unknown] (20,928 samples, 0.84%)
[unknown] (3,794 samples, 0.15%)
__do_global_dtors_aux (439 samples, 0.02%)
std::_Destroy_aux<false>::__destroy<at::Tensor*> (398 samples, 0.02%)
asm_sysvec_apic_timer_interrupt (929 samples, 0.04%)
[libgomp-a34b3233.so.1] (1,294,659 samples, 51.88%)
[libgomp-a34b3233.so.1]
do_syscall_64 (337 samples, 0.01%)
torch::data::transforms::Stack<torch::data::Example<at::Tensor, at::Tensor> >::apply_batch (583 samples, 0.02%)
__do_munmap (219 samples, 0.01%)
torch::autograd::Engine::thread_main (748 samples, 0.03%)
_raw_spin_lock (2,260 samples, 0.09%)
do_syscall_64 (375 samples, 0.02%)
do_syscall_64 (509 samples, 0.02%)
mkl_blas_avx512_sgemm_kernel_nocopy_TN_b1 (35,045 samples, 1.40%)
perf_swevent_init_hrtimer (386 samples, 0.02%)
[unknown] (20,928 samples, 0.84%)
tlb_flush_mmu (703 samples, 0.03%)
__vm_munmap (266 samples, 0.01%)
kernel_clone (35,215 samples, 1.41%)
update_blocked_averages (310 samples, 0.01%)
wake_up_q (322 samples, 0.01%)
[unknown] (20,928 samples, 0.84%)
std::_Destroy<at::Tensor> (267 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
mm_cleanup_thread_cbk (986 samples, 0.04%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
zap_pte_range.isra.0 (733 samples, 0.03%)
wake_up_q (235 samples, 0.01%)
try_to_wake_up (495 samples, 0.02%)
__x64_sys_mprotect (218 samples, 0.01%)
[unknown] (20,929 samples, 0.84%)
do_syscall_64 (315 samples, 0.01%)
[unknown] (20,928 samples, 0.84%)
mem_cgroup_charge (237 samples, 0.01%)
__munmap (1,670 samples, 0.07%)
stbiw__encode_png_line (1,236 samples, 0.05%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
change_p4d_range (318 samples, 0.01%)
__init_waitqueue_head (967 samples, 0.04%)
schedule (218 samples, 0.01%)
do_syscall_64 (560 samples, 0.02%)
unmap_region (343 samples, 0.01%)
torch::autograd::VariableType::(anonymous namespace)::stack (212 samples, 0.01%)
native_queued_spin_lock_slowpath (2,260 samples, 0.09%)
do_futex (463 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
mkl_blas_avx512_sgemm_scopy_down48_ea (6,723 samples, 0.27%)
get_futex_key (331 samples, 0.01%)
[unknown] (10,212 samples, 0.41%)
tlb_flush_mmu (38,232 samples, 1.53%)
flush_tlb_mm_range (1,325 samples, 0.05%)
mem_cgroup_charge (394 samples, 0.02%)
[unknown] (20,940 samples, 0.84%)
[unknown] (20,928 samples, 0.84%)
__x64_sys_munmap (977 samples, 0.04%)
__x64_sys_madvise (2,964 samples, 0.12%)
__x64_sys_exit (563 samples, 0.02%)
__libc_start_main (6,564 samples, 0.26%)
__do_munmap (454 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (344 samples, 0.01%)
asm_call_sysvec_on_stack (874 samples, 0.04%)
prep_compound_page (227 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
lru_add_drain (594 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
___slab_alloc (6,132 samples, 0.25%)
perf_iterate_ctx (384 samples, 0.02%)
[unknown] (20,928 samples, 0.84%)
copy_process (30,555 samples, 1.22%)
do_anonymous_page (2,681 samples, 0.11%)
__do_munmap (870 samples, 0.03%)
__lll_lock_wait_private (344 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (1,715 samples, 0.07%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
c10::function_ref<void (18,714 samples, 0.75%)
__x2apic_send_IPI_mask (1,943 samples, 0.08%)
asm_exc_page_fault (825 samples, 0.03%)
omp_get_num_threads (267 samples, 0.01%)
mkl_vml_kernel_sSqrt_Z0HAynn (19,046 samples, 0.76%)
do_syscall_64 (344 samples, 0.01%)
_raw_spin_lock (820 samples, 0.03%)
__x64_sys_munmap (266 samples, 0.01%)
[unknown] (20,970 samples, 0.84%)
__alloc_pages_nodemask (434 samples, 0.02%)
unlink_chunk.isra.0 (580 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__do_munmap (344 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
alloc_pid (354 samples, 0.01%)
memset (2,420 samples, 0.10%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (20,908 samples, 0.84%)
do_syscall_64 (247 samples, 0.01%)
[unknown] (20,931 samples, 0.84%)
malloc_consolidate (988 samples, 0.04%)
[unknown] (20,928 samples, 0.84%)
mkl_blas_avx512_sgemm_kernel_nocopy_TN_b0 (282 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
free_pages_and_swap_cache (778 samples, 0.03%)
do_user_addr_fault (1,555 samples, 0.06%)
uncharge_batch (242 samples, 0.01%)
[unknown] (20,962 samples, 0.84%)
x2apic_send_IPI_mask (1,944 samples, 0.08%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (7,073 samples, 0.28%)
entry_SYSCALL_64_after_hwframe (3,907 samples, 0.16%)
asm_exc_page_fault (2,705 samples, 0.11%)
__do_global_dtors_aux (439 samples, 0.02%)
__alloc_pages_nodemask (259 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (21,132 samples, 0.85%)
asm_exc_page_fault (2,117 samples, 0.08%)
sysvec_apic_timer_interrupt (2,238 samples, 0.09%)
exc_page_fault (580 samples, 0.02%)
handle_mm_fault (1,250 samples, 0.05%)
zap_pte_range.isra.0 (608 samples, 0.02%)
[unknown] (3,264 samples, 0.13%)
__free_tcb (540 samples, 0.02%)
memmove (1,096 samples, 0.04%)
mkl_blas_sgemm (1,326 samples, 0.05%)
[unknown] (20,984 samples, 0.84%)
c10::TensorImpl::~TensorImpl (223 samples, 0.01%)
futex_wake (1,907 samples, 0.08%)
torch::data::detail::ValidIterator<torch::data::Example<at::Tensor, at::Tensor> >::next (1,251 samples, 0.05%)
[unknown] (20,981 samples, 0.84%)
zap_pte_range.isra.0 (718 samples, 0.03%)
[unknown] (20,928 samples, 0.84%)
do_anonymous_page (299 samples, 0.01%)
unmap_page_range (1,228 samples, 0.05%)
x2apic_send_IPI_mask (666 samples, 0.03%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
unmap_vmas (725 samples, 0.03%)
asm_exc_page_fault (3,124 samples, 0.13%)
__free_tcb (1,908 samples, 0.08%)
__munmap (2,132 samples, 0.09%)
__do_global_dtors_aux (439 samples, 0.02%)
do_syscall_64 (1,670 samples, 0.07%)
mm_account_ptr_by_tid (57,109 samples, 2.29%)
m..
unmap_region (952 samples, 0.04%)
__do_global_dtors_aux (439 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
free_pages_and_swap_cache (409 samples, 0.02%)
get_page_from_freelist (325 samples, 0.01%)
__x64_sys_futex (849 samples, 0.03%)
[unknown] (20,928 samples, 0.84%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
kfree (221 samples, 0.01%)
flush_tlb_mm_range (3,969 samples, 0.16%)
variational-aut (2,495,587 samples, 100.00%)
variational-aut
torch::autograd::DifferentiableViewMeta::DifferentiableViewMeta (730 samples, 0.03%)
arena_get2.part.0 (862 samples, 0.03%)
__do_global_dtors_aux (439 samples, 0.02%)
futex_wait (849 samples, 0.03%)
__x2apic_send_IPI_mask (666 samples, 0.03%)
do_mmap (559 samples, 0.02%)
tcache_init.part.0 (492 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
at::native::unsqueeze (235 samples, 0.01%)
__split_vma (421 samples, 0.02%)
rcu_core (1,337 samples, 0.05%)
get_page_from_freelist (294 samples, 0.01%)
find_vma (370 samples, 0.01%)
vssqrt_cout_rare (3,444 samples, 0.14%)
__do_global_dtors_aux (439 samples, 0.02%)
do_syscall_64 (849 samples, 0.03%)
[unknown] (20,929 samples, 0.84%)
entry_SYSCALL_64_after_hwframe (374 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__munmap (4,037 samples, 0.16%)
rcu_core (557 samples, 0.02%)
__x64_sys_futex (315 samples, 0.01%)
alloc_vmap_area (1,029 samples, 0.04%)
native_send_call_func_ipi (667 samples, 0.03%)
__vm_munmap (344 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
handle_mm_fault (3,145 samples, 0.13%)
[unknown] (20,928 samples, 0.84%)
[unknown] (20,929 samples, 0.84%)
mm_release (479 samples, 0.02%)
free_event_rcu (815 samples, 0.03%)
__x64_sys_munmap (344 samples, 0.01%)
[unknown] (20,936 samples, 0.84%)
mem_cgroup_uncharge_list (299 samples, 0.01%)
kmem_cache_alloc_node (1,052 samples, 0.04%)
do_futex (377 samples, 0.02%)
[unknown] (20,990 samples, 0.84%)
pte_alloc_one (278 samples, 0.01%)
omp_simple_3d.omp_fn.2 (38,699 samples, 1.55%)
std::_Destroy<at::Tensor*> (292 samples, 0.01%)
[unknown] (20,925 samples, 0.84%)
_int_malloc (2,687 samples, 0.11%)
entry_SYSCALL_64_after_hwframe (537 samples, 0.02%)
clear_page_erms (4,689 samples, 0.19%)
__do_global_dtors_aux (439 samples, 0.02%)
__sched_text_start (216 samples, 0.01%)
GOMP_parallel (7,273 samples, 0.29%)
c10::VariableVersion::VersionCounter::~VersionCounter (291 samples, 0.01%)
c10::intrusive_ptr<c10::TensorImpl, c10::UndefinedTensorImpl>::~intrusive_ptr (364 samples, 0.01%)
[unknown] (20,932 samples, 0.84%)
[unknown] (20,897 samples, 0.84%)
do_syscall_64 (2,980 samples, 0.12%)
[unknown] (20,928 samples, 0.84%)
[unknown] (20,943 samples, 0.84%)
std::function<c10::optional<torch::data::Example<at::Tensor, at::Tensor> > (1,227 samples, 0.05%)
get_page_from_freelist (2,054 samples, 0.08%)
torch::data::Iterator<torch::data::Example<at::Tensor, at::Tensor> >::operator++ (1,259 samples, 0.05%)
do_mprotect_pkey (1,143 samples, 0.05%)
hrtimer_init (350 samples, 0.01%)
[unknown] (20,932 samples, 0.84%)
[unknown] (20,928 samples, 0.84%)
up_read (312 samples, 0.01%)
stbi_write_png_to_mem (4,682 samples, 0.19%)
on_each_cpu_cond_mask (2,535 samples, 0.10%)
[unknown] (4,005 samples, 0.16%)
_raw_spin_lock (296 samples, 0.01%)
c10::impl::wrap_kernel_functor_unboxed_<c10::impl::detail::WrapFunctionIntoFunctor_<c10::CompileTimeFunctionPointer<at::Tensor (932 samples, 0.04%)
run_rebalance_domains (344 samples, 0.01%)
all (2,495,587 samples, 100%)
[unknown] (20,997 samples, 0.84%)
do_anonymous_page (1,080 samples, 0.04%)
handle_mm_fault (565 samples, 0.02%)
futex_wait (302 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (679 samples, 0.03%)
vm_area_dup (228 samples, 0.01%)
[unknown] (20,941 samples, 0.84%)
__handle_mm_fault (341 samples, 0.01%)
entry_SYSCALL_64_after_hwframe (35,452 samples, 1.42%)
[unknown] (20,993 samples, 0.84%)
__do_global_dtors_aux (439 samples, 0.02%)
mkl_blas_avx512_sgemm_get_bufs (668 samples, 0.03%)
mkl_blas_avx512_xsgemm_nocopy_driver (298 samples, 0.01%)
do_exit (488 samples, 0.02%)
do_anonymous_page (262 samples, 0.01%)
alloc_pages_vma (233 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
__x64_sys_brk (245 samples, 0.01%)
native_send_call_func_ipi (1,723 samples, 0.07%)
at::native::(anonymous namespace)::vectorized_outer_sum<float> (584 samples, 0.02%)
native_queued_spin_lock_slowpath (288 samples, 0.01%)
rcu_core_si (576 samples, 0.02%)
unmap_single_vma (725 samples, 0.03%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (21,149 samples, 0.85%)
entry_SYSCALL_64_after_hwframe (4,037 samples, 0.16%)
__memcg_kmem_charge_page (342 samples, 0.01%)
c10::intrusive_ptr<c10::TensorImpl, c10::UndefinedTensorImpl>::reset_ (624 samples, 0.03%)
mutex_lock (387 samples, 0.02%)
release_pages (742 samples, 0.03%)
memcpy_erms (261 samples, 0.01%)
do_syscall_64 (1,907 samples, 0.08%)
at::native::copy_ (622 samples, 0.02%)
get_user_pages_fast (263 samples, 0.01%)
tlb_flush_mmu (3,982 samples, 0.16%)
logf@plt (932 samples, 0.04%)
native_send_call_func_ipi (1,946 samples, 0.08%)
mem_cgroup_charge (232 samples, 0.01%)
[unknown] (20,934 samples, 0.84%)
tlb_finish_mmu (1,772 samples, 0.07%)
unmap_single_vma (802 samples, 0.03%)
exc_page_fault (955 samples, 0.04%)
futex_wake (358 samples, 0.01%)
prep_new_page (256 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
tlb_finish_mmu (2,011 samples, 0.08%)
__handle_mm_fault (1,322 samples, 0.05%)
std::_Destroy<at::Tensor*, at::Tensor> (404 samples, 0.02%)
wake_up_q (292 samples, 0.01%)
futex_wake (275 samples, 0.01%)
[unknown] (1,735,005 samples, 69.52%)
[unknown]
stbiw__paeth (518 samples, 0.02%)
try_to_wake_up (226 samples, 0.01%)
do_syscall_64 (977 samples, 0.04%)
__vm_munmap (977 samples, 0.04%)
internal_get_user_pages_fast (259 samples, 0.01%)
do_syscall_64 (344 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (20,928 samples, 0.84%)
__do_global_dtors_aux (439 samples, 0.02%)
futex_wait (441 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
torch::autograd::Engine::evaluate_function (318 samples, 0.01%)
at::native::copy_impl (261 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (849 samples, 0.03%)
bacct_add_tsk (244 samples, 0.01%)
perf_event_task (433 samples, 0.02%)
[unknown] (20,928 samples, 0.84%)
tlb_flush_mmu (1,750 samples, 0.07%)
stbi_write_png (4,684 samples, 0.19%)
irq_exit_rcu (2,124 samples, 0.09%)
do_syscall_64 (4,037 samples, 0.16%)
c10::impl::wrap_kernel_functor_unboxed_<c10::impl::detail::WrapFunctionIntoFunctor_<c10::CompileTimeFunctionPointer<at::Tensor (758 samples, 0.03%)
[unknown] (20,753 samples, 0.83%)
asm_exc_page_fault (1,565 samples, 0.06%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (21,028 samples, 0.84%)
x2apic_send_IPI_mask (1,723 samples, 0.07%)
do_futex (295 samples, 0.01%)
[unknown] (16,268 samples, 0.65%)
__lock_text_start (283 samples, 0.01%)
[libgomp-a34b3233.so.1] (871 samples, 0.03%)
__munmap (344 samples, 0.01%)
__sched_text_start (315 samples, 0.01%)
[unknown] (20,937 samples, 0.84%)
unmap_page_range (798 samples, 0.03%)
do_madvise (2,958 samples, 0.12%)
do_futex (214 samples, 0.01%)
__mutex_init (229 samples, 0.01%)
do_syscall_64 (35,428 samples, 1.42%)
[unknown] (20,931 samples, 0.84%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
exc_page_fault (1,213 samples, 0.05%)
handle_mm_fault (713 samples, 0.03%)
release_pages (389 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
native_queued_spin_lock_slowpath (820 samples, 0.03%)
down_read_trylock (301 samples, 0.01%)
[unknown] (178,415 samples, 7.15%)
[unknown]
__lock_text_start (318 samples, 0.01%)
smp_call_function_many (33,382 samples, 1.34%)
change_pte_range (216 samples, 0.01%)
[unknown] (20,947 samples, 0.84%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__alloc_skb (403 samples, 0.02%)
do_syscall_64 (3,876 samples, 0.16%)
futex_wait_setup (820 samples, 0.03%)
mprotect_fixup (1,007 samples, 0.04%)
__slab_alloc (545 samples, 0.02%)
exc_page_fault (750 samples, 0.03%)
do_user_addr_fault (1,170 samples, 0.05%)
at::parallel_for<at::TensorIteratorBase::for_each (2,007 samples, 0.08%)
[unknown] (20,936 samples, 0.84%)
__x2apic_send_IPI_mask (2,246 samples, 0.09%)
unmap_page_range (725 samples, 0.03%)
logf@GLIBC_2.2.5 (4,167 samples, 0.17%)
sysmalloc (867 samples, 0.03%)
tlb_finish_mmu (698 samples, 0.03%)
asm_sysvec_apic_timer_interrupt (2,238 samples, 0.09%)
do_syscall_64 (316 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
handle_mm_fault (376 samples, 0.02%)
smp_call_function_many (1,136 samples, 0.05%)
finish_task_switch (315 samples, 0.01%)
[unknown] (20,932 samples, 0.84%)
torch::data::DataLoaderBase<torch::data::datasets::MapDataset<torch::data::datasets::MNIST, torch::data::transforms::Stack<torch::data::Example<at::Tensor, at::Tensor> > >, torch::data::Example<at::Tensor, at::Tensor>, std::vector<unsigned long, std::allocator<unsigned long> > >::begin (1,218 samples, 0.05%)
[unknown] (6,064 samples, 0.24%)
[unknown] (247 samples, 0.01%)
unmap_region (989 samples, 0.04%)
native_send_call_func_ipi (9,658 samples, 0.39%)
__do_global_dtors_aux (439 samples, 0.02%)
unmap_single_vma (1,255 samples, 0.05%)
__do_global_dtors_aux (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (20,955 samples, 0.84%)
tlb_flush_mmu (261 samples, 0.01%)
torch::data::datasets::MapDataset<torch::data::datasets::MNIST, torch::data::transforms::Stack<torch::data::Example<at::Tensor, at::Tensor> > >::get_batch_impl<torch::data::datasets::MNIST, void> (1,071 samples, 0.04%)
__handle_mm_fault (710 samples, 0.03%)
mkl_serv_inspector_unsuppress (434 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (2,990 samples, 0.12%)
entry_SYSCALL_64_after_hwframe (42,359 samples, 1.70%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
find_vma_prev (388 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (20,911 samples, 0.84%)
[unknown] (20,929 samples, 0.84%)
at::TensorIteratorBase::get_data_ptrs (266 samples, 0.01%)
__munmap (977 samples, 0.04%)
do_madvise.part.0 (41,985 samples, 1.68%)
[unknown] (20,934 samples, 0.84%)
entry_SYSCALL_64_after_hwframe (220 samples, 0.01%)
downgrade_write (495 samples, 0.02%)
[unknown] (20,961 samples, 0.84%)
at::TensorIteratorBase::compute_types (245 samples, 0.01%)
down_read_trylock (335 samples, 0.01%)
[unknown] (20,938 samples, 0.84%)
do_user_addr_fault (916 samples, 0.04%)
sched_fork (295 samples, 0.01%)
[unknown] (20,928 samples, 0.84%)
llist_add_batch (2,432 samples, 0.10%)
handle_mm_fault (1,188 samples, 0.05%)
finish_task_switch (214 samples, 0.01%)
at::TensorIteratorBase::serial_for_each (243 samples, 0.01%)
main (6,445 samples, 0.26%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (20,932 samples, 0.84%)
clear_page_erms (934 samples, 0.04%)
exc_page_fault (3,527 samples, 0.14%)
__alloc_pages_nodemask (222 samples, 0.01%)
mkl_vml_serv_threader_s_1i_1o (6,901 samples, 0.28%)
smp_call_function_many (753 samples, 0.03%)
__pagevec_lru_add (541 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (214 samples, 0.01%)
mkl_blas_avx512_sgemm_kernel_nocopy_NN_b0 (53,688 samples, 2.15%)
m..
handle_mm_fault (750 samples, 0.03%)
[unknown] (21,047 samples, 0.84%)
at::TensorIteratorBase::get_strides (1,269 samples, 0.05%)
entry_SYSCALL_64_after_hwframe (315 samples, 0.01%)
do_mprotect_pkey (217 samples, 0.01%)
__lll_lock_wait_private (849 samples, 0.03%)
torch::data::DataLoaderBase<torch::data::datasets::MapDataset<torch::data::datasets::MNIST, torch::data::transforms::Stack<torch::data::Example<at::Tensor, at::Tensor> > >, torch::data::Example<at::Tensor, at::Tensor>, std::vector<unsigned long, std::allocator<unsigned long> > >::next (1,212 samples, 0.05%)
__do_global_dtors_aux (439 samples, 0.02%)
mem_cgroup_charge (385 samples, 0.02%)
release_pages (218 samples, 0.01%)
at::Tensor::~Tensor (261 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
_raw_spin_lock (379 samples, 0.02%)
syscall_enter_from_user_mode (343 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
tlb_finish_mmu (3,982 samples, 0.16%)
native_flush_tlb_others (1,289 samples, 0.05%)
entry_SYSCALL_64_after_hwframe (340 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
__x64_sys_futex (1,907 samples, 0.08%)
[unknown] (20,928 samples, 0.84%)
cpu_clock_event_init (407 samples, 0.02%)
prep_new_page (693 samples, 0.03%)
at::native::(anonymous namespace)::vectorized_loop<at::native::(anonymous namespace)::sigmoid_backward_kernel (982 samples, 0.04%)
perf_event_alloc (14,385 samples, 0.58%)
free_pages_and_swap_cache (627 samples, 0.03%)
__nptl_deallocate_tsd.part.0 (1,660 samples, 0.07%)
sysvec_apic_timer_interrupt (929 samples, 0.04%)
[unknown] (20,931 samples, 0.84%)
__do_global_dtors_aux (439 samples, 0.02%)
tlb_finish_mmu (706 samples, 0.03%)
smp_call_function_many_cond (895 samples, 0.04%)
do_futex (425 samples, 0.02%)
__alloc_pages_nodemask (346 samples, 0.01%)
__free_tcb (214 samples, 0.01%)
__lock_text_start (287 samples, 0.01%)
lru_add_drain_cpu (581 samples, 0.02%)
tlb_finish_mmu (38,630 samples, 1.55%)
stbiw__zlib_countm (1,475 samples, 0.06%)
torch::data::datasets::MapDataset<torch::data::datasets::MNIST, torch::data::transforms::Stack<torch::data::Example<at::Tensor, at::Tensor> > >::get_batch (1,075 samples, 0.04%)
do_futex (312 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
__alloc_pages_nodemask (2,554 samples, 0.10%)
__lock_text_start (1,776 samples, 0.07%)
smp_call_function_many_cond (2,378 samples, 0.10%)
do_syscall_64 (334 samples, 0.01%)
stbi_zlib_compress (2,740 samples, 0.11%)
zap_page_range (40,888 samples, 1.64%)
__handle_mm_fault (1,024 samples, 0.04%)
__x64_sys_futex (377 samples, 0.02%)
fill_stats (494 samples, 0.02%)
c10::function_ref<void (1,409 samples, 0.06%)
prepare_reply (462 samples, 0.02%)
__x64_sys_futex (297 samples, 0.01%)
asm_exc_page_fault (1,213 samples, 0.05%)
__do_global_dtors_aux (439 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (870 samples, 0.03%)
at::Tensor::~Tensor (376 samples, 0.02%)
unmap_region (4,037 samples, 0.16%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
at::native::(anonymous namespace)::vectorized_loop<at::native::(anonymous namespace)::mul_kernel (44,338 samples, 1.78%)
rwsem_wake.isra.0 (309 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (20,928 samples, 0.84%)
alloc_pages_vma (403 samples, 0.02%)
exc_page_fault (2,117 samples, 0.08%)
_raw_spin_lock (298 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
try_to_wake_up (1,907 samples, 0.08%)
do_syscall_64 (1,221 samples, 0.05%)
__lll_lock_wait_private (949 samples, 0.04%)
__lock_text_start (349 samples, 0.01%)
down_read (265 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (16,715 samples, 0.67%)
__do_global_dtors_aux (439 samples, 0.02%)
__munmap (1,388 samples, 0.06%)
[unknown] (20,928 samples, 0.84%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (21,287 samples, 0.85%)
mm_account_ptr_by_tid..0 (372 samples, 0.01%)
handle_mm_fault (729 samples, 0.03%)
do_futex (344 samples, 0.01%)
mmap_region (332 samples, 0.01%)
at::native::select (336 samples, 0.01%)
uncharge_batch (218 samples, 0.01%)
free_pgtables (213 samples, 0.01%)
clear_page_erms (347 samples, 0.01%)
[unknown] (20,928 samples, 0.84%)
[unknown] (20,893 samples, 0.84%)
[unknown] (20,932 samples, 0.84%)
c10::UndefinedTensorImpl::~UndefinedTensorImpl (3,098 samples, 0.12%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (20,928 samples, 0.84%)
[unknown] (20,923 samples, 0.84%)
torch::data::datasets::Dataset<torch::data::datasets::MNIST, torch::data::Example<at::Tensor, at::Tensor> >::get_batch (307 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (378 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
ksys_mmap_pgoff (655 samples, 0.03%)
alloc_pages_current (425 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (2,132 samples, 0.09%)
__slab_free (450 samples, 0.02%)
taskstats_exit (2,374 samples, 0.10%)
unmap_region (867 samples, 0.03%)
exc_page_fault (1,565 samples, 0.06%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (1,388 samples, 0.06%)
do_syscall_64 (377 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
exc_page_fault (2,705 samples, 0.11%)
__do_global_dtors_aux (439 samples, 0.02%)
__mprotect (225 samples, 0.01%)
__lock_text_start (1,906 samples, 0.08%)
flush_tlb_mm_range (663 samples, 0.03%)
__madvise (42,670 samples, 1.71%)
futex_wake (311 samples, 0.01%)
[unknown] (20,884 samples, 0.84%)
do_madvise (42,167 samples, 1.69%)
__do_global_dtors_aux (439 samples, 0.02%)
__lock_text_start (249 samples, 0.01%)
tlb_finish_mmu (261 samples, 0.01%)
[unknown] (20,932 samples, 0.84%)
__softirqentry_text_start (2,092 samples, 0.08%)
__tls_get_addr (244 samples, 0.01%)
[unknown] (21,738 samples, 0.87%)
__do_global_dtors_aux (439 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (1,228 samples, 0.05%)
entry_SYSCALL_64_after_hwframe (344 samples, 0.01%)
do_futex (315 samples, 0.01%)
image_io::save_image (4,687 samples, 0.19%)
mkl_blas_avx512_sgemm_kernel_0_b0 (15,899 samples, 0.64%)
do_anonymous_page (979 samples, 0.04%)
[unknown] (3,098 samples, 0.12%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (21,006 samples, 0.84%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (2,993 samples, 0.12%)
entry_SYSCALL_64_after_hwframe (743 samples, 0.03%)
__softirqentry_text_start (848 samples, 0.03%)
__munmap (374 samples, 0.01%)
__x64_sys_futex (486 samples, 0.02%)
perf_event_fork (437 samples, 0.02%)
[unknown] (20,939 samples, 0.84%)
memset_erms (536 samples, 0.02%)
[unknown] (20,932 samples, 0.84%)
__lll_lock_wait_private (316 samples, 0.01%)
[unknown] (21,036 samples, 0.84%)
__do_munmap (363 samples, 0.01%)
[unknown] (20,928 samples, 0.84%)
__munmap (870 samples, 0.03%)
__vm_munmap (374 samples, 0.01%)
c10::MaybeOwned<at::Tensor>::MaybeOwned (930 samples, 0.04%)
free_event_rcu (327 samples, 0.01%)
__alloc_pages_nodemask (305 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
asm_exc_page_fault (580 samples, 0.02%)
gemm_omp_driver_v2 (1,076 samples, 0.04%)
rwsem_down_write_slowpath (897 samples, 0.04%)
[unknown] (21,017 samples, 0.84%)
on_each_cpu_cond_mask (1,115 samples, 0.04%)
kmem_cache_alloc_trace (6,805 samples, 0.27%)
do_user_addr_fault (1,465 samples, 0.06%)
c10::TensorImpl::release_resources (324 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (20,932 samples, 0.84%)
__handle_mm_fault (3,108 samples, 0.12%)
std::_Function_handler<c10::optional<torch::data::Example<at::Tensor, at::Tensor> > (1,223 samples, 0.05%)
[unknown] (20,928 samples, 0.84%)
mutex_unlock (964 samples, 0.04%)
asm_exc_page_fault (955 samples, 0.04%)
__do_global_dtors_aux (439 samples, 0.02%)
at::native::(anonymous namespace)::vectorized_loop<at::native::(anonymous namespace)::add_kernel (3,877 samples, 0.16%)
alloc_pages_current (266 samples, 0.01%)
c10::intrusive_ptr<c10::TensorImpl, c10::UndefinedTensorImpl>::~intrusive_ptr (249 samples, 0.01%)
[unknown] (20,930 samples, 0.84%)
__do_global_dtors_aux (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (20,891 samples, 0.84%)
__do_global_dtors_aux (439 samples, 0.02%)
__x64_sys_futex (344 samples, 0.01%)
__srcu_read_unlock (375 samples, 0.02%)
do_anonymous_page (682 samples, 0.03%)
futex_wait (344 samples, 0.01%)
futex_wait_queue_me (315 samples, 0.01%)
__x64_sys_munmap (1,388 samples, 0.06%)
mkl_serv_inspector_suppress (627 samples, 0.03%)
[unknown] (20,934 samples, 0.84%)
___slab_alloc (453 samples, 0.02%)
smp_call_function_many_cond (1,061 samples, 0.04%)
__do_global_dtors_aux (439 samples, 0.02%)
__lock_text_start (4,537 samples, 0.18%)
[unknown] (20,929 samples, 0.84%)
try_to_wake_up (321 samples, 0.01%)
std::_Destroy<at::Tensor*, at::Tensor> (292 samples, 0.01%)
do_anonymous_page (640 samples, 0.03%)
[unknown] (6,398 samples, 0.26%)
[unknown] (20,944 samples, 0.84%)
get_page_from_freelist (329 samples, 0.01%)
entry_SYSCALL_64_after_hwframe (568 samples, 0.02%)
change_protection (347 samples, 0.01%)
[unknown] (21,420 samples, 0.86%)
down_write_killable (934 samples, 0.04%)
__do_global_dtors_aux (439 samples, 0.02%)
at::TensorIteratorBase::serial_for_each (2,653 samples, 0.11%)
std::_Destroy_aux<false>::__destroy<at::Tensor*> (285 samples, 0.01%)
perf_try_init_event (498 samples, 0.02%)
do_user_addr_fault (2,056 samples, 0.08%)
c10::TensorImpl::~TensorImpl (8,398 samples, 0.34%)
_int_free (2,204 samples, 0.09%)
entry_SYSCALL_64_after_hwframe (247 samples, 0.01%)
get_page_from_freelist (5,608 samples, 0.22%)
__x64_sys_munmap (4,037 samples, 0.16%)
[unknown] (20,932 samples, 0.84%)
irq_exit_rcu (874 samples, 0.04%)
vmsSqrt (6,986 samples, 0.28%)
[libgomp-a34b3233.so.1] (512,539 samples, 20.54%)
[libgomp-a34b3233.so.1]
[unknown] (20,989 samples, 0.84%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
do_anonymous_page (513 samples, 0.02%)
at::native::(anonymous namespace)::vectorized_loop<at::native::(anonymous namespace)::div_true_kernel (14,596 samples, 0.58%)
__alloc_pages_nodemask (5,664 samples, 0.23%)
at::TensorIterator::maybe_get_output (679 samples, 0.03%)
torch::autograd::generated::details::isFwGradDefined (309 samples, 0.01%)
malloc (2,075 samples, 0.08%)
get_page_from_freelist (373 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (377 samples, 0.02%)
entry_SYSCALL_64_after_hwframe (339 samples, 0.01%)
mkl_blas_avx512_sgemm_kernel_nocopy_NT_b0 (27,337 samples, 1.10%)
on_each_cpu_cond_mask (1,723 samples, 0.07%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
do_futex (849 samples, 0.03%)
__free_tcb (377 samples, 0.02%)
do_syscall_64 (870 samples, 0.03%)
at::TensorIteratorBase::compute_types (243 samples, 0.01%)
zap_page_range (2,718 samples, 0.11%)
mkl_blas_avx512_sgemm_kernel_nocopy_TN_b0 (55,792 samples, 2.24%)
m..
native_queued_spin_lock_slowpath (296 samples, 0.01%)
x2apic_send_IPI_mask (2,246 samples, 0.09%)
[unknown] (20,942 samples, 0.84%)
__madvise (3,027 samples, 0.12%)
entry_SYSCALL_64_after_hwframe (1,670 samples, 0.07%)
__x64_sys_clone (35,310 samples, 1.41%)
entry_SYSCALL_64_after_hwframe (378 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__lock_text_start (305 samples, 0.01%)
c10::function_ref<void (139,713 samples, 5.60%)
c10::fu..
smp_call_function_many_cond (2,246 samples, 0.09%)
do_anonymous_page (1,072 samples, 0.04%)
exit_mm_release (727 samples, 0.03%)
native_queued_spin_lock_slowpath (298 samples, 0.01%)
mkl_blas_avx512_xsgemm_nocopy_driver (568 samples, 0.02%)
mkl_blas_avx512_sgemm_kernel_0_b0 (1,490 samples, 0.06%)
std::vector<at::Tensor, std::allocator<at::Tensor> >::~vector (294 samples, 0.01%)
entry_SYSCALL_64_after_hwframe (266 samples, 0.01%)
mem_cgroup_charge (393 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
netlink_unicast (932 samples, 0.04%)
at::native::stack (411 samples, 0.02%)
mkl_vml_serv_threader_s_1i_1o.omp_fn.21 (6,581 samples, 0.26%)
do_user_addr_fault (3,439 samples, 0.14%)
[unknown] (20,928 samples, 0.84%)
free_unref_page_list (251 samples, 0.01%)
__x2apic_send_IPI_mask (1,723 samples, 0.07%)
[unknown] (20,928 samples, 0.84%)
release_pages (622 samples, 0.02%)
__lll_lock_wait_private (1,066 samples, 0.04%)
tlb_flush_mmu (2,004 samples, 0.08%)
do_softirq_own_stack (874 samples, 0.04%)
do_user_addr_fault (2,575 samples, 0.10%)
do_futex (361 samples, 0.01%)
entry_SYSCALL_64_after_hwframe (610 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
__x64_sys_munmap (1,670 samples, 0.07%)
__vm_munmap (2,132 samples, 0.09%)
[unknown] (20,933 samples, 0.84%)
[unknown] (20,928 samples, 0.84%)
[unknown] (20,904 samples, 0.84%)
[unknown] (20,913 samples, 0.84%)
alloc_pages_current (2,688 samples, 0.11%)
entry_SYSCALL_64_after_hwframe (977 samples, 0.04%)
handle_mm_fault (1,615 samples, 0.06%)
rcu_core_si (1,375 samples, 0.06%)
page_counter_cancel (215 samples, 0.01%)
[unknown] (166,754 samples, 6.68%)
[unknown]
__logf_fma (20,279 samples, 0.81%)
futex_wake (404 samples, 0.02%)
handle_mm_fault (352 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
do_syscall_64 (42,314 samples, 1.70%)
__mmap (873 samples, 0.03%)
[unknown] (20,974 samples, 0.84%)
kfree (552 samples, 0.02%)
__handle_mm_fault (1,141 samples, 0.05%)
std::vector<at::Tensor, std::allocator<at::Tensor> >::~vector (407 samples, 0.02%)
inherit_event.isra.0 (18,862 samples, 0.76%)
__do_global_dtors_aux (439 samples, 0.02%)
Sleef_finz_expf8_u10avx2 (1,462 samples, 0.06%)
__handle_mm_fault (713 samples, 0.03%)
__posix_memalign (218 samples, 0.01%)
asm_exc_page_fault (750 samples, 0.03%)
__do_global_dtors_aux (439 samples, 0.02%)
__alloc_pages_nodemask (376 samples, 0.02%)
__slab_alloc (6,409 samples, 0.26%)
x2apic_send_IPI_mask (9,638 samples, 0.39%)
__lock_text_start (225 samples, 0.01%)
__lock_text_start (494 samples, 0.02%)
smp_call_function_many (2,246 samples, 0.09%)
do_syscall_64 (728 samples, 0.03%)
asm_exc_page_fault (3,527 samples, 0.14%)
mkl_vml_serv_threader_s_1i_1o.omp_fn.21 (22,541 samples, 0.90%)
[unknown] (20,940 samples, 0.84%)
inherit_task_group.isra.0.part.0 (20,233 samples, 0.81%)
c10::TensorImpl::set_sizes_and_strides (283 samples, 0.01%)
exc_page_fault (3,124 samples, 0.13%)
[unknown] (13,260 samples, 0.53%)
futex_wake (377 samples, 0.02%)
native_send_call_func_ipi (2,246 samples, 0.09%)
do_anonymous_page (661 samples, 0.03%)
wake_up_q (495 samples, 0.02%)
allocate_slab (451 samples, 0.02%)
do_syscall_64 (377 samples, 0.02%)
alloc_pages_vma (361 samples, 0.01%)
[unknown] (20,929 samples, 0.84%)
__handle_mm_fault (327 samples, 0.01%)
do_syscall_64 (610 samples, 0.02%)
__x64_sys_futex (364 samples, 0.01%)
tlb_flush_mmu (661 samples, 0.03%)
__cxa_pure_virtual (1,193 samples, 0.05%)
std::_Destroy<at::Tensor> (383 samples, 0.02%)
[unknown] (20,934 samples, 0.84%)
__vm_munmap (4,037 samples, 0.16%)
vm_mmap_pgoff (635 samples, 0.03%)
__brk (251 samples, 0.01%)
do_anonymous_page (1,087 samples, 0.04%)
smp_call_function_many_cond (30,812 samples, 1.23%)
[libgomp-a34b3233.so.1] (7,236 samples, 0.29%)
perf_event_init_task (20,471 samples, 0.82%)
__get_vm_area_node (1,209 samples, 0.05%)
omp_simple_3d (935 samples, 0.04%)
lru_cache_add_inactive_or_unevictable (216 samples, 0.01%)
split_vma (428 samples, 0.02%)
lru_cache_add (214 samples, 0.01%)
do_user_addr_fault (552 samples, 0.02%)
__x64_sys_munmap (870 samples, 0.03%)
do_user_addr_fault (2,999 samples, 0.12%)
__do_munmap (2,132 samples, 0.09%)
exc_page_fault (825 samples, 0.03%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
do_syscall_64 (2,132 samples, 0.09%)
__x64_sys_futex (214 samples, 0.01%)
mem_cgroup_charge (359 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
rmqueue (326 samples, 0.01%)
[unknown] (2,228 samples, 0.09%)
__do_global_dtors_aux (439 samples, 0.02%)
__do_munmap (1,534 samples, 0.06%)
do_syscall_64 (266 samples, 0.01%)
vssqrt_cout_rare (1,084 samples, 0.04%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
__x2apic_send_IPI_mask (9,634 samples, 0.39%)
__clone (36,856 samples, 1.48%)
[unknown] (20,972 samples, 0.84%)
[unknown] (20,928 samples, 0.84%)
do_anonymous_page (1,193 samples, 0.05%)
mem_cgroup_uncharge_list (218 samples, 0.01%)
__vm_munmap (1,388 samples, 0.06%)
__munmap (266 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
at::parallel_dim_reduction (693 samples, 0.03%)
__x64_sys_futex (315 samples, 0.01%)
futex_wait (315 samples, 0.01%)
asm_exc_page_fault (1,497 samples, 0.06%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (20,943 samples, 0.84%)
[unknown] (20,928 samples, 0.84%)
omp_simple_3d.omp_fn.2 (466 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
[unknown] (4,545 samples, 0.18%)
[unknown] (20,928 samples, 0.84%)
try_to_wake_up (287 samples, 0.01%)
schedule (315 samples, 0.01%)
[unknown] (8,392 samples, 0.34%)
__do_global_dtors_aux (439 samples, 0.02%)
start_thread (4,074 samples, 0.16%)
__x64_sys_madvise (42,177 samples, 1.69%)
do_syscall_64 (1,388 samples, 0.06%)
__x64_sys_exit (3,713 samples, 0.15%)
do_futex (1,907 samples, 0.08%)
[unknown] (20,936 samples, 0.84%)
futex_wait_queue_me (223 samples, 0.01%)
do_madvise.part.0 (2,946 samples, 0.12%)
do_user_addr_fault (944 samples, 0.04%)
[unknown] (20,966 samples, 0.84%)
do_syscall_64 (214 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
flush_tlb_mm_range (1,868 samples, 0.07%)
__do_global_dtors_aux (439 samples, 0.02%)
alloc_pages_vma (324 samples, 0.01%)
do_syscall_64 (374 samples, 0.01%)
__handle_mm_fault (700 samples, 0.03%)
[unknown] (20,932 samples, 0.84%)
page_counter_uncharge (218 samples, 0.01%)
get_page_from_freelist (416 samples, 0.02%)
[unknown] (20,945 samples, 0.84%)
exc_page_fault (1,497 samples, 0.06%)
__memcg_kmem_charge (288 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (20,938 samples, 0.84%)
c10::function_ref<void (5,486 samples, 0.22%)
__handle_mm_fault (1,500 samples, 0.06%)
[unknown] (20,928 samples, 0.84%)
c10::intrusive_ptr_target::~intrusive_ptr_target (342 samples, 0.01%)
__lll_lock_wait_private (512 samples, 0.02%)
smp_call_function_many_cond (1,723 samples, 0.07%)
do_softirq_own_stack (2,124 samples, 0.09%)
__x64_sys_munmap (2,132 samples, 0.09%)
__vmalloc_node_range (4,606 samples, 0.18%)
mkl_vml_kernel_sSqrt_Z0HAynn (240 samples, 0.01%)
gemm_omp_driver_v2.omp_fn.1 (11,560 samples, 0.46%)
__do_global_dtors_aux (439 samples, 0.02%)
__do_global_dtors_aux (439 samples, 0.02%)
do_futex (328 samples, 0.01%)
__do_global_dtors_aux (439 samples, 0.02%)
at::CPUGeneratorImpl::random (229 samples, 0.01%)
__do_munmap (977 samples, 0.04%)
native_flush_tlb_others (35,935 samples, 1.44%)
__pte_alloc (282 samples, 0.01%)
at::TensorIteratorBase::serial_for_each (141,846 samples, 5.68%)
at::Ten..
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
mem_cgroup_charge (218 samples, 0.01%)
__init_waitqueue_head (338 samples, 0.01%)
[unknown] (20,928 samples, 0.84%)
__x64_sys_munmap (374 samples, 0.01%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
do_exit (3,678 samples, 0.15%)
native_flush_tlb_others (1,868 samples, 0.07%)
entry_SYSCALL_64_after_hwframe (1,907 samples, 0.08%)
std::_Destroy<at::Tensor*> (404 samples, 0.02%)
__x64_sys_mprotect (1,158 samples, 0.05%)
asm_exc_page_fault (942 samples, 0.04%)
do_user_addr_fault (819 samples, 0.03%)
__do_global_dtors_aux (439 samples, 0.02%)
__x64_sys_mmap (658 samples, 0.03%)
__do_munmap (4,037 samples, 0.16%)
allocate_slab (6,110 samples, 0.24%)
__do_global_dtors_aux (439 samples, 0.02%)
c10::impl::wrap_kernel_functor_unboxed_<c10::impl::detail::WrapFunctionIntoFunctor_<c10::CompileTimeFunctionPointer<at::Tensor (216 samples, 0.01%)
tlb_gather_mmu (291 samples, 0.01%)
__vm_munmap (870 samples, 0.03%)
mkl_blas_avx512_sgemm_scopy_down8_ea (2,891 samples, 0.12%)
dup_task_struct (7,308 samples, 0.29%)
c10::function_ref<void (1,966 samples, 0.08%)
mkl_vml_kernel_sSqrt_Z0HAynn (5,490 samples, 0.22%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
asm_call_sysvec_on_stack (2,124 samples, 0.09%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (16,368 samples, 0.66%)
[unknown] (20,936 samples, 0.84%)
__hrtimer_init (308 samples, 0.01%)
smp_call_function_many_cond (586 samples, 0.02%)
__handle_mm_fault (551 samples, 0.02%)
[unknown] (20,929 samples, 0.84%)
cgroup_post_fork (252 samples, 0.01%)
wake_up_new_task (4,546 samples, 0.18%)
memset_erms (682 samples, 0.03%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
[unknown] (12,261 samples, 0.49%)
alloc_pages_vma (468 samples, 0.02%)
pskb_expand_head (332 samples, 0.01%)
handle_mm_fault (1,047 samples, 0.04%)
free_pages_and_swap_cache (218 samples, 0.01%)
alloc_pages_current (5,671 samples, 0.23%)
handle_mm_fault (1,410 samples, 0.06%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
netlink_trim (370 samples, 0.01%)
wake_up_q (1,907 samples, 0.08%)
at::native::templates::normal_impl_<at::native::NormalStub, at::Generator> (439 samples, 0.02%)
omp_get_num_threads (623 samples, 0.02%)
[unknown] (1,193 samples, 0.05%)
unmap_region (2,130 samples, 0.09%)