When I first read ULK (3rd edition) I didn't pair it with a reasonably recent version of the source code, so my understanding stayed shallow. Later I read Linux Kernel Development; although the material is a bit dated, its structure is very clear, even if it is light on detail. Building on that understanding of Linux Kernel Development, I went back through ULK together with the kernel 3.10 source, and the notes below are the result.
In the kernel's task_struct, the fields struct mm_struct *mm, *active_mm record the mapping of the user-space address space. In the mm_struct listing below I have dropped some of the NUMA-related fields; NUMA matters a great deal on modern machines, but it pulls in too many extra details that get in the way of the core ideas, so it is simpler to leave them out here. I may go through NUMA separately some other time.
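Before continuing with the mm_struct fields, here is a trimmed sketch of those two task_struct fields, roughly as they appear in kernel 3.10's include/linux/sched.h (everything around them is elided). A normal user process has mm == active_mm; a kernel thread has mm == NULL and borrows the previous task's address space through active_mm.

/* Trimmed sketch of task_struct (kernel 3.10); unrelated fields elided. */
struct task_struct {
    ...
    struct mm_struct *mm;        /* user address space; NULL for a kernel thread */
    struct mm_struct *active_mm; /* mm actually loaded on the CPU; a kernel
                                  * thread borrows this from the previous task */
    ...
};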
    unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

    /*
     * Special counters, in some configurations protected by the
     * page_table_lock, in other configurations by being atomic.
     */
    struct mm_rss_stat rss_stat;

    struct linux_binfmt *binfmt;

    cpumask_var_t cpu_vm_mask_var;

    /* Architecture-specific MM context */
    mm_context_t context;

    unsigned long flags; /* Must use atomic bitops to access the bits */

    struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_AIO
    spinlock_t ioctx_lock;
    struct hlist_head ioctx_list;
#endif
#ifdef CONFIG_MM_OWNER
    /*
     * "owner" points to a task that is regarded as the canonical
     * user/owner of this mm. All of the following must be true in
     * order for it to be changed:
     *
     * current == mm->owner
     * current->mm != mm
     * new_owner->mm == mm
     * new_owner->alloc_lock is held
     */
    struct task_struct __rcu *owner;
#endif

    /* store ref to file /proc/<pid>/exe symlink points to */
    struct file *exe_file;
    ...
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
    pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
    struct cpumask cpumask_allocation;
#endif
    ...
    struct uprobes_state uprobes_state;
};
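As a usage note for the rss_stat counters above, here is a minimal kernel-style sketch. get_mm_counter(), get_mm_rss(), and the MM_* counter indices are the real helpers/constants from include/linux/mm.h and mm_types.h; print_rss() itself is a made-up illustration, not code from the kernel or the books.

#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/printk.h>

/* Hypothetical helper: read the per-mm RSS counters much like fs/proc does
 * when filling /proc/<pid>/status. */
static void print_rss(struct mm_struct *mm)
{
    unsigned long file = get_mm_counter(mm, MM_FILEPAGES);
    unsigned long anon = get_mm_counter(mm, MM_ANONPAGES);
    unsigned long swap = get_mm_counter(mm, MM_SWAPENTS);

    pr_info("rss=%lu pages (file=%lu anon=%lu) swap=%lu\n",
            get_mm_rss(mm), file, anon, swap);
}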
This struct defines a memory VMM memory area. There is one of these per VM-area/task. A VM area is any part of the process virtual memory space that has a special rule for the page-fault handlers (ie a shared library, the executable area etc).
struct vm_area_struct {
    /* The first cache line has the info for VMA tree walking. */

    unsigned long vm_start;     /* Our start address within vm_mm. */
    unsigned long vm_end;       /* The first byte after our end address
                                   within vm_mm. */

    /* linked list of VM areas per task, sorted by address */
    struct vm_area_struct *vm_next, *vm_prev;

    struct rb_node vm_rb;

    /*
     * Largest free memory gap in bytes to the left of this VMA.
     * Either between this VMA and vma->vm_prev, or between one of the
     * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
     * get_unmapped_area find a free area of the right size.
     */
    unsigned long rb_subtree_gap;

    /* Second cache line starts here. */

    struct mm_struct *vm_mm;    /* The address space we belong to. */
    pgprot_t vm_page_prot;      /* Access permissions of this VMA. */
    unsigned long vm_flags;     /* Flags, see mm.h. */

    /*
     * For areas with an address space and backing store,
     * linkage into the address_space->i_mmap interval tree, or
     * linkage of vma in the address_space->i_mmap_nonlinear list.
     */
    union {
        struct {
            struct rb_node rb;
            unsigned long rb_subtree_last;
        } linear;
        struct list_head nonlinear;
    } shared;

    /*
     * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
     * list, after a COW of one of the file pages.  A MAP_SHARED vma
     * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
     * or brk vma (with NULL file) can only be in an anon_vma list.
     */
    struct list_head anon_vma_chain; /* Serialized by mmap_sem &
                                      * page_table_lock */
    struct anon_vma *anon_vma;       /* Serialized by page_table_lock */

    /* Function pointers to deal with this struct. */
    const struct vm_operations_struct *vm_ops;

    /* Information about our backing store: */
    unsigned long vm_pgoff;     /* Offset (within vm_file) in PAGE_SIZE
                                   units, *not* PAGE_CACHE_SIZE */
    struct file *vm_file;       /* File we map to (can be NULL). */
    void *vm_private_data;      /* was vm_pte (shared mem) */

#ifndef CONFIG_MMU
    struct vm_region *vm_region; /* NOMMU mapping region */
#endif
};
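To see how the list and locking fields are meant to be used together, here is a minimal kernel-style sketch that walks a task's VMAs through the sorted vm_next list, conceptually what /proc/<pid>/maps does. dump_vmas() is a made-up name; mm->mmap, mmap_sem, vm_flags, and the VM_* flag bits are the real 3.10 fields.

#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/printk.h>

/* Hypothetical helper: print every VMA of an address space. */
static void dump_vmas(struct mm_struct *mm)
{
    struct vm_area_struct *vma;

    down_read(&mm->mmap_sem);   /* the VMA list and rbtree are protected by mmap_sem */
    for (vma = mm->mmap; vma; vma = vma->vm_next)
        pr_info("%08lx-%08lx %c%c%c %s\n",
                vma->vm_start, vma->vm_end,
                (vma->vm_flags & VM_READ)  ? 'r' : '-',
                (vma->vm_flags & VM_WRITE) ? 'w' : '-',
                (vma->vm_flags & VM_EXEC)  ? 'x' : '-',
                vma->vm_file ? "file-backed" : "anonymous");
    up_read(&mm->mmap_sem);
}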
These are the virtual MM functions - opening of an area, closing and unmapping it (needed to keep files on disk up-to-date etc), pointer to the functions called when a no-page or a wp-page exception occurs.
struct vm_operations_struct {
    ...
    /* notification that a previously read-only page is about to become
     * writable, if an error is returned it will cause a SIGBUS */
    int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);

    /* called by access_process_vm when get_user_pages() fails, typically
     * for use by special VMAs that can switch between memory and hardware */
    int (*access)(struct vm_area_struct *vma, unsigned long addr,
                  void *buf, int len, int write);
    ...
    /* called by sys_remap_file_pages() to populate non-linear mapping */
    int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
                       unsigned long size, pgoff_t pgoff);
};
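As a concrete (hypothetical) example of how a driver plugs into this table, the sketch below installs a custom ->fault handler with the 3.10-era signature shown above. my_vm_fault, my_vm_ops, and my_mmap are made-up names; alloc_page(), clear_highpage(), and VM_FAULT_OOM are the real kernel APIs.

#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/mm.h>

/* Hypothetical fault handler: allocate a fresh zeroed page on first touch and
 * hand it back to the fault path via vmf->page (the page reference is consumed
 * by the caller when it installs the PTE). */
static int my_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
    struct page *page = alloc_page(GFP_KERNEL);

    if (!page)
        return VM_FAULT_OOM;
    clear_highpage(page);       /* don't leak stale contents to user space */
    vmf->page = page;
    return 0;
}

static const struct vm_operations_struct my_vm_ops = {
    .fault = my_vm_fault,
};

/* Hypothetical ->mmap of a character device: just install the operations. */
static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
    vma->vm_ops = &my_vm_ops;
    return 0;
}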