diff options
Diffstat (limited to 'fs/proc/task_mmu.c')
| -rw-r--r-- | fs/proc/task_mmu.c | 357 | 
1 files changed, 261 insertions, 96 deletions
| diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7dcd2a250495..9694cc283511 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -209,16 +209,20 @@ static int do_maps_open(struct inode *inode, struct file *file,  	return ret;  } -static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) +static void +show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)  {  	struct mm_struct *mm = vma->vm_mm;  	struct file *file = vma->vm_file; +	struct proc_maps_private *priv = m->private; +	struct task_struct *task = priv->task;  	vm_flags_t flags = vma->vm_flags;  	unsigned long ino = 0;  	unsigned long long pgoff = 0;  	unsigned long start, end;  	dev_t dev = 0;  	int len; +	const char *name = NULL;  	if (file) {  		struct inode *inode = vma->vm_file->f_path.dentry->d_inode; @@ -252,36 +256,57 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)  	if (file) {  		pad_len_spaces(m, len);  		seq_path(m, &file->f_path, "\n"); -	} else { -		const char *name = arch_vma_name(vma); -		if (!name) { -			if (mm) { -				if (vma->vm_start <= mm->brk && -						vma->vm_end >= mm->start_brk) { -					name = "[heap]"; -				} else if (vma->vm_start <= mm->start_stack && -					   vma->vm_end >= mm->start_stack) { -					name = "[stack]"; -				} +		goto done; +	} + +	name = arch_vma_name(vma); +	if (!name) { +		pid_t tid; + +		if (!mm) { +			name = "[vdso]"; +			goto done; +		} + +		if (vma->vm_start <= mm->brk && +		    vma->vm_end >= mm->start_brk) { +			name = "[heap]"; +			goto done; +		} + +		tid = vm_is_stack(task, vma, is_pid); + +		if (tid != 0) { +			/* +			 * Thread stack in /proc/PID/task/TID/maps or +			 * the main process stack. +			 */ +			if (!is_pid || (vma->vm_start <= mm->start_stack && +			    vma->vm_end >= mm->start_stack)) { +				name = "[stack]";  			} else { -				name = "[vdso]"; +				/* Thread stack in /proc/PID/maps */ +				pad_len_spaces(m, len); +				seq_printf(m, "[stack:%d]", tid);  			}  		} -		if (name) { -			pad_len_spaces(m, len); -			seq_puts(m, name); -		} +	} + +done: +	if (name) { +		pad_len_spaces(m, len); +		seq_puts(m, name);  	}  	seq_putc(m, '\n');  } -static int show_map(struct seq_file *m, void *v) +static int show_map(struct seq_file *m, void *v, int is_pid)  {  	struct vm_area_struct *vma = v;  	struct proc_maps_private *priv = m->private;  	struct task_struct *task = priv->task; -	show_map_vma(m, vma); +	show_map_vma(m, vma, is_pid);  	if (m->count < m->size)  /* vma is copied successfully */  		m->version = (vma != get_gate_vma(task->mm)) @@ -289,20 +314,49 @@ static int show_map(struct seq_file *m, void *v)  	return 0;  } +static int show_pid_map(struct seq_file *m, void *v) +{ +	return show_map(m, v, 1); +} + +static int show_tid_map(struct seq_file *m, void *v) +{ +	return show_map(m, v, 0); +} +  static const struct seq_operations proc_pid_maps_op = {  	.start	= m_start,  	.next	= m_next,  	.stop	= m_stop, -	.show	= show_map +	.show	= show_pid_map  }; -static int maps_open(struct inode *inode, struct file *file) +static const struct seq_operations proc_tid_maps_op = { +	.start	= m_start, +	.next	= m_next, +	.stop	= m_stop, +	.show	= show_tid_map +}; + +static int pid_maps_open(struct inode *inode, struct file *file)  {  	return do_maps_open(inode, file, &proc_pid_maps_op);  } -const struct file_operations proc_maps_operations = { -	.open		= maps_open, +static int tid_maps_open(struct inode *inode, struct file *file) +{ +	return do_maps_open(inode, file, &proc_tid_maps_op); +} + +const struct file_operations proc_pid_maps_operations = { +	.open		= pid_maps_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release_private, +}; + +const struct file_operations proc_tid_maps_operations = { +	.open		= tid_maps_open,  	.read		= seq_read,  	.llseek		= seq_lseek,  	.release	= seq_release_private, @@ -394,21 +448,15 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,  	pte_t *pte;  	spinlock_t *ptl; -	spin_lock(&walk->mm->page_table_lock); -	if (pmd_trans_huge(*pmd)) { -		if (pmd_trans_splitting(*pmd)) { -			spin_unlock(&walk->mm->page_table_lock); -			wait_split_huge_page(vma->anon_vma, pmd); -		} else { -			smaps_pte_entry(*(pte_t *)pmd, addr, -					HPAGE_PMD_SIZE, walk); -			spin_unlock(&walk->mm->page_table_lock); -			mss->anonymous_thp += HPAGE_PMD_SIZE; -			return 0; -		} -	} else { +	if (pmd_trans_huge_lock(pmd, vma) == 1) { +		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);  		spin_unlock(&walk->mm->page_table_lock); +		mss->anonymous_thp += HPAGE_PMD_SIZE; +		return 0;  	} + +	if (pmd_trans_unstable(pmd)) +		return 0;  	/*  	 * The mmap_sem held all the way back in m_start() is what  	 * keeps khugepaged out of here and from collapsing things @@ -422,7 +470,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,  	return 0;  } -static int show_smap(struct seq_file *m, void *v) +static int show_smap(struct seq_file *m, void *v, int is_pid)  {  	struct proc_maps_private *priv = m->private;  	struct task_struct *task = priv->task; @@ -440,7 +488,7 @@ static int show_smap(struct seq_file *m, void *v)  	if (vma->vm_mm && !is_vm_hugetlb_page(vma))  		walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); -	show_map_vma(m, vma); +	show_map_vma(m, vma, is_pid);  	seq_printf(m,  		   "Size:           %8lu kB\n" @@ -479,20 +527,49 @@ static int show_smap(struct seq_file *m, void *v)  	return 0;  } +static int show_pid_smap(struct seq_file *m, void *v) +{ +	return show_smap(m, v, 1); +} + +static int show_tid_smap(struct seq_file *m, void *v) +{ +	return show_smap(m, v, 0); +} +  static const struct seq_operations proc_pid_smaps_op = {  	.start	= m_start,  	.next	= m_next,  	.stop	= m_stop, -	.show	= show_smap +	.show	= show_pid_smap +}; + +static const struct seq_operations proc_tid_smaps_op = { +	.start	= m_start, +	.next	= m_next, +	.stop	= m_stop, +	.show	= show_tid_smap  }; -static int smaps_open(struct inode *inode, struct file *file) +static int pid_smaps_open(struct inode *inode, struct file *file)  {  	return do_maps_open(inode, file, &proc_pid_smaps_op);  } -const struct file_operations proc_smaps_operations = { -	.open		= smaps_open, +static int tid_smaps_open(struct inode *inode, struct file *file) +{ +	return do_maps_open(inode, file, &proc_tid_smaps_op); +} + +const struct file_operations proc_pid_smaps_operations = { +	.open		= pid_smaps_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release_private, +}; + +const struct file_operations proc_tid_smaps_operations = { +	.open		= tid_smaps_open,  	.read		= seq_read,  	.llseek		= seq_lseek,  	.release	= seq_release_private, @@ -507,6 +584,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,  	struct page *page;  	split_huge_page_pmd(walk->mm, pmd); +	if (pmd_trans_unstable(pmd)) +		return 0;  	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);  	for (; addr != end; pte++, addr += PAGE_SIZE) { @@ -598,11 +677,18 @@ const struct file_operations proc_clear_refs_operations = {  	.llseek		= noop_llseek,  }; +typedef struct { +	u64 pme; +} pagemap_entry_t; +  struct pagemapread {  	int pos, len; -	u64 *buffer; +	pagemap_entry_t *buffer;  }; +#define PAGEMAP_WALK_SIZE	(PMD_SIZE) +#define PAGEMAP_WALK_MASK	(PMD_MASK) +  #define PM_ENTRY_BYTES      sizeof(u64)  #define PM_STATUS_BITS      3  #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS) @@ -620,10 +706,15 @@ struct pagemapread {  #define PM_NOT_PRESENT      PM_PSHIFT(PAGE_SHIFT)  #define PM_END_OF_BUFFER    1 -static int add_to_pagemap(unsigned long addr, u64 pfn, +static inline pagemap_entry_t make_pme(u64 val) +{ +	return (pagemap_entry_t) { .pme = val }; +} + +static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,  			  struct pagemapread *pm)  { -	pm->buffer[pm->pos++] = pfn; +	pm->buffer[pm->pos++] = *pme;  	if (pm->pos >= pm->len)  		return PM_END_OF_BUFFER;  	return 0; @@ -635,8 +726,10 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,  	struct pagemapread *pm = walk->private;  	unsigned long addr;  	int err = 0; +	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); +  	for (addr = start; addr < end; addr += PAGE_SIZE) { -		err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); +		err = add_to_pagemap(addr, &pme, pm);  		if (err)  			break;  	} @@ -649,17 +742,35 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)  	return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);  } -static u64 pte_to_pagemap_entry(pte_t pte) +static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)  { -	u64 pme = 0;  	if (is_swap_pte(pte)) -		pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) -			| PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; +		*pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte)) +				| PM_PSHIFT(PAGE_SHIFT) | PM_SWAP);  	else if (pte_present(pte)) -		pme = PM_PFRAME(pte_pfn(pte)) -			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; -	return pme; +		*pme = make_pme(PM_PFRAME(pte_pfn(pte)) +				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, +					pmd_t pmd, int offset) +{ +	/* +	 * Currently pmd for thp is always present because thp can not be +	 * swapped-out, migrated, or HWPOISONed (split in such cases instead.) +	 * This if-check is just to prepare for future implementation. +	 */ +	if (pmd_present(pmd)) +		*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) +				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);  } +#else +static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, +						pmd_t pmd, int offset) +{ +} +#endif  static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,  			     struct mm_walk *walk) @@ -668,13 +779,30 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,  	struct pagemapread *pm = walk->private;  	pte_t *pte;  	int err = 0; +	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); -	split_huge_page_pmd(walk->mm, pmd); +	if (pmd_trans_unstable(pmd)) +		return 0;  	/* find the first VMA at or above 'addr' */  	vma = find_vma(walk->mm, addr); +	spin_lock(&walk->mm->page_table_lock); +	if (pmd_trans_huge_lock(pmd, vma) == 1) { +		for (; addr != end; addr += PAGE_SIZE) { +			unsigned long offset; + +			offset = (addr & ~PAGEMAP_WALK_MASK) >> +					PAGE_SHIFT; +			thp_pmd_to_pagemap_entry(&pme, *pmd, offset); +			err = add_to_pagemap(addr, &pme, pm); +			if (err) +				break; +		} +		spin_unlock(&walk->mm->page_table_lock); +		return err; +	} +  	for (; addr != end; addr += PAGE_SIZE) { -		u64 pfn = PM_NOT_PRESENT;  		/* check to see if we've left 'vma' behind  		 * and need a new, higher one */ @@ -686,11 +814,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,  		if (vma && (vma->vm_start <= addr) &&  		    !is_vm_hugetlb_page(vma)) {  			pte = pte_offset_map(pmd, addr); -			pfn = pte_to_pagemap_entry(*pte); +			pte_to_pagemap_entry(&pme, *pte);  			/* unmap before userspace copy */  			pte_unmap(pte);  		} -		err = add_to_pagemap(addr, pfn, pm); +		err = add_to_pagemap(addr, &pme, pm);  		if (err)  			return err;  	} @@ -701,13 +829,12 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,  }  #ifdef CONFIG_HUGETLB_PAGE -static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) +static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, +					pte_t pte, int offset)  { -	u64 pme = 0;  	if (pte_present(pte)) -		pme = PM_PFRAME(pte_pfn(pte) + offset) -			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; -	return pme; +		*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) +				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);  }  /* This function walks within one hugetlb entry in the single call */ @@ -717,12 +844,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,  {  	struct pagemapread *pm = walk->private;  	int err = 0; -	u64 pfn; +	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);  	for (; addr != end; addr += PAGE_SIZE) {  		int offset = (addr & ~hmask) >> PAGE_SHIFT; -		pfn = huge_pte_to_pagemap_entry(*pte, offset); -		err = add_to_pagemap(addr, pfn, pm); +		huge_pte_to_pagemap_entry(&pme, *pte, offset); +		err = add_to_pagemap(addr, &pme, pm);  		if (err)  			return err;  	} @@ -757,8 +884,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,   * determine which areas of memory are actually mapped and llseek to   * skip over unmapped regions.   */ -#define PAGEMAP_WALK_SIZE	(PMD_SIZE) -#define PAGEMAP_WALK_MASK	(PMD_MASK)  static ssize_t pagemap_read(struct file *file, char __user *buf,  			    size_t count, loff_t *ppos)  { @@ -941,26 +1066,21 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,  	pte_t *pte;  	md = walk->private; -	spin_lock(&walk->mm->page_table_lock); -	if (pmd_trans_huge(*pmd)) { -		if (pmd_trans_splitting(*pmd)) { -			spin_unlock(&walk->mm->page_table_lock); -			wait_split_huge_page(md->vma->anon_vma, pmd); -		} else { -			pte_t huge_pte = *(pte_t *)pmd; -			struct page *page; - -			page = can_gather_numa_stats(huge_pte, md->vma, addr); -			if (page) -				gather_stats(page, md, pte_dirty(huge_pte), -						HPAGE_PMD_SIZE/PAGE_SIZE); -			spin_unlock(&walk->mm->page_table_lock); -			return 0; -		} -	} else { + +	if (pmd_trans_huge_lock(pmd, md->vma) == 1) { +		pte_t huge_pte = *(pte_t *)pmd; +		struct page *page; + +		page = can_gather_numa_stats(huge_pte, md->vma, addr); +		if (page) +			gather_stats(page, md, pte_dirty(huge_pte), +				     HPAGE_PMD_SIZE/PAGE_SIZE);  		spin_unlock(&walk->mm->page_table_lock); +		return 0;  	} +	if (pmd_trans_unstable(pmd)) +		return 0;  	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);  	do {  		struct page *page = can_gather_numa_stats(*pte, md->vma, addr); @@ -1002,7 +1122,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,  /*   * Display pages allocated per node and memory policy via /proc.   */ -static int show_numa_map(struct seq_file *m, void *v) +static int show_numa_map(struct seq_file *m, void *v, int is_pid)  {  	struct numa_maps_private *numa_priv = m->private;  	struct proc_maps_private *proc_priv = &numa_priv->proc_maps; @@ -1039,9 +1159,19 @@ static int show_numa_map(struct seq_file *m, void *v)  		seq_path(m, &file->f_path, "\n\t= ");  	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {  		seq_printf(m, " heap"); -	} else if (vma->vm_start <= mm->start_stack && -			vma->vm_end >= mm->start_stack) { -		seq_printf(m, " stack"); +	} else { +		pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid); +		if (tid != 0) { +			/* +			 * Thread stack in /proc/PID/task/TID/maps or +			 * the main process stack. +			 */ +			if (!is_pid || (vma->vm_start <= mm->start_stack && +			    vma->vm_end >= mm->start_stack)) +				seq_printf(m, " stack"); +			else +				seq_printf(m, " stack:%d", tid); +		}  	}  	if (is_vm_hugetlb_page(vma)) @@ -1084,21 +1214,39 @@ out:  	return 0;  } +static int show_pid_numa_map(struct seq_file *m, void *v) +{ +	return show_numa_map(m, v, 1); +} + +static int show_tid_numa_map(struct seq_file *m, void *v) +{ +	return show_numa_map(m, v, 0); +} +  static const struct seq_operations proc_pid_numa_maps_op = { -        .start  = m_start, -        .next   = m_next, -        .stop   = m_stop, -        .show   = show_numa_map, +	.start  = m_start, +	.next   = m_next, +	.stop   = m_stop, +	.show   = show_pid_numa_map, +}; + +static const struct seq_operations proc_tid_numa_maps_op = { +	.start  = m_start, +	.next   = m_next, +	.stop   = m_stop, +	.show   = show_tid_numa_map,  }; -static int numa_maps_open(struct inode *inode, struct file *file) +static int numa_maps_open(struct inode *inode, struct file *file, +			  const struct seq_operations *ops)  {  	struct numa_maps_private *priv;  	int ret = -ENOMEM;  	priv = kzalloc(sizeof(*priv), GFP_KERNEL);  	if (priv) {  		priv->proc_maps.pid = proc_pid(inode); -		ret = seq_open(file, &proc_pid_numa_maps_op); +		ret = seq_open(file, ops);  		if (!ret) {  			struct seq_file *m = file->private_data;  			m->private = priv; @@ -1109,8 +1257,25 @@ static int numa_maps_open(struct inode *inode, struct file *file)  	return ret;  } -const struct file_operations proc_numa_maps_operations = { -	.open		= numa_maps_open, +static int pid_numa_maps_open(struct inode *inode, struct file *file) +{ +	return numa_maps_open(inode, file, &proc_pid_numa_maps_op); +} + +static int tid_numa_maps_open(struct inode *inode, struct file *file) +{ +	return numa_maps_open(inode, file, &proc_tid_numa_maps_op); +} + +const struct file_operations proc_pid_numa_maps_operations = { +	.open		= pid_numa_maps_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release_private, +}; + +const struct file_operations proc_tid_numa_maps_operations = { +	.open		= tid_numa_maps_open,  	.read		= seq_read,  	.llseek		= seq_lseek,  	.release	= seq_release_private, | 
