Version:  2.0.40 2.2.26 2.4.37 3.13 3.14 3.15 3.16 3.17 3.18 3.19 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 4.10

Linux/mm/page_idle.c

  1 #include <linux/init.h>
  2 #include <linux/bootmem.h>
  3 #include <linux/fs.h>
  4 #include <linux/sysfs.h>
  5 #include <linux/kobject.h>
  6 #include <linux/mm.h>
  7 #include <linux/mmzone.h>
  8 #include <linux/pagemap.h>
  9 #include <linux/rmap.h>
 10 #include <linux/mmu_notifier.h>
 11 #include <linux/page_ext.h>
 12 #include <linux/page_idle.h>
 13 
 14 #define BITMAP_CHUNK_SIZE       sizeof(u64)
 15 #define BITMAP_CHUNK_BITS       (BITMAP_CHUNK_SIZE * BITS_PER_BYTE)
 16 
 17 /*
 18  * Idle page tracking only considers user memory pages, for other types of
 19  * pages the idle flag is always unset and an attempt to set it is silently
 20  * ignored.
 21  *
 22  * We treat a page as a user memory page if it is on an LRU list, because it is
 23  * always safe to pass such a page to rmap_walk(), which is essential for idle
 24  * page tracking. With such an indicator of user pages we can skip isolated
 25  * pages, but since there are not usually many of them, it will hardly affect
 26  * the overall result.
 27  *
 28  * This function tries to get a user memory page by pfn as described above.
 29  */
 30 static struct page *page_idle_get_page(unsigned long pfn)
 31 {
 32         struct page *page;
 33         struct zone *zone;
 34 
 35         if (!pfn_valid(pfn))
 36                 return NULL;
 37 
 38         page = pfn_to_page(pfn);
 39         if (!page || !PageLRU(page) ||
 40             !get_page_unless_zero(page))
 41                 return NULL;
 42 
 43         zone = page_zone(page);
 44         spin_lock_irq(zone_lru_lock(zone));
 45         if (unlikely(!PageLRU(page))) {
 46                 put_page(page);
 47                 page = NULL;
 48         }
 49         spin_unlock_irq(zone_lru_lock(zone));
 50         return page;
 51 }
 52 
 53 static int page_idle_clear_pte_refs_one(struct page *page,
 54                                         struct vm_area_struct *vma,
 55                                         unsigned long addr, void *arg)
 56 {
 57         struct mm_struct *mm = vma->vm_mm;
 58         pmd_t *pmd;
 59         pte_t *pte;
 60         spinlock_t *ptl;
 61         bool referenced = false;
 62 
 63         if (!page_check_address_transhuge(page, mm, addr, &pmd, &pte, &ptl))
 64                 return SWAP_AGAIN;
 65 
 66         if (pte) {
 67                 referenced = ptep_clear_young_notify(vma, addr, pte);
 68                 pte_unmap(pte);
 69         } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 70                 referenced = pmdp_clear_young_notify(vma, addr, pmd);
 71         } else {
 72                 /* unexpected pmd-mapped page? */
 73                 WARN_ON_ONCE(1);
 74         }
 75 
 76         spin_unlock(ptl);
 77 
 78         if (referenced) {
 79                 clear_page_idle(page);
 80                 /*
 81                  * We cleared the referenced bit in a mapping to this page. To
 82                  * avoid interference with page reclaim, mark it young so that
 83                  * page_referenced() will return > 0.
 84                  */
 85                 set_page_young(page);
 86         }
 87         return SWAP_AGAIN;
 88 }
 89 
 90 static void page_idle_clear_pte_refs(struct page *page)
 91 {
 92         /*
 93          * Since rwc.arg is unused, rwc is effectively immutable, so we
 94          * can make it static const to save some cycles and stack.
 95          */
 96         static const struct rmap_walk_control rwc = {
 97                 .rmap_one = page_idle_clear_pte_refs_one,
 98                 .anon_lock = page_lock_anon_vma_read,
 99         };
100         bool need_lock;
101 
102         if (!page_mapped(page) ||
103             !page_rmapping(page))
104                 return;
105 
106         need_lock = !PageAnon(page) || PageKsm(page);
107         if (need_lock && !trylock_page(page))
108                 return;
109 
110         rmap_walk(page, (struct rmap_walk_control *)&rwc);
111 
112         if (need_lock)
113                 unlock_page(page);
114 }
115 
116 static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj,
117                                      struct bin_attribute *attr, char *buf,
118                                      loff_t pos, size_t count)
119 {
120         u64 *out = (u64 *)buf;
121         struct page *page;
122         unsigned long pfn, end_pfn;
123         int bit;
124 
125         if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE)
126                 return -EINVAL;
127 
128         pfn = pos * BITS_PER_BYTE;
129         if (pfn >= max_pfn)
130                 return 0;
131 
132         end_pfn = pfn + count * BITS_PER_BYTE;
133         if (end_pfn > max_pfn)
134                 end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
135 
136         for (; pfn < end_pfn; pfn++) {
137                 bit = pfn % BITMAP_CHUNK_BITS;
138                 if (!bit)
139                         *out = 0ULL;
140                 page = page_idle_get_page(pfn);
141                 if (page) {
142                         if (page_is_idle(page)) {
143                                 /*
144                                  * The page might have been referenced via a
145                                  * pte, in which case it is not idle. Clear
146                                  * refs and recheck.
147                                  */
148                                 page_idle_clear_pte_refs(page);
149                                 if (page_is_idle(page))
150                                         *out |= 1ULL << bit;
151                         }
152                         put_page(page);
153                 }
154                 if (bit == BITMAP_CHUNK_BITS - 1)
155                         out++;
156                 cond_resched();
157         }
158         return (char *)out - buf;
159 }
160 
161 static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj,
162                                       struct bin_attribute *attr, char *buf,
163                                       loff_t pos, size_t count)
164 {
165         const u64 *in = (u64 *)buf;
166         struct page *page;
167         unsigned long pfn, end_pfn;
168         int bit;
169 
170         if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE)
171                 return -EINVAL;
172 
173         pfn = pos * BITS_PER_BYTE;
174         if (pfn >= max_pfn)
175                 return -ENXIO;
176 
177         end_pfn = pfn + count * BITS_PER_BYTE;
178         if (end_pfn > max_pfn)
179                 end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
180 
181         for (; pfn < end_pfn; pfn++) {
182                 bit = pfn % BITMAP_CHUNK_BITS;
183                 if ((*in >> bit) & 1) {
184                         page = page_idle_get_page(pfn);
185                         if (page) {
186                                 page_idle_clear_pte_refs(page);
187                                 set_page_idle(page);
188                                 put_page(page);
189                         }
190                 }
191                 if (bit == BITMAP_CHUNK_BITS - 1)
192                         in++;
193                 cond_resched();
194         }
195         return (char *)in - buf;
196 }
197 
198 static struct bin_attribute page_idle_bitmap_attr =
199                 __BIN_ATTR(bitmap, S_IRUSR | S_IWUSR,
200                            page_idle_bitmap_read, page_idle_bitmap_write, 0);
201 
202 static struct bin_attribute *page_idle_bin_attrs[] = {
203         &page_idle_bitmap_attr,
204         NULL,
205 };
206 
207 static struct attribute_group page_idle_attr_group = {
208         .bin_attrs = page_idle_bin_attrs,
209         .name = "page_idle",
210 };
211 
212 #ifndef CONFIG_64BIT
213 static bool need_page_idle(void)
214 {
215         return true;
216 }
217 struct page_ext_operations page_idle_ops = {
218         .need = need_page_idle,
219 };
220 #endif
221 
222 static int __init page_idle_init(void)
223 {
224         int err;
225 
226         err = sysfs_create_group(mm_kobj, &page_idle_attr_group);
227         if (err) {
228                 pr_err("page_idle: register sysfs failed\n");
229                 return err;
230         }
231         return 0;
232 }
233 subsys_initcall(page_idle_init);
234 

This page was automatically generated by LXR 0.3.1 (source).  •  Linux is a registered trademark of Linus Torvalds  •  Contact us