mmap(void*start,size_t length,int prot,int flags,int fd,off_t offset)
start表示用戶空間映射的起始地址,length表示映射的長度,offset表示映射內容在文件中的起始偏移量.
/*
 * sys_mmap2 - entry point that forwards an mmap2 request.
 *
 * All six arguments are handed to do_mmap2() unchanged and its result is
 * returned to the caller as-is.
 */
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
			  unsigned long prot, unsigned long flags,
			  unsigned long fd, unsigned long pgoff)
{
	long ret;

	ret = do_mmap2(addr, len, prot, flags, fd, pgoff);
	return ret;
}
其主體是do_mmap2,注意其標志MAP_ANONYMOUS表示匿名映射
/* common code for old and new mmaps */
static inline long do_mmap2(
unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
{
int error = -EBADF;
struct file * file = NULL;
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
if (!(flags & MAP_ANONYMOUS)) {//map_anonymous表示沒有文件,只是在指定位置分配內存
file = fget(fd);//上一條表示,沒有文件,就跳過if以下,有文件則打開文件
if (!file)//如果文件不存在,直接返回錯誤
goto out;
}
down(¤t->mm->mmap_sem);//信號量down操作
error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);//mmap主體操作還是這個
up(¤t->mm->mmap_sem);//信號量up操作
if (file)
fput(file);
out:
return error;
}
其主體為do_mmap_pgoff
do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
第一個參數為打開文件,第二個地址,第三長度,第四個參數為訪問權限,第五個參數為其他控制目的,第6個為偏移量
/*
 * do_mmap_pgoff - validate a mapping request, build a vm_area_struct for it
 * and insert it into current->mm.
 *
 * @file:  file to map; NULL takes the "no file" branches below
 * @addr:  requested start address; used as given (and required to be page
 *         aligned) when MAP_FIXED is set, otherwise passed to
 *         get_unmapped_area()
 * @len:   length in bytes, rounded up with PAGE_ALIGN()
 * @prot:  PROT_* bits, combined with @flags through vm_flags()
 * @flags: MAP_* bits
 * @pgoff: offset stored in vma->vm_pgoff; the overflow check adds
 *         len >> PAGE_SHIFT to it, so it is presumably counted in pages
 *
 * Returns the address of the new mapping (vma->vm_start after the mmap
 * method has run) on success, or a negative errno value on failure.
 */
unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags, unsigned long pgoff)
{
struct mm_struct * mm = current->mm;// memory descriptor of the current process
struct vm_area_struct * vma;
int correct_wcount = 0;
int error;
// A file-backed request needs an f_op->mmap method; without one it is refused.
if (file && (!file->f_op || !file->f_op->mmap))
return -ENODEV;
// Round the length up to a page multiple; a zero length returns addr unchanged.
if ((len = PAGE_ALIGN(len)) == 0)
return addr;
// Reject a length above TASK_SIZE or a range that would end beyond TASK_SIZE.
if (len > TASK_SIZE || addr > TASK_SIZE-len)
return -EINVAL;
// Reject a request where pgoff plus the page count wraps around.
/* offset overflow? */
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
return -EINVAL;
// Refuse when the mm already holds more than MAX_MAP_COUNT mappings.
/* Too many mappings? */
if (mm->map_count > MAX_MAP_COUNT)
return -ENOMEM;
// When def_flags carries VM_LOCKED, the already locked size plus len is
// checked against the RLIMIT_MEMLOCK soft limit.
/* mlock MCL_FUTURE? */
if (mm->def_flags & VM_LOCKED) {
unsigned long locked = mm->locked_vm << PAGE_SHIFT;
locked += len;
if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
return -EAGAIN;
}
/* Do simple checking here so the lower-level routines won't have
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
if (file != NULL) { // file-backed request
switch (flags & MAP_TYPE) {// mapping type: shared or private
case MAP_SHARED:// shared mapping
if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
return -EACCES;
/* Make sure we don't allow writing to an append-only file.. */
if (IS_APPEND(file->f_dentry->d_inode) && (file->f_mode & FMODE_WRITE))
return -EACCES;
/* make sure there are no mandatory locks on the file. */
if (locks_verify_locked(file->f_dentry->d_inode))
return -EAGAIN;
/* fall through */
case MAP_PRIVATE:// private mapping (shared mappings also reach this check)
if (!(file->f_mode & FMODE_READ))
return -EACCES;
break;
default:
return -EINVAL;
}
}
/* Obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
if (flags & MAP_FIXED) {// MAP_FIXED set: addr is used as given and must be page aligned
if (addr & ~PAGE_MASK)
return -EINVAL;
} else {// MAP_FIXED clear: get_unmapped_area() chooses the address; 0 means failure
addr = get_unmapped_area(addr, len);
if (!addr)
return -ENOMEM;
}
/* Determine the object being mapped and call the appropriate
* specific mapper. the address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);// allocate a vma from the vm_area_cachep cache
if (!vma)
return -ENOMEM;
vma->vm_mm = mm;// back-pointer to the owning memory descriptor
vma->vm_start = addr;// first address of the region
vma->vm_end = addr + len;// address just past the region
vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;// flags derived from prot/flags plus the mm defaults
if (file) {// file-backed mapping; the else branch handles file == NULL
VM_ClearReadHint(vma);// reset the read hint and vm_raend before first use
vma->vm_raend = 0;
if (file->f_mode & FMODE_READ)
vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_SHARED) {
vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
/* This looks strange, but when we don't have the file open
* for writing, we can demote the shared mapping to a simpler
* private mapping. That also takes care of a security hole
* with ptrace() writing to a shared mapping without write
* permissions.
*
* We leave the VM_MAYSHARE bit on, just to get correct output
* from /proc/xxx/maps..
*/
if (!(file->f_mode & FMODE_WRITE))
vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
}
} else {
vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_SHARED)
vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
}
// The low four bits of vm_flags index protection_map to get the page protection.
vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
vma->vm_ops = NULL;
vma->vm_pgoff = pgoff;// recorded offset; presumably used later to locate file pages on a fault - confirm
vma->vm_file = NULL;
vma->vm_private_data = NULL;
/* Clear old maps */
error = -ENOMEM;
if (do_munmap(mm, addr, len))// do_munmap() is run on the target range; a non-zero result aborts
goto free_vma;// only the vma needs releasing at this point
// Fail if total_vm plus the new length would exceed the RLIMIT_AS soft limit.
/* Check against address space limit. */
if ((mm->total_vm << PAGE_SHIFT) + len
> current->rlim[RLIMIT_AS].rlim_cur)
goto free_vma;
// Writable and not shared, without MAP_NORESERVE: ask vm_enough_memory() first.
/* Private writable mapping? Check memory availability.. */
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
!(flags & MAP_NORESERVE) &&
!vm_enough_memory(len >> PAGE_SHIFT))
goto free_vma;
if (file) {
// VM_DENYWRITE: call deny_write_access() and remember to adjust
// i_writecount again afterwards (correct_wcount).
if (vma->vm_flags & VM_DENYWRITE) {
error = deny_write_access(file);
if (error)
goto free_vma;
correct_wcount = 1;
}
vma->vm_file = file;
get_file(file);// paired with fput() on the unmap_and_free_vma path
error = file->f_op->mmap(file, vma);// mmap method whose presence was checked at the top
if (error)
goto unmap_and_free_vma;
} else if (flags & MAP_SHARED) {// no file but MAP_SHARED: set up through shmem_zero_setup()
error = shmem_zero_setup(vma);
if (error)
goto free_vma;
}
/* Can addr have changed??
*
* Answer: Yes, several device drivers can do it in their
* f_op->mmap method. -DaveM
*
* flags and addr are therefore re-read from the vma below.
*/
flags = vma->vm_flags;
addr = vma->vm_start;
insert_vm_struct(mm, vma);// insert the vma into the current process's mm
if (correct_wcount)
atomic_inc(&file->f_dentry->d_inode->i_writecount);
mm->total_vm += len >> PAGE_SHIFT;// account the new pages in total_vm
if (flags & VM_LOCKED) {// locked region: account it and call make_pages_present() on the range
mm->locked_vm += len >> PAGE_SHIFT;
make_pages_present(addr, addr + len);
}
return addr;
// Error path taken after the file's mmap method has failed.
unmap_and_free_vma:
if (correct_wcount)
atomic_inc(&file->f_dentry->d_inode->i_writecount);
vma->vm_file = NULL;
fput(file);
/* Undo any partial mapping done by a device driver. */
flush_cache_range(mm, vma->vm_start, vma->vm_end);
zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
flush_tlb_range(mm, vma->vm_start, vma->vm_end);
// Error path that only has to give the vma back to its cache.
free_vma:
kmem_cache_free(vm_area_cachep, vma);
return error;
}
以上只是在文件與虛擬區間之間建立了映射,具體的映射(從虛擬地址到物理頁面)還沒有建立,而是推遲到真正需要的時候才進行.為具體映射的建立以及物理頁面的換入和換出,內核分別準備了一些函數:filemap_nopage(),ext2_readpage(),ext2_writepage()
什么時候調用呢
(1)該區間中的一個頁面首次受到訪問時,會由於頁面尚未映射而發生缺頁異常,相應的異常處理程序是do_no_page().對於ext2文件系統,do_no_page()會通過ext2_readpage()分配一個空閑內存頁面,從文件讀入相應頁面的內容,並建立映射.
(2)建立映射后,對頁面的寫操作使頁面變臟,但頁面的內容並不會立即寫回文件,而是由內核線程bdflush()周期性運行時通過page_launder()間接調用ext2_writepage(),將頁面的內容寫入文件.如果頁面很長時間沒有受到訪問,就會被try_to_swap_out()解除映射而轉入不活躍狀態;如果頁面是臟的,還要先調用ext2_writepage()寫回文件,然后再解除映射.
(3)解除了映射的頁面再次受到訪問時,又會發生缺頁異常,並因為頁面無映射而再次進入do_no_page().
mmap映射,如果文件映射的一個頁面長期得不到訪問,將直接把頁表項設置為0,如果訪問到將重新alloc_page分配一個新頁面,然后把文件讀取到新頁面,再建立映射,對於普通的換入/換出則是發生缺頁異常從swap分區查找到換出的頁面,然后建立映射
