// // Reserve size bytes in the MMIO region and map [pa,pa+size) at this // location. Return the base of the reserved region. size does *not* // have to be multiple of PGSIZE. // void * mmio_map_region(physaddr_t pa, size_t size) { // Where to start the next region. Initially, this is the // beginning of the MMIO region. Because this is static, its // value will be preserved between calls to mmio_map_region // (just like nextfree in boot_alloc). staticuintptr_t base = MMIOBASE;
uintptr_t result;
// Reserve size bytes of virtual memory starting at base and // map physical pages [pa,pa+size) to virtual addresses // [base,base+size). Since this is device memory and not // regular DRAM, you'll have to tell the CPU that it isn't // safe to cache access to this memory. Luckily, the page // tables provide bits for this purpose; simply create the // mapping with PTE_PCD|PTE_PWT (cache-disable and // write-through) in addition to PTE_W. (If you're interested // in more details on this, see section 10.5 of IA32 volume // 3A.) // // Be sure to round size up to a multiple of PGSIZE and to // handle if this reservation would overflow MMIOLIM (it's // okay to simply panic if this happens). // // Hint: The staff solution uses boot_map_region. // // Your code here: if(base + ROUNDUP(size, PGSIZE) >= MMIOLIM) panic("mmio_map_region: out of memory\n"); boot_map_region(kern_pgdir, base, size, pa, PTE_PCD | PTE_PWT | PTE_W); result = base; base += ROUNDUP(size, PGSIZE); return (void *)result; }
Exercise 2
修改page_init()的内容如下,只是增加一个特殊处理:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
// case 1: pages[0].pp_ref = 1; pages[0].pp_link = NULL; // case 2, 3, 4: for (i = 1; i < npages; i++) { if((IOPHYSMEM <= i * PGSIZE && i * PGSIZE < pa_free_start) || page2pa(pages + i) == MPENTRY_PADDR) { pages[i].pp_ref = 1; pages[i].pp_link = NULL; } else { pages[i].pp_ref = 0; pages[i].pp_link = page_free_list; page_free_list = &pages[i]; } }
// Modify mappings in kern_pgdir to support SMP // - Map the per-CPU stacks in the region [KSTACKTOP-PTSIZE, KSTACKTOP) // staticvoid mem_init_mp(void) { // Map per-CPU stacks starting at KSTACKTOP, for up to 'NCPU' CPUs. // // For CPU i, use the physical memory that 'percpu_kstacks[i]' refers // to as its kernel stack. CPU i's kernel stack grows down from virtual // address kstacktop_i = KSTACKTOP - i * (KSTKSIZE + KSTKGAP), and is // divided into two pieces, just like the single stack you set up in // mem_init: // * [kstacktop_i - KSTKSIZE, kstacktop_i) // -- backed by physical memory // * [kstacktop_i - (KSTKSIZE + KSTKGAP), kstacktop_i - KSTKSIZE) // -- not backed; so if the kernel overflows its stack, // it will fault rather than overwrite another CPU's stack. // Known as a "guard page". // Permissions: kernel RW, user NONE // // LAB 4: Your code here: uint32_t i, kstacktop_i; for(i=0, kstacktop_i=KSTACKTOP;i < NCPU; ++i, kstacktop_i -= KSTKSIZE + KSTKGAP) boot_map_region(kern_pgdir, kstacktop_i - KSTKSIZE, KSTKSIZE, PADDR(percpu_kstacks[i]), PTE_W); }
// Initialize and load the per-CPU TSS and IDT void trap_init_percpu(void) { // The example code here sets up the Task State Segment (TSS) and // the TSS descriptor for CPU 0. But it is incorrect if we are // running on other CPUs because each CPU has its own kernel stack. // Fix the code so that it works for all CPUs. // // Hints: // - The macro "thiscpu" always refers to the current CPU's // struct CpuInfo; // - The ID of the current CPU is given by cpunum() or // thiscpu->cpu_id; // - Use "thiscpu->cpu_ts" as the TSS for the current CPU, // rather than the global "ts" variable; // - Use gdt[(GD_TSS0 >> 3) + i] for CPU i's TSS descriptor; // - You mapped the per-CPU kernel stacks in mem_init_mp() // - Initialize cpu_ts.ts_iomb to prevent unauthorized environments // from doing IO (0 is not the correct value!) // // ltr sets a 'busy' flag in the TSS selector, so if you // accidentally load the same TSS on more than one CPU, you'll // get a triple fault. If you set up an individual CPU's TSS // wrong, you may not get a fault until you try to return from // user space on that CPU. // // LAB 4: Your code here:
// Setup a TSS so that we get the right stack // when we trap to the kernel. thiscpu->cpu_ts.ts_esp0 = KSTACKTOP - cpunum() * (KSTKSIZE + KSTKGAP); thiscpu->cpu_ts.ts_ss0 = GD_KD; thiscpu->cpu_ts.ts_iomb = sizeof(struct Taskstate);
// Initialize the TSS slot of the gdt. gdt[(GD_TSS0 >> 3) + cpunum()] = SEG16(STS_T32A, (uint32_t) (&(thiscpu->cpu_ts)), sizeof(struct Taskstate) - 1, 0); gdt[(GD_TSS0 >> 3) + cpunum()].sd_s = 0;
// Load the TSS selector (like other segment selectors, the // bottom three bits are special; we leave them 0) ltr(GD_TSS0 + (cpunum() << 3));
In i386_init(), acquire the lock before the BSP wakes up the other CPUs.
1 2 3 4 5 6
// Acquire the big kernel lock before waking up APs // Your code here: lock_kernel(); // Starting non-boot CPUs boot_aps();
In mp_main(), acquire the lock after initializing the AP, and then call sched_yield() to start running environments on this AP.
1 2 3 4 5 6 7
// Now that we have finished some basic setup, call sched_yield() // to start running processes on this CPU. But make sure that // only one CPU can enter the scheduler at a time! // // Your code here: lock_kernel(); sched_yield();
In trap(), acquire the lock when trapped from user mode. To determine whether a trap happened in user mode or in kernel mode, check the low bits of the tf_cs.
1 2 3 4 5 6
// Trapped from user mode. // Acquire the big kernel lock before doing any // serious kernel work. // LAB 4: Your code here. lock_kernel(); assert(curenv);
In env_run(), release the lock right before switching to user mode. Do not do that too early or too late, otherwise you will experience races or deadlocks.
// Choose a user environment to run and run it. void sched_yield(void) { structEnv *idle;
// Implement simple round-robin scheduling. // // Search through 'envs' for an ENV_RUNNABLE environment in // circular fashion starting just after the env this CPU was // last running. Switch to the first such environment found. // // If no envs are runnable, but the environment previously // running on this CPU is still ENV_RUNNING, it's okay to // choose that environment. // // Never choose an environment that's currently running on // another CPU (env_status == ENV_RUNNING). If there are // no runnable environments, simply drop through to the code // below to halt the cpu.
// Setup code for APs.  Entered once per application processor after
// the bootstrap trampoline; initializes this CPU's per-CPU state and
// then hands control to the scheduler.  Initialization order matters:
// the page directory switch must come first, and cpu_status is set
// last so boot_aps() only proceeds once this CPU is fully set up.
void
mp_main(void)
{
	// We are in high EIP now, safe to switch to kern_pgdir
	lcr3(PADDR(kern_pgdir));
	cprintf("SMP: CPU %d starting\n", cpunum());

	lapic_init();          // this CPU's local APIC
	env_init_percpu();     // this CPU's GDT and segment registers
	trap_init_percpu();    // this CPU's TSS and IDT
	xchg(&thiscpu->cpu_status, CPU_STARTED); // tell boot_aps() we're up

	// Now that we have finished some basic setup, call sched_yield()
	// to start running processes on this CPU.  But make sure that
	// only one CPU can enter the scheduler at a time!
	//
	// Your code here:
	lock_kernel();
	sched_yield();

	// Remove this after you finish Exercise 6
	for (;;);
}
// Allocate a new environment. // Returns envid of new environment, or < 0 on error. Errors are: // -E_NO_FREE_ENV if no free environment is available. // -E_NO_MEM on memory exhaustion. staticenvid_t sys_exofork(void) { // Create the new environment with env_alloc(), from kern/env.c. // It should be left as env_alloc created it, except that // status is set to ENV_NOT_RUNNABLE, and the register set is copied // from the current environment -- but tweaked so sys_exofork // will appear to return 0.
// Set envid's env_status to status, which must be ENV_RUNNABLE // or ENV_NOT_RUNNABLE. // // Returns 0 on success, < 0 on error. Errors are: // -E_BAD_ENV if environment envid doesn't currently exist, // or the caller doesn't have permission to change envid. // -E_INVAL if status is not a valid status for an environment. staticint sys_env_set_status(envid_t envid, int status) { // Hint: Use the 'envid2env' function from kern/env.c to translate an // envid to a struct Env. // You should set envid2env's third argument to 1, which will // check whether the current environment has permission to set // envid's status.
// Allocate a page of memory and map it at 'va' with permission // 'perm' in the address space of 'envid'. // The page's contents are set to 0. // If a page is already mapped at 'va', that page is unmapped as a // side effect. // // perm -- PTE_U | PTE_P must be set, PTE_AVAIL | PTE_W may or may not be set, // but no other bits may be set. See PTE_SYSCALL in inc/mmu.h. // // Return 0 on success, < 0 on error. Errors are: // -E_BAD_ENV if environment envid doesn't currently exist, // or the caller doesn't have permission to change envid. // -E_INVAL if va >= UTOP, or va is not page-aligned. // -E_INVAL if perm is inappropriate (see above). // -E_NO_MEM if there's no memory to allocate the new page, // or to allocate any necessary page tables. staticint sys_page_alloc(envid_t envid, void *va, int perm) { // Hint: This function is a wrapper around page_alloc() and // page_insert() from kern/pmap.c. // Most of the new code you write should be to check the // parameters for correctness. // If page_insert() fails, remember to free the page you // allocated!
// Map the page of memory at 'srcva' in srcenvid's address space // at 'dstva' in dstenvid's address space with permission 'perm'. // Perm has the same restrictions as in sys_page_alloc, except // that it also must not grant write access to a read-only // page. // // Return 0 on success, < 0 on error. Errors are: // -E_BAD_ENV if srcenvid and/or dstenvid doesn't currently exist, // or the caller doesn't have permission to change one of them. // -E_INVAL if srcva >= UTOP or srcva is not page-aligned, // or dstva >= UTOP or dstva is not page-aligned. // -E_INVAL is srcva is not mapped in srcenvid's address space. // -E_INVAL if perm is inappropriate (see sys_page_alloc). // -E_INVAL if (perm & PTE_W), but srcva is read-only in srcenvid's // address space. // -E_NO_MEM if there's no memory to allocate any necessary page tables. staticint sys_page_map(envid_t srcenvid, void *srcva, envid_t dstenvid, void *dstva, int perm) { // Hint: This function is a wrapper around page_lookup() and // page_insert() from kern/pmap.c. // Again, most of the new code you write should be to check the // parameters for correctness. // Use the third argument to page_lookup() to // check the current permissions on the page.
// Unmap the page of memory at 'va' in the address space of 'envid'. // If no page is mapped, the function silently succeeds. // // Return 0 on success, < 0 on error. Errors are: // -E_BAD_ENV if environment envid doesn't currently exist, // or the caller doesn't have permission to change envid. // -E_INVAL if va >= UTOP, or va is not page-aligned. staticint sys_page_unmap(envid_t envid, void *va) { // Hint: This function is a wrapper around page_remove().
// Set the page fault upcall for 'envid' by modifying the corresponding struct // Env's 'env_pgfault_upcall' field. When 'envid' causes a page fault, the // kernel will push a fault record onto the exception stack, then branch to // 'func'. // // Returns 0 on success, < 0 on error. Errors are: // -E_BAD_ENV if environment envid doesn't currently exist, // or the caller doesn't have permission to change envid. staticint sys_env_set_pgfault_upcall(envid_t envid, void *func) { // LAB 4: Your code here. structEnv* env; if(envid2env(envid, &env, 1)) return -E_BAD_ENV; env->env_pgfault_upcall = func; return0; //panic("sys_env_set_pgfault_upcall not implemented"); }
// Restore the trap-time registers.  After you do this, you
// can no longer modify any general-purpose registers.
// LAB 4: Your code here.
	addl $8, %esp		// skip utf_fault_va and utf_err
	popal			// pop the eight saved general-purpose registers

// Restore eflags from the stack.  After you do this, you can
// no longer use arithmetic operations or anything else that
// modifies eflags.
// LAB 4: Your code here.
	addl $4, %esp		// skip utf_eip (presumably pushed onto the
				// trap-time stack by earlier code not shown
				// here, so 'ret' below can consume it)
	popfl

// Switch back to the adjusted trap-time stack.
// LAB 4: Your code here.
	popl %esp

// Return to re-execute the instruction that faulted.
// LAB 4: Your code here.
	ret			// pops the trap-time eip from the trap-time stack
// // Set the page fault handler function. // If there isn't one yet, _pgfault_handler will be 0. // The first time we register a handler, we need to // allocate an exception stack (one page of memory with its top // at UXSTACKTOP), and tell the kernel to call the assembly-language // _pgfault_upcall routine when a page fault occurs. // void set_pgfault_handler(void (*handler)(struct UTrapframe *utf)) { int r;
if (_pgfault_handler == 0) { // First time through! // LAB 4: Your code here. if(sys_page_alloc(0, (void *)(UXSTACKTOP - PGSIZE), PTE_U | PTE_P | PTE_W)) panic("set_pgfault_handler: page alloc fault!"); if(sys_env_set_pgfault_upcall(0, (void *)_pgfault_upcall)) panic("set_pgfault handler: set pgfault upcall failed!"); } // Save handler pointer for assembly to call. _pgfault_handler = handler; }
// // Custom page fault handler - if faulting page is copy-on-write, // map in our own private writable copy. // staticvoid pgfault(struct UTrapframe *utf) { void *addr = (void *) utf->utf_fault_va; uint32_t err = utf->utf_err; int r;
// Check that the faulting access was (1) a write, and (2) to a // copy-on-write page. If not, panic. // Hint: // Use the read-only page table mappings at uvpt // (see <inc/memlayout.h>).
// LAB 4: Your code here. if(!((err & FEC_WR) && (uvpt[PGNUM(addr)] & PTE_COW))) panic("pgfault: 0x%08x the fault page is not writable or copy-on-write page!", addr);
// Allocate a new page, map it at a temporary location (PFTEMP), // copy the data from the old page to the new page, then move the new // page to the old page's address. // Hint: // You should make three system calls.
// // Map our virtual page pn (address pn*PGSIZE) into the target envid // at the same virtual address. If the page is writable or copy-on-write, // the new mapping must be created copy-on-write, and then our mapping must be // marked copy-on-write as well. (Exercise: Why do we need to mark ours // copy-on-write again if it was already copy-on-write at the beginning of // this function?) // // Returns: 0 on success, < 0 on error. // It is also OK to panic on error. // staticint duppage(envid_t envid, unsigned pn) { int r;
// // User-level fork with copy-on-write. // Set up our page fault handler appropriately. // Create a child. // Copy our address space and page fault handler setup to the child. // Then mark the child as runnable and return. // // Returns: child's envid to the parent, 0 to the child, < 0 on error. // It is also OK to panic on error. // // Hint: // Use uvpd, uvpt, and duppage. // Remember to fix "thisenv" in the child process. // Neither user exception stack should ever be marked copy-on-write, // so you must allocate a new page for the child's user exception stack. // envid_t fork(void) { // LAB 4: Your code here. int r; envid_t envid; uint8_t * addr; set_pgfault_handler(pgfault); envid = sys_exofork(); if(envid < 0) panic("fork: sys_exofork failed!"); if(envid == 0){ thisenv = &envs[ENVX(sys_getenvid())]; return0; }
// Block until a value is ready. Record that you want to receive // using the env_ipc_recving and env_ipc_dstva fields of struct Env, // mark yourself not runnable, and then give up the CPU. // // If 'dstva' is < UTOP, then you are willing to receive a page of data. // 'dstva' is the virtual address at which the sent page should be mapped. // // This function only returns on error, but the system call will eventually // return 0 on success. // Return < 0 on error. Errors are: // -E_INVAL if dstva < UTOP but dstva is not page-aligned. staticint sys_ipc_recv(void *dstva) { // LAB 4: Your code here. structEnv * env; if(envid2env(0, &env, 0)) return -E_BAD_ENV; if((uint32_t)dstva < UTOP && (dstva != ROUNDDOWN(dstva, PGSIZE))) return -E_INVAL;
// Try to send 'value' to the target env 'envid'. // If srcva < UTOP, then also send page currently mapped at 'srcva', // so that receiver gets a duplicate mapping of the same page. // // The send fails with a return value of -E_IPC_NOT_RECV if the // target is not blocked, waiting for an IPC. // // The send also can fail for the other reasons listed below. // // Otherwise, the send succeeds, and the target's ipc fields are // updated as follows: // env_ipc_recving is set to 0 to block future sends; // env_ipc_from is set to the sending envid; // env_ipc_value is set to the 'value' parameter; // env_ipc_perm is set to 'perm' if a page was transferred, 0 otherwise. // The target environment is marked runnable again, returning 0 // from the paused sys_ipc_recv system call. (Hint: does the // sys_ipc_recv function ever actually return?) // // If the sender wants to send a page but the receiver isn't asking for one, // then no page mapping is transferred, but no error occurs. // The ipc only happens when no errors occur. // // Returns 0 on success, < 0 on error. // Errors are: // -E_BAD_ENV if environment envid doesn't currently exist. // (No need to check permissions.) // -E_IPC_NOT_RECV if envid is not currently blocked in sys_ipc_recv, // or another environment managed to send first. // -E_INVAL if srcva < UTOP but srcva is not page-aligned. // -E_INVAL if srcva < UTOP and perm is inappropriate // (see sys_page_alloc). // -E_INVAL if srcva < UTOP but srcva is not mapped in the caller's // address space. // -E_INVAL if (perm & PTE_W), but srcva is read-only in the // current environment's address space. // -E_NO_MEM if there's not enough memory to map srcva in envid's // address space. staticint sys_ipc_try_send(envid_t envid, uint32_t value, void *srcva, unsigned perm) { // LAB 4: Your code here. structEnv* dstenv, * srcenv; if(envid2env(envid, &dstenv, 0) || envid2env(0, &srcenv, 0)) return -E_BAD_ENV; if(!dstenv->env_ipc_recving) return -E_IPC_NOT_RECV;
// Receive a value via IPC and return it. // If 'pg' is nonnull, then any page sent by the sender will be mapped at // that address. // If 'from_env_store' is nonnull, then store the IPC sender's envid in // *from_env_store. // If 'perm_store' is nonnull, then store the IPC sender's page permission // in *perm_store (this is nonzero iff a page was successfully // transferred to 'pg'). // If the system call fails, then store 0 in *fromenv and *perm (if // they're nonnull) and return the error. // Otherwise, return the value sent by the sender // // Hint: // Use 'thisenv' to discover the value and who sent it. // If 'pg' is null, pass sys_ipc_recv a value that it will understand // as meaning "no page". (Zero is not the right value, since that's // a perfectly valid place to map a page.) int32_t ipc_recv(envid_t *from_env_store, void *pg, int *perm_store) { // LAB 4: Your code here. int r;
// Send 'val' (and 'pg' with 'perm', if 'pg' is nonnull) to 'toenv'. // This function keeps trying until it succeeds. // It should panic() on any error other than -E_IPC_NOT_RECV. // // Hint: // Use sys_yield() to be CPU-friendly. // If 'pg' is null, pass sys_ipc_try_send a value that it will understand // as meaning "no page". (Zero is not the right value.) void ipc_send(envid_t to_env, uint32_t val, void *pg, int perm) { // LAB 4: Your code here. int r; do{ sys_yield(); r = sys_ipc_try_send(to_env, val, pg ? pg : (void *)UTOP, perm); if(r != 0 && r != -E_IPC_NOT_RECV) panic("ipc_send: faild, %e", r); }while(r); }
实现完了之后lab4的基础内容就已经结束了,执行make grade可以得到如下的输出:
1 2 3 4 5 6 7 8 9 10 11
spin: OK (1.8s) stresssched: OK (3.2s) sendpage: OK (0.9s) (Old jos.out.sendpage failure log removed) pingpong: OK (1.9s) (Old jos.out.pingpong failure log removed) primes: OK (9.1s) (Old jos.out.primes failure log removed) Part C score: 25/25
enabled interrupts: 1 2 [00000000] new env 00001000 [00001000] new env 00001001 i am 00001000; thisenv is 0xeec00000 send 0 from 1000 to 1001 1001 got 0 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 1 from 1001 (thisenv is 0xeec00000 1000) 1001 got 2 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 3 from 1001 (thisenv is 0xeec00000 1000) 1001 got 4 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 5 from 1001 (thisenv is 0xeec00000 1000) 1001 got 6 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 7 from 1001 (thisenv is 0xeec00000 1000) 1001 got 8 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 9 from 1001 (thisenv is 0xeec00000 1000) [00001000] exiting gracefully [00001000] free env 00001000 1001 got 10 from 1000 (thisenv is 0xeec0007c 1001) [00001001] exiting gracefully [00001001] free env 00001001
可以发现实际上两个进程确实是共享了地址空间,并且thisenv能够正确地指向进程自身了。
如果将其中的sfork()修改成fork()的话,得到的输出如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
enabled interrupts: 1 2 [00000000] new env 00001000 [00001000] new env 00001001 i am 00001000; thisenv is 0xeec00000 send 0 from 1000 to 1001 1001 got 0 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 0 from 1001 (thisenv is 0xeec00000 1000) 1001 got 1 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 1 from 1001 (thisenv is 0xeec00000 1000) 1001 got 2 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 2 from 1001 (thisenv is 0xeec00000 1000) 1001 got 3 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 3 from 1001 (thisenv is 0xeec00000 1000) 1001 got 4 from 1000 (thisenv is 0xeec0007c 1001) 1000 got 4 from 1001 (thisenv is 0xeec00000 1000) ...