Diffstat (limited to 'arch/i386/kernel')
62 files changed, 1391 insertions, 1159 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 4cc83b322b36..f10de0f2c5e6 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,11 +7,11 @@ extra-y := head.o init_task.o vmlinux.lds
 obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-		doublefault.o quirks.o
+		doublefault.o quirks.o i8237.o
 
 obj-y				+= cpu/
 obj-y				+= timers/
-obj-$(CONFIG_ACPI_BOOT)		+= acpi/
+obj-$(CONFIG_ACPI)		+= acpi/
 obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
 obj-$(CONFIG_MCA)		+= mca.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
index ee75cb286cfe..267ca48e1b6c 100644
--- a/arch/i386/kernel/acpi/Makefile
+++ b/arch/i386/kernel/acpi/Makefile
@@ -1,4 +1,8 @@
-obj-$(CONFIG_ACPI_BOOT)		:= boot.o
+obj-y				:= boot.o
 obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup.o
 
+ifneq ($(CONFIG_ACPI_PROCESSOR),)
+obj-y				+= cstate.o
+endif
+
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index b7808a89d945..a63351c085c6 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -40,19 +40,25 @@
 
 #ifdef CONFIG_X86_64
 
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
+static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+}
 extern void __init clustered_apic_check(void);
-static inline int ioapic_setup_disabled(void) { return 0; }
+static inline int ioapic_setup_disabled(void)
+{
+	return 0;
+}
+
 #include <asm/proto.h>
 
-#else /* X86 */
+#else				/* X86 */
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <mach_apic.h>
 #include <mach_mpparse.h>
-#endif /* CONFIG_X86_LOCAL_APIC */
+#endif				/* CONFIG_X86_LOCAL_APIC */
 
-#endif /* X86 */
+#endif				/* X86 */
 
 #define BAD_MADT_ENTRY(entry, end) (					    \
 		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
@@ -60,13 +66,8 @@ static inline int ioapic_setup_disabled(void) { return 0; }
 
 #define PREFIX			"ACPI: "
 
-#ifdef	CONFIG_ACPI_PCI
 int acpi_noirq __initdata;	/* skip ACPI IRQ initialization */
-int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
-#else
-int acpi_noirq __initdata = 1;
-int acpi_pci_disabled __initdata = 1;
-#endif
+int acpi_pci_disabled __initdata;	/* skip ACPI PCI scan and IRQ initialization */
 int acpi_ht __initdata = 1;	/* enable HT */
 
 int acpi_lapic;
@@ -88,7 +89,7 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 #define MAX_MADT_ENTRIES	256
 
 u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
-	{ [0 ... MAX_MADT_ENTRIES-1] = 0xff };
+    {[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
 EXPORT_SYMBOL(x86_acpiid_to_apicid);
 
 /* --------------------------------------------------------------------------
@@ -99,7 +100,7 @@ EXPORT_SYMBOL(x86_acpiid_to_apicid);
  * The default interrupt routing model is PIC (8259). This gets
  * overriden if IOAPICs are enumerated (below).
*/ -enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; #ifdef CONFIG_X86_64 @@ -107,7 +108,7 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; char *__acpi_map_table(unsigned long phys_addr, unsigned long size) { if (!phys_addr || !size) - return NULL; + return NULL; if (phys_addr < (end_pfn_map << PAGE_SHIFT)) return __va(phys_addr); @@ -134,8 +135,8 @@ char *__acpi_map_table(unsigned long phys, unsigned long size) unsigned long base, offset, mapped_size; int idx; - if (phys + size < 8*1024*1024) - return __va(phys); + if (phys + size < 8 * 1024 * 1024) + return __va(phys); offset = phys & (PAGE_SIZE - 1); mapped_size = PAGE_SIZE - offset; @@ -154,7 +155,7 @@ char *__acpi_map_table(unsigned long phys, unsigned long size) mapped_size += PAGE_SIZE; } - return ((unsigned char *) base + offset); + return ((unsigned char *)base + offset); } #endif @@ -172,7 +173,7 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) if (!phys_addr || !size) return -EINVAL; - mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size); + mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size); if (!mcfg) { printk(KERN_WARNING PREFIX "Unable to map MCFG\n"); return -ENODEV; @@ -209,20 +210,17 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) return 0; } -#endif /* CONFIG_PCI_MMCONFIG */ +#endif /* CONFIG_PCI_MMCONFIG */ #ifdef CONFIG_X86_LOCAL_APIC -static int __init -acpi_parse_madt ( - unsigned long phys_addr, - unsigned long size) +static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size) { - struct acpi_table_madt *madt = NULL; + struct acpi_table_madt *madt = NULL; if (!phys_addr || !size) return -EINVAL; - madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size); + madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size); if (!madt) { printk(KERN_WARNING PREFIX "Unable to map MADT\n"); return -ENODEV; @@ -232,22 +230,20 @@ acpi_parse_madt ( acpi_lapic_addr = (u64) madt->lapic_address; printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", - madt->lapic_address); + madt->lapic_address); } acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); - + return 0; } - static int __init -acpi_parse_lapic ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end) { - struct acpi_table_lapic *processor = NULL; + struct acpi_table_lapic *processor = NULL; - processor = (struct acpi_table_lapic*) header; + processor = (struct acpi_table_lapic *)header; if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; @@ -260,20 +256,19 @@ acpi_parse_lapic ( x86_acpiid_to_apicid[processor->acpi_id] = processor->id; - mp_register_lapic ( - processor->id, /* APIC ID */ - processor->flags.enabled); /* Enabled? */ + mp_register_lapic(processor->id, /* APIC ID */ + processor->flags.enabled); /* Enabled? 
*/ return 0; } static int __init -acpi_parse_lapic_addr_ovr ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header, + const unsigned long end) { struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; - lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header; + lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header; if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) return -EINVAL; @@ -284,12 +279,11 @@ acpi_parse_lapic_addr_ovr ( } static int __init -acpi_parse_lapic_nmi ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end) { struct acpi_table_lapic_nmi *lapic_nmi = NULL; - lapic_nmi = (struct acpi_table_lapic_nmi*) header; + lapic_nmi = (struct acpi_table_lapic_nmi *)header; if (BAD_MADT_ENTRY(lapic_nmi, end)) return -EINVAL; @@ -302,37 +296,32 @@ acpi_parse_lapic_nmi ( return 0; } +#endif /*CONFIG_X86_LOCAL_APIC */ -#endif /*CONFIG_X86_LOCAL_APIC*/ - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#ifdef CONFIG_X86_IO_APIC static int __init -acpi_parse_ioapic ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end) { struct acpi_table_ioapic *ioapic = NULL; - ioapic = (struct acpi_table_ioapic*) header; + ioapic = (struct acpi_table_ioapic *)header; if (BAD_MADT_ENTRY(ioapic, end)) return -EINVAL; - + acpi_table_print_madt_entry(header); - mp_register_ioapic ( - ioapic->id, - ioapic->address, - ioapic->global_irq_base); - + mp_register_ioapic(ioapic->id, + ioapic->address, ioapic->global_irq_base); + return 0; } /* * Parse Interrupt Source Override for the ACPI SCI */ -static void -acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) +static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) { if (trigger == 0) /* compatible SCI trigger is level */ trigger = 3; @@ -348,7 +337,7 @@ acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) polarity = acpi_sci_flags.polarity; /* - * mp_config_acpi_legacy_irqs() already setup IRQs < 16 + * mp_config_acpi_legacy_irqs() already setup IRQs < 16 * If GSI is < 16, this will update its flags, * else it will create a new mp_irqs[] entry. 
*/ @@ -363,12 +352,12 @@ acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) } static int __init -acpi_parse_int_src_ovr ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_int_src_ovr(acpi_table_entry_header * header, + const unsigned long end) { struct acpi_table_int_src_ovr *intsrc = NULL; - intsrc = (struct acpi_table_int_src_ovr*) header; + intsrc = (struct acpi_table_int_src_ovr *)header; if (BAD_MADT_ENTRY(intsrc, end)) return -EINVAL; @@ -377,33 +366,30 @@ acpi_parse_int_src_ovr ( if (intsrc->bus_irq == acpi_fadt.sci_int) { acpi_sci_ioapic_setup(intsrc->global_irq, - intsrc->flags.polarity, intsrc->flags.trigger); + intsrc->flags.polarity, + intsrc->flags.trigger); return 0; } if (acpi_skip_timer_override && - intsrc->bus_irq == 0 && intsrc->global_irq == 2) { - printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); - return 0; + intsrc->bus_irq == 0 && intsrc->global_irq == 2) { + printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + return 0; } - mp_override_legacy_irq ( - intsrc->bus_irq, - intsrc->flags.polarity, - intsrc->flags.trigger, - intsrc->global_irq); + mp_override_legacy_irq(intsrc->bus_irq, + intsrc->flags.polarity, + intsrc->flags.trigger, intsrc->global_irq); return 0; } - static int __init -acpi_parse_nmi_src ( - acpi_table_entry_header *header, const unsigned long end) +acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end) { struct acpi_table_nmi_src *nmi_src = NULL; - nmi_src = (struct acpi_table_nmi_src*) header; + nmi_src = (struct acpi_table_nmi_src *)header; if (BAD_MADT_ENTRY(nmi_src, end)) return -EINVAL; @@ -415,9 +401,7 @@ acpi_parse_nmi_src ( return 0; } -#endif /* CONFIG_X86_IO_APIC */ - -#ifdef CONFIG_ACPI_BUS +#endif /* CONFIG_X86_IO_APIC */ /* * acpi_pic_sci_set_trigger() @@ -433,8 +417,7 @@ acpi_parse_nmi_src ( * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) */ -void __init -acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) +void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) { unsigned int mask = 1 << irq; unsigned int old, new; @@ -454,10 +437,10 @@ acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) * routing tables.. 
*/ switch (trigger) { - case 1: /* Edge - clear */ + case 1: /* Edge - clear */ new &= ~mask; break; - case 3: /* Level - set */ + case 3: /* Level - set */ new |= mask; break; } @@ -470,21 +453,22 @@ acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) outb(new >> 8, 0x4d1); } - -#endif /* CONFIG_ACPI_BUS */ - int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) { #ifdef CONFIG_X86_IO_APIC if (use_pci_vector() && !platform_legacy_irq(gsi)) - *irq = IO_APIC_VECTOR(gsi); + *irq = IO_APIC_VECTOR(gsi); else #endif *irq = gsi; return 0; } -unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) { unsigned int irq; unsigned int plat_gsi = gsi; @@ -497,7 +481,7 @@ unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) extern void eisa_set_level_irq(unsigned int irq); if (edge_level == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); + eisa_set_level_irq(gsi); } #endif @@ -509,60 +493,58 @@ unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) acpi_gsi_to_irq(plat_gsi, &irq); return irq; } + EXPORT_SYMBOL(acpi_register_gsi); /* * ACPI based hotplug support for CPU */ #ifdef CONFIG_ACPI_HOTPLUG_CPU -int -acpi_map_lsapic(acpi_handle handle, int *pcpu) +int acpi_map_lsapic(acpi_handle handle, int *pcpu) { /* TBD */ return -EINVAL; } -EXPORT_SYMBOL(acpi_map_lsapic); +EXPORT_SYMBOL(acpi_map_lsapic); -int -acpi_unmap_lsapic(int cpu) +int acpi_unmap_lsapic(int cpu) { /* TBD */ return -EINVAL; } + EXPORT_SYMBOL(acpi_unmap_lsapic); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ -int -acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) +int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { /* TBD */ return -EINVAL; } + EXPORT_SYMBOL(acpi_register_ioapic); -int -acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) +int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) { /* TBD */ return -EINVAL; } + EXPORT_SYMBOL(acpi_unregister_ioapic); static unsigned long __init -acpi_scan_rsdp ( - unsigned long start, - unsigned long length) +acpi_scan_rsdp(unsigned long start, unsigned long length) { - unsigned long offset = 0; - unsigned long sig_len = sizeof("RSD PTR ") - 1; + unsigned long offset = 0; + unsigned long sig_len = sizeof("RSD PTR ") - 1; /* * Scan all 16-byte boundaries of the physical memory region for the * RSDP signature. 
*/ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *)(start + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } @@ -575,20 +557,19 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size) struct acpi_table_sbf *sb; if (!phys_addr || !size) - return -EINVAL; + return -EINVAL; - sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size); + sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size); if (!sb) { printk(KERN_WARNING PREFIX "Unable to map SBF\n"); return -ENODEV; } - sbf_port = sb->sbf_cmos; /* Save CMOS port */ + sbf_port = sb->sbf_cmos; /* Save CMOS port */ return 0; } - #ifdef CONFIG_HPET_TIMER static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) @@ -598,7 +579,7 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) if (!phys || !size) return -EINVAL; - hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size); + hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size); if (!hpet_tbl) { printk(KERN_WARNING PREFIX "Unable to map HPET\n"); return -ENODEV; @@ -609,22 +590,21 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) "memory.\n"); return -1; } - #ifdef CONFIG_X86_64 - vxtime.hpet_address = hpet_tbl->addr.addrl | - ((long) hpet_tbl->addr.addrh << 32); + vxtime.hpet_address = hpet_tbl->addr.addrl | + ((long)hpet_tbl->addr.addrh << 32); - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, vxtime.hpet_address); -#else /* X86 */ + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, vxtime.hpet_address); +#else /* X86 */ { extern unsigned long hpet_address; hpet_address = hpet_tbl->addr.addrl; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); + hpet_tbl->id, hpet_address); } -#endif /* X86 */ +#endif /* X86 */ return 0; } @@ -640,28 +620,25 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) { struct fadt_descriptor_rev2 *fadt = NULL; - fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); - if(!fadt) { + fadt = (struct fadt_descriptor_rev2 *)__acpi_map_table(phys, size); + if (!fadt) { printk(KERN_WARNING PREFIX "Unable to map FADT\n"); return 0; } - -#ifdef CONFIG_ACPI_INTERPRETER /* initialize sci_int early for INT_SRC_OVR MADT parsing */ acpi_fadt.sci_int = fadt->sci_int; -#endif -#ifdef CONFIG_ACPI_BUS /* initialize rev and apic_phys_dest_mode for x86_64 genapic */ acpi_fadt.revision = fadt->revision; - acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode; -#endif + acpi_fadt.force_apic_physical_destination_mode = + fadt->force_apic_physical_destination_mode; #ifdef CONFIG_X86_PM_TIMER /* detect the location of the ACPI PM Timer */ if (fadt->revision >= FADT2_REVISION_ID) { /* FADT rev. 
2 */ - if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) + if (fadt->xpm_tmr_blk.address_space_id != + ACPI_ADR_SPACE_SYSTEM_IO) return 0; pmtmr_ioport = fadt->xpm_tmr_blk.address; @@ -670,16 +647,15 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) pmtmr_ioport = fadt->V1_pm_tmr_blk; } if (pmtmr_ioport) - printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", + pmtmr_ioport); #endif return 0; } - -unsigned long __init -acpi_find_rsdp (void) +unsigned long __init acpi_find_rsdp(void) { - unsigned long rsdp_phys = 0; + unsigned long rsdp_phys = 0; if (efi_enabled) { if (efi.acpi20) @@ -691,9 +667,9 @@ acpi_find_rsdp (void) * Scan memory looking for the RSDP signature. First search EBDA (low * memory) paragraphs and then search upper memory (E0000-FFFFF). */ - rsdp_phys = acpi_scan_rsdp (0, 0x400); + rsdp_phys = acpi_scan_rsdp(0, 0x400); if (!rsdp_phys) - rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000); + rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000); return rsdp_phys; } @@ -703,8 +679,7 @@ acpi_find_rsdp (void) * Parse LAPIC entries in MADT * returns 0 on success, < 0 on error */ -static int __init -acpi_parse_madt_lapic_entries(void) +static int __init acpi_parse_madt_lapic_entries(void) { int count; @@ -713,28 +688,31 @@ acpi_parse_madt_lapic_entries(void) * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). */ - count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0); + count = + acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, + acpi_parse_lapic_addr_ovr, 0); if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); return count; } mp_register_lapic_address(acpi_lapic_addr); count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic, - MAX_APICS); - if (!count) { + MAX_APICS); + if (!count) { printk(KERN_ERR PREFIX "No LAPIC entries present\n"); /* TBD: Cleanup to allow fallback to MPS */ return -ENODEV; - } - else if (count < 0) { + } else if (count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ return count; } - count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0); + count = + acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); /* TBD: Cleanup to allow fallback to MPS */ @@ -742,15 +720,14 @@ acpi_parse_madt_lapic_entries(void) } return 0; } -#endif /* CONFIG_X86_LOCAL_APIC */ +#endif /* CONFIG_X86_LOCAL_APIC */ -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#ifdef CONFIG_X86_IO_APIC /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error */ -static int __init -acpi_parse_madt_ioapic_entries(void) +static int __init acpi_parse_madt_ioapic_entries(void) { int count; @@ -762,30 +739,34 @@ acpi_parse_madt_ioapic_entries(void) */ if (acpi_disabled || acpi_noirq) { return -ENODEV; - } + } /* - * if "noapic" boot option, don't look for IO-APICs + * if "noapic" boot option, don't look for IO-APICs */ if (skip_ioapic_setup) { printk(KERN_INFO PREFIX "Skipping IOAPIC probe " - "due to 'noapic' option.\n"); + "due to 'noapic' option.\n"); return -ENODEV; } - count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS); + count = + acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, + 
MAX_IO_APICS); if (!count) { printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); return -ENODEV; - } - else if (count < 0) { + } else if (count < 0) { printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); return count; } - count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS); + count = + acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, + NR_IRQ_VECTORS); if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); + printk(KERN_ERR PREFIX + "Error parsing interrupt source overrides entry\n"); /* TBD: Cleanup to allow fallback to MPS */ return count; } @@ -800,7 +781,9 @@ acpi_parse_madt_ioapic_entries(void) /* Fill in identity legacy mapings where no override */ mp_config_acpi_legacy_irqs(); - count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS); + count = + acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, + NR_IRQ_VECTORS); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ @@ -814,11 +797,9 @@ static inline int acpi_parse_madt_ioapic_entries(void) { return -1; } -#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */ - +#endif /* !CONFIG_X86_IO_APIC */ -static void __init -acpi_process_madt(void) +static void __init acpi_process_madt(void) { #ifdef CONFIG_X86_LOCAL_APIC int count, error; @@ -833,6 +814,9 @@ acpi_process_madt(void) if (!error) { acpi_lapic = 1; +#ifdef CONFIG_X86_GENERICARCH + generic_bigsmp_probe(); +#endif /* * Parse MADT IO-APIC entries */ @@ -850,7 +834,8 @@ acpi_process_madt(void) /* * Dell Precision Workstation 410, 610 come here. */ - printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n"); + printk(KERN_ERR PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); disable_acpi(); } } @@ -862,7 +847,6 @@ extern int acpi_force; #ifdef __i386__ -#ifdef CONFIG_ACPI_PCI static int __init disable_acpi_irq(struct dmi_system_id *d) { if (!acpi_force) { @@ -882,12 +866,11 @@ static int __init disable_acpi_pci(struct dmi_system_id *d) } return 0; } -#endif static int __init dmi_disable_acpi(struct dmi_system_id *d) { if (!acpi_force) { - printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); + printk(KERN_NOTICE "%s detected: acpi off\n", d->ident); disable_acpi(); } else { printk(KERN_NOTICE @@ -902,7 +885,8 @@ static int __init dmi_disable_acpi(struct dmi_system_id *d) static int __init force_acpi_ht(struct dmi_system_id *d) { if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); + printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", + d->ident); disable_acpi(); acpi_ht = 1; } else { @@ -921,155 +905,155 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { * Boxes that need ACPI disabled */ { - .callback = dmi_disable_acpi, - .ident = "IBM Thinkpad", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), - }, - }, + .callback = dmi_disable_acpi, + .ident = "IBM Thinkpad", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), + }, + }, /* * Boxes that need acpi=ht */ { - .callback = force_acpi_ht, - .ident = "FSC Primergy T850", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), - DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), - }, - }, + .callback = force_acpi_ht, + .ident = "FSC Primergy T850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), + }, + }, { - 
.callback = force_acpi_ht, - .ident = "DELL GX240", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), - }, - }, + .callback = force_acpi_ht, + .ident = "DELL GX240", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), + }, + }, { - .callback = force_acpi_ht, - .ident = "HP VISUALIZE NT Workstation", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), - }, - }, + .callback = force_acpi_ht, + .ident = "HP VISUALIZE NT Workstation", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), + }, + }, { - .callback = force_acpi_ht, - .ident = "Compaq Workstation W8000", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), - DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), - }, - }, + .callback = force_acpi_ht, + .ident = "Compaq Workstation W8000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), + }, + }, { - .callback = force_acpi_ht, - .ident = "ASUS P4B266", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P4B266"), - }, - }, + .callback = force_acpi_ht, + .ident = "ASUS P4B266", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4B266"), + }, + }, { - .callback = force_acpi_ht, - .ident = "ASUS P2B-DS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), - }, - }, + .callback = force_acpi_ht, + .ident = "ASUS P2B-DS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), + }, + }, { - .callback = force_acpi_ht, - .ident = "ASUS CUR-DLS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), - }, - }, + .callback = force_acpi_ht, + .ident = "ASUS CUR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), + }, + }, { - .callback = force_acpi_ht, - .ident = "ABIT i440BX-W83977", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), - DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), - }, - }, + .callback = force_acpi_ht, + .ident = "ABIT i440BX-W83977", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), + DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), + }, + }, { - .callback = force_acpi_ht, - .ident = "IBM Bladecenter", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), - }, - }, + .callback = force_acpi_ht, + .ident = "IBM Bladecenter", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), + }, + }, { - .callback = force_acpi_ht, - .ident = "IBM eServer xSeries 360", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), - }, - }, + .callback = force_acpi_ht, + .ident = "IBM eServer xSeries 360", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), + }, + }, { - .callback = force_acpi_ht, - .ident = "IBM eserver xSeries 330", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), - }, - }, + .callback = 
force_acpi_ht, + .ident = "IBM eserver xSeries 330", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), + }, + }, { - .callback = force_acpi_ht, - .ident = "IBM eserver xSeries 440", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), - }, - }, - -#ifdef CONFIG_ACPI_PCI + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 440", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), + }, + }, + /* * Boxes that need ACPI PCI IRQ routing disabled */ { - .callback = disable_acpi_irq, - .ident = "ASUS A7V", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), - DMI_MATCH(DMI_BOARD_NAME, "<A7V>"), - /* newer BIOS, Revision 1011, does work */ - DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), - }, - }, + .callback = disable_acpi_irq, + .ident = "ASUS A7V", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), + DMI_MATCH(DMI_BOARD_NAME, "<A7V>"), + /* newer BIOS, Revision 1011, does work */ + DMI_MATCH(DMI_BIOS_VERSION, + "ASUS A7V ACPI BIOS Revision 1007"), + }, + }, /* * Boxes that need ACPI PCI IRQ routing and PCI scan disabled */ - { /* _BBN 0 bug */ - .callback = disable_acpi_pci, - .ident = "ASUS PR-DLS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), - DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), - DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") - }, - }, + { /* _BBN 0 bug */ + .callback = disable_acpi_pci, + .ident = "ASUS PR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), + DMI_MATCH(DMI_BIOS_VERSION, + "ASUS PR-DLS ACPI BIOS Revision 1010"), + DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") + }, + }, { - .callback = disable_acpi_pci, - .ident = "Acer TravelMate 36x Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), - }, - }, -#endif - { } + .callback = disable_acpi_pci, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, + {} }; -#endif /* __i386__ */ +#endif /* __i386__ */ /* * acpi_boot_table_init() and acpi_boot_init() @@ -1094,8 +1078,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { * !0: failure */ -int __init -acpi_boot_table_init(void) +int __init acpi_boot_table_init(void) { int error; @@ -1108,7 +1091,7 @@ acpi_boot_table_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return 1; /* * Initialize the ACPI boot-time table parser. 
@@ -1118,7 +1101,6 @@ acpi_boot_table_init(void) disable_acpi(); return error; } - #ifdef __i386__ check_acpi_pci(); #endif @@ -1142,7 +1124,6 @@ acpi_boot_table_init(void) return 0; } - int __init acpi_boot_init(void) { /* @@ -1150,7 +1131,7 @@ int __init acpi_boot_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return 1; acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); @@ -1168,4 +1149,3 @@ int __init acpi_boot_init(void) return 0; } - diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c new file mode 100644 index 000000000000..4c3036ba65df --- /dev/null +++ b/arch/i386/kernel/acpi/cstate.c @@ -0,0 +1,103 @@ +/* + * arch/i386/kernel/acpi/cstate.c + * + * Copyright (C) 2005 Intel Corporation + * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * - Added _PDC for SMP C-states on Intel CPUs + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> + +#include <acpi/processor.h> +#include <asm/acpi.h> + +static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power + *pow) +{ + struct acpi_object_list *obj_list; + union acpi_object *obj; + u32 *buf; + + /* allocate and initialize pdc. It will be used later. */ + obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); + if (!obj_list) { + printk(KERN_ERR "Memory allocation error\n"); + return; + } + + obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); + if (!obj) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj_list); + return; + } + + buf = kmalloc(12, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj); + kfree(obj_list); + return; + } + + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = ACPI_PDC_C_CAPABILITY_SMP; + + obj->type = ACPI_TYPE_BUFFER; + obj->buffer.length = 12; + obj->buffer.pointer = (u8 *) buf; + obj_list->count = 1; + obj_list->pointer = obj; + pow->pdc = obj_list; + + return; +} + +/* Initialize _PDC data based on the CPU vendor */ +void acpi_processor_power_init_pdc(struct acpi_processor_power *pow, + unsigned int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + + pow->pdc = NULL; + if (c->x86_vendor == X86_VENDOR_INTEL) + acpi_processor_power_init_intel_pdc(pow); + + return; +} + +EXPORT_SYMBOL(acpi_processor_power_init_pdc); + +/* + * Initialize bm_flags based on the CPU cache properties + * On SMP it depends on cache configuration + * - When cache is not shared among all CPUs, we flush cache + * before entering C3. + * - When cache is shared among all CPUs, we use bm_check + * mechanism as in UP case + * + * This routine is called only after all the CPUs are online + */ +void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, + unsigned int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + + flags->bm_check = 0; + if (num_online_cpus() == 1) + flags->bm_check = 1; + else if (c->x86_vendor == X86_VENDOR_INTEL) { + /* + * Today all CPUs that support C3 share cache. + * TBD: This needs to look at cache shared map, once + * multi-core detection patch makes to the base. 
+ */ + flags->bm_check = 1; + } +} + +EXPORT_SYMBOL(acpi_processor_power_init_bm_check); diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 726a5ca4b165..1ae2aeeda18b 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -7,45 +7,55 @@ #include <linux/pci.h> #include <asm/pci-direct.h> #include <asm/acpi.h> +#include <asm/apic.h> -static int __init check_bridge(int vendor, int device) +static int __init check_bridge(int vendor, int device) { /* According to Nvidia all timer overrides are bogus. Just ignore them all. */ - if (vendor == PCI_VENDOR_ID_NVIDIA) { - acpi_skip_timer_override = 1; + if (vendor == PCI_VENDOR_ID_NVIDIA) { + acpi_skip_timer_override = 1; } +#ifdef CONFIG_X86_LOCAL_APIC + /* + * ATI IXP chipsets get double timer interrupts. + * For now just do this for all ATI chipsets. + * FIXME: this needs to be checked for the non ACPI case too. + */ + if (vendor == PCI_VENDOR_ID_ATI) + disable_timer_pin_1 = 1; +#endif return 0; } - -void __init check_acpi_pci(void) -{ - int num,slot,func; + +void __init check_acpi_pci(void) +{ + int num, slot, func; /* Assume the machine supports type 1. If not it will always read ffffffff and should not have any side effect. */ /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { + for (num = 0; num < 32; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { u32 class; u32 vendor; - class = read_pci_config(num,slot,func, + class = read_pci_config(num, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) - break; + break; if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) - continue; - - vendor = read_pci_config(num, slot, func, + continue; + + vendor = read_pci_config(num, slot, func, PCI_VENDOR_ID); - - if (check_bridge(vendor&0xffff, vendor >> 16)) - return; - } - + + if (check_bridge(vendor & 0xffff, vendor >> 16)) + return; + } + } } } diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index c1af93032ff3..1cb2b186a3af 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -20,12 +20,13 @@ extern void zap_low_mappings(void); extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); -static void init_low_mapping(pgd_t *pgd, int pgd_limit) +static void init_low_mapping(pgd_t * pgd, int pgd_limit) { int pgd_ofs = 0; - while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { - set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD)); + while ((pgd_ofs < pgd_limit) + && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { + set_pgd(pgd, *(pgd + USER_PTRS_PER_PGD)); pgd_ofs++, pgd++; } flush_tlb_all(); @@ -37,12 +38,13 @@ static void init_low_mapping(pgd_t *pgd, int pgd_limit) * Create an identity mapped page table and copy the wakeup routine to * low memory. 
*/ -int acpi_save_state_mem (void) +int acpi_save_state_mem(void) { if (!acpi_wakeup_address) return 1; init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD); - memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start); + memcpy((void *)acpi_wakeup_address, &wakeup_start, + &wakeup_end - &wakeup_start); acpi_copy_wakeup_routine(acpi_wakeup_address); return 0; @@ -51,7 +53,7 @@ int acpi_save_state_mem (void) /* * acpi_restore_state - undo effects of acpi_save_state_mem */ -void acpi_restore_state_mem (void) +void acpi_restore_state_mem(void) { zap_low_mappings(); } @@ -67,7 +69,8 @@ void acpi_restore_state_mem (void) void __init acpi_reserve_bootmem(void) { if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { - printk(KERN_ERR "ACPI: Wakeup code way too big, S3 disabled.\n"); + printk(KERN_ERR + "ACPI: Wakeup code way too big, S3 disabled.\n"); return; } @@ -90,10 +93,8 @@ static int __init acpi_sleep_setup(char *str) return 1; } - __setup("acpi_sleep=", acpi_sleep_setup); - static __init int reset_videomode_after_s3(struct dmi_system_id *d) { acpi_video_flags |= 2; @@ -101,14 +102,14 @@ static __init int reset_videomode_after_s3(struct dmi_system_id *d) } static __initdata struct dmi_system_id acpisleep_dmi_table[] = { - { /* Reset video mode after returning from ACPI S3 sleep */ - .callback = reset_videomode_after_s3, - .ident = "Toshiba Satellite 4030cdt", - .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - }, - }, - { } + { /* Reset video mode after returning from ACPI S3 sleep */ + .callback = reset_videomode_after_s3, + .ident = "Toshiba Satellite 4030cdt", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), + }, + }, + {} }; static int __init acpisleep_dmi_init(void) diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S index 39d32484f6f5..7c74fe0dc93c 100644 --- a/arch/i386/kernel/acpi/wakeup.S +++ b/arch/i386/kernel/acpi/wakeup.S @@ -74,8 +74,9 @@ wakeup_code: movw %ax,%fs movw $0x0e00 + 'i', %fs:(0x12) - # need a gdt - lgdt real_save_gdt - wakeup_code + # need a gdt -- use lgdtl to force 32-bit operands, in case + # the GDT is located past 16 megabytes. + lgdtl real_save_gdt - wakeup_code movl real_save_cr0 - wakeup_code, %eax movl %eax, %cr0 @@ -303,12 +304,6 @@ ret_point: call restore_processor_state ret -ENTRY(do_suspend_lowlevel_s4bios) - call save_processor_state - call save_registers - call acpi_enter_sleep_state_s4bios - ret - ALIGN # saved registers saved_gdt: .long 0,0 diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index bd1dbf3bd223..a22a866de8f9 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -726,15 +726,11 @@ __setup("apic=", apic_set_verbosity); static int __init detect_init_APIC (void) { u32 h, l, features; - extern void get_cpu_vendor(struct cpuinfo_x86*); /* Disabled by kernel option? */ if (enable_local_apic < 0) return -1; - /* Workaround for us being called before identify_cpu(). */ - get_cpu_vendor(&boot_cpu_data); - switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 064211d5f41b..d7811c4e8b50 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -911,14 +911,7 @@ static void apm_power_off(void) 0xcd, 0x15 /* int $0x15 */ }; - /* - * This may be called on an SMP machine. 
- */ -#ifdef CONFIG_SMP /* Some bioses don't like being called from CPU != 0 */ - set_cpus_allowed(current, cpumask_of_cpu(0)); - BUG_ON(smp_processor_id() != 0); -#endif if (apm_info.realmode_power_off) { (void)apm_save_cpus(); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 2203a9d20212..9ad43be9a01f 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -151,7 +151,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) } -void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -435,6 +435,11 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) if (c == &boot_cpu_data) sysenter_setup(); enable_sep_cpu(); + + if (c == &boot_cpu_data) + mtrr_bp_init(); + else + mtrr_ap_init(); } #ifdef CONFIG_X86_HT @@ -608,8 +613,8 @@ void __devinit cpu_init(void) memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), GDT_ENTRY_TLS_ENTRIES * 8); - __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu])); - __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); + load_gdt(&cpu_gdt_descr[cpu]); + load_idt(&idt_descr); /* * Delete NT @@ -637,12 +642,12 @@ void __devinit cpu_init(void) asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); /* Clear all 6 debug registers: */ - -#define CD(register) set_debugreg(0, register) - - CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); - -#undef CD + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); /* * Force FPU initialization: diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 963e17aa205d..822c8ce9d1f1 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -31,6 +31,7 @@ #include <linux/cpufreq.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/compiler.h> #include <asm/io.h> #include <asm/delay.h> #include <asm/uaccess.h> @@ -57,6 +58,8 @@ static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; static struct cpufreq_driver acpi_cpufreq_driver; +static unsigned int acpi_pstate_strict; + static int acpi_processor_write_port( u16 port, @@ -163,34 +166,44 @@ acpi_processor_set_performance ( } /* - * Then we read the 'status_register' and compare the value with the - * target state's 'status' to make sure the transition was successful. - * Note that we'll poll for up to 1ms (100 cycles of 10us) before - * giving up. + * Assume the write went through when acpi_pstate_strict is not used. + * As read status_register is an expensive operation and there + * are no specific error cases where an IO port write will fail. */ - - port = data->acpi_data.status_register.address; - bit_width = data->acpi_data.status_register.bit_width; - - dprintk("Looking for 0x%08x from port 0x%04x\n", - (u32) data->acpi_data.states[state].status, port); - - for (i=0; i<100; i++) { - ret = acpi_processor_read_port(port, bit_width, &value); - if (ret) { - dprintk("Invalid port width 0x%04x\n", bit_width); - retval = ret; - goto migrate_end; + if (acpi_pstate_strict) { + /* Then we read the 'status_register' and compare the value + * with the target state's 'status' to make sure the + * transition was successful. + * Note that we'll poll for up to 1ms (100 cycles of 10us) + * before giving up. 
+ */ + + port = data->acpi_data.status_register.address; + bit_width = data->acpi_data.status_register.bit_width; + + dprintk("Looking for 0x%08x from port 0x%04x\n", + (u32) data->acpi_data.states[state].status, port); + + for (i=0; i<100; i++) { + ret = acpi_processor_read_port(port, bit_width, &value); + if (ret) { + dprintk("Invalid port width 0x%04x\n", bit_width); + retval = ret; + goto migrate_end; + } + if (value == (u32) data->acpi_data.states[state].status) + break; + udelay(10); } - if (value == (u32) data->acpi_data.states[state].status) - break; - udelay(10); + } else { + i = 0; + value = (u32) data->acpi_data.states[state].status; } /* notify cpufreq */ cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); - if (value != (u32) data->acpi_data.states[state].status) { + if (unlikely(value != (u32) data->acpi_data.states[state].status)) { unsigned int tmp = cpufreq_freqs.new; cpufreq_freqs.new = cpufreq_freqs.old; cpufreq_freqs.old = tmp; @@ -442,6 +455,13 @@ acpi_cpufreq_cpu_init ( (u32) data->acpi_data.states[i].transition_latency); cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); + + /* + * the first call to ->target() should result in us actually + * writing something to the appropriate registers. + */ + data->resume = 1; + return (result); err_freqfree: @@ -530,6 +550,8 @@ acpi_cpufreq_exit (void) return; } +module_param(acpi_pstate_strict, uint, 0644); +MODULE_PARM_DESC(acpi_pstate_strict, "value 0 or non-zero. non-zero -> strict ACPI checks are performed during frequency changes."); late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 04e3563da4fe..8ef38544453c 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -64,8 +64,6 @@ static int dont_scale_voltage; #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) -#define __hlt() __asm__ __volatile__("hlt": : :"memory") - /* Clock ratios multiplied by 10 */ static int clock_ratio[32]; static int eblcr_table[32]; @@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul, outb(0xFE,0x21); /* TMR0 only */ outb(0xFF,0x80); /* delay */ - local_irq_enable(); - - __hlt(); + safe_halt(); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); - __hlt(); + halt(); local_irq_disable(); @@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) bcr2.bits.CLOCKMUL = clock_ratio_index; local_irq_disable(); wrmsrl (MSR_VIA_BCR2, bcr2.val); - local_irq_enable(); - - __hlt(); + safe_halt(); /* Disable software clock multiplier */ rdmsrl (MSR_VIA_BCR2, bcr2.val); @@ -473,11 +467,11 @@ static void __init longhaul_setup_voltagescaling(void) } if (vrmrev==0) { - dprintk ("VRM 8.5 \n"); + dprintk ("VRM 8.5\n"); memcpy (voltage_table, vrm85scales, sizeof(voltage_table)); numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25; } else { - dprintk ("Mobile VRM \n"); + dprintk ("Mobile VRM\n"); memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table)); numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5; } diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 10cc096c0ade..ab6e0611303d 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -1,5 +1,5 @@ /* - * (c) 2003, 2004 Advanced Micro Devices, Inc. + * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc. 
* Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html @@ -44,7 +44,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.40.2" +#define VERSION "version 1.50.3" #include "powernow-k8.h" /* serialize freq changes */ @@ -110,14 +110,13 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 lo, hi; u32 i = 0; - lo = MSR_S_LO_CHANGE_PENDING; - while (lo & MSR_S_LO_CHANGE_PENDING) { + do { if (i++ > 0x1000000) { printk(KERN_ERR PFX "detected change pending stuck\n"); return 1; } rdmsr(MSR_FIDVID_STATUS, lo, hi); - } + } while (lo & MSR_S_LO_CHANGE_PENDING); data->currvid = hi & MSR_S_HI_CURRENT_VID; data->currfid = lo & MSR_S_LO_CURRENT_FID; @@ -232,7 +231,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) /* * Reduce the vid by the max of step or reqvid. * Decreasing vid codes represent increasing voltages: - * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off. + * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. */ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) { @@ -467,7 +466,7 @@ static int check_supported_cpu(unsigned int cpu) eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XFAM) != CPUID_XFAM_K8) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_E)) { + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); goto out; } @@ -696,6 +695,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; @@ -735,8 +735,16 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) } for (i = 0; i < data->acpi_data.state_count; i++) { - u32 fid = data->acpi_data.states[i].control & FID_MASK; - u32 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + u32 fid; + u32 vid; + + if (data->exttype) { + fid = data->acpi_data.states[i].status & FID_MASK; + vid = (data->acpi_data.states[i].status >> VID_SHIFT) & VID_MASK; + } else { + fid = data->acpi_data.states[i].control & FID_MASK; + vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + } dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); @@ -753,7 +761,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) } /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ - if (vid == 0x1f) { + if (vid == VID_OFF) { dprintk("invalid vid %u, ignoring\n", vid); powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; continue; @@ -930,15 +938,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi down(&fidvid_sem); - for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { - /* make sure the sibling is initialized */ - if (!powernow_data[i]) { - ret = 0; - up(&fidvid_sem); - goto err_out; - } - } - powernow_k8_acpi_pst_values(data, newstate); if (transition_frequency(data, 
newstate)) { @@ -978,7 +977,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask = CPU_MASK_ALL; - int rc; + int rc, i; if (!check_supported_cpu(pol->cpu)) return -ENODEV; @@ -1064,7 +1063,9 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) printk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - powernow_data[pol->cpu] = data; + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + powernow_data[i] = data; + } return 0; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index 9ed5bf221cb7..b1e85bb36396 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -1,5 +1,5 @@ /* - * (c) 2003, 2004 Advanced Micro Devices, Inc. + * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html @@ -19,6 +19,7 @@ struct powernow_k8_data { u32 vidmvs; /* usable value calculated from mvs */ u32 vstable; /* voltage stabilization time, units 20 us */ u32 plllock; /* pll lock time, units 1 us */ + u32 exttype; /* extended interface = 1 */ /* keep track of the current fid / vid */ u32 currvid, currfid; @@ -41,7 +42,7 @@ struct powernow_k8_data { #define CPUID_XFAM 0x0ff00000 /* extended family */ #define CPUID_XFAM_K8 0 #define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_E 0x00020000 +#define CPUID_XMOD_REV_F 0x00040000 #define CPUID_USE_XFAM_XMOD 0x00000f00 #define CPUID_GET_MAX_CAPABILITIES 0x80000000 #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 @@ -57,25 +58,26 @@ struct powernow_k8_data { /* Field definitions within the FID VID Low Control MSR : */ #define MSR_C_LO_INIT_FID_VID 0x00010000 -#define MSR_C_LO_NEW_VID 0x00001f00 -#define MSR_C_LO_NEW_FID 0x0000002f +#define MSR_C_LO_NEW_VID 0x00003f00 +#define MSR_C_LO_NEW_FID 0x0000003f #define MSR_C_LO_VID_SHIFT 8 /* Field definitions within the FID VID High Control MSR : */ -#define MSR_C_HI_STP_GNT_TO 0x000fffff +#define MSR_C_HI_STP_GNT_TO 0x000fffff /* Field definitions within the FID VID Low Status MSR : */ -#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ -#define MSR_S_LO_MAX_RAMP_VID 0x1f000000 +#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ +#define MSR_S_LO_MAX_RAMP_VID 0x3f000000 #define MSR_S_LO_MAX_FID 0x003f0000 #define MSR_S_LO_START_FID 0x00003f00 #define MSR_S_LO_CURRENT_FID 0x0000003f /* Field definitions within the FID VID High Status MSR : */ -#define MSR_S_HI_MAX_WORKING_VID 0x001f0000 -#define MSR_S_HI_START_VID 0x00001f00 -#define MSR_S_HI_CURRENT_VID 0x0000001f -#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 +#define MSR_S_HI_MIN_WORKING_VID 0x3f000000 +#define MSR_S_HI_MAX_WORKING_VID 0x003f0000 +#define MSR_S_HI_START_VID 0x00003f00 +#define MSR_S_HI_CURRENT_VID 0x0000003f +#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 /* * There are restrictions frequencies have to follow: @@ -99,13 +101,15 @@ struct powernow_k8_data { #define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ #define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ -#define LEAST_VID 0x1e /* Lowest (numerically highest) useful vid value */ +#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ #define MIN_FREQ 800 /* Min and max freqs, per spec */ #define MAX_FREQ 5000 #define 
INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */ -#define INVALID_VID_MASK 0xffffffe0 /* not a valid vid if these bits are set */ +#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ + +#define VID_OFF 0x3f #define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ @@ -121,12 +125,14 @@ struct powernow_k8_data { #define IRT_SHIFT 30 #define RVO_SHIFT 28 +#define EXT_TYPE_SHIFT 27 #define PLL_L_SHIFT 20 #define MVS_SHIFT 18 #define VST_SHIFT 11 #define VID_SHIFT 6 #define IRT_MASK 3 #define RVO_MASK 3 +#define EXT_TYPE_MASK 1 #define PLL_L_MASK 0x7f #define MVS_MASK 3 #define VST_MASK 0x7f diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 7dcbf70fc16f..c397b6220430 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -259,7 +259,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) if (model->op_points == NULL) { /* Matched a non-match */ - dprintk(KERN_INFO PFX "no table support for CPU model \"%s\": \n", + dprintk(KERN_INFO PFX "no table support for CPU model \"%s\"\n", cpu->x86_model_id); #ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n"); @@ -375,7 +375,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) arg0.buffer.pointer = (u8 *) arg0_buf; arg0_buf[0] = ACPI_PDC_REVISION_ID; arg0_buf[1] = 1; - arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR; + arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR; p.pdc = &arg_list; @@ -402,7 +402,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) for (i=0; i<p.state_count; i++) { if (p.states[i].control != p.states[i].status) { - dprintk("Different control (%x) and status values (%x)\n", + dprintk("Different control (%llu) and status values (%llu)\n", p.states[i].control, p.states[i].status); result = -EINVAL; goto err_unreg; @@ -415,7 +415,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } if (p.states[i].core_frequency > p.states[0].core_frequency) { - dprintk("P%u has larger frequency (%u) than P0 (%u), skipping\n", i, + dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i, p.states[i].core_frequency, p.states[0].core_frequency); p.states[i].core_frequency = 0; continue; @@ -498,13 +498,6 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; - for (i = 0; i < N_IDS; i++) - if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) - break; - - if (i != N_IDS) - centrino_cpu[policy->cpu] = &cpu_ids[i]; - if (is_const_loops_cpu(policy->cpu)) { centrino_driver.flags |= CPUFREQ_CONST_LOOPS; } @@ -513,6 +506,13 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) if (policy->cpu != 0) return -ENODEV; + for (i = 0; i < N_IDS; i++) + if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) + break; + + if (i != N_IDS) + centrino_cpu[policy->cpu] = &cpu_ids[i]; + if (!centrino_cpu[policy->cpu]) { dprintk(KERN_INFO PFX "found unsupported CPU with " "Enhanced SpeedStep: send /proc/cpuinfo to " diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index b25fb6b635ae..2718fb6f6aba 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -99,7 +99,7 @@ static int 
speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) u32 function = GET_SPEEDSTEP_FREQS; if (!(ist_info.event & 0xFFFF)) { - dprintk("bug #1422 -- can't read freqs from BIOS\n", result); + dprintk("bug #1422 -- can't read freqs from BIOS\n"); return -ENODEV; } diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index ba4b01138c8f..ff87cc22b323 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void) setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ cr0 = 0x20000000; - __asm__("movl %%cr0,%%eax\n\t" - "orl %0,%%eax\n\t" - "movl %%eax,%%cr0\n" - : : "r" (cr0) - :"ax"); + write_cr0(read_cr0() | cr0); /* CCR2 bit 2: lock NW bit and set WT1 */ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 96a75d045835..43601de0f633 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -25,7 +25,7 @@ extern int trap_init_f00f_bug(void); /* * Alignment at which movsl is preferred for bulk memory copies. */ -struct movsl_mask movsl_mask; +struct movsl_mask movsl_mask __read_mostly; #endif void __devinit early_intel_workaround(struct cpuinfo_x86 *c) @@ -82,16 +82,13 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) */ static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) { - unsigned int eax; + unsigned int eax, ebx, ecx, edx; if (c->cpuid_level < 4) return 1; - __asm__("cpuid" - : "=a" (eax) - : "0" (4), "c" (0) - : "bx", "dx"); - + /* Intel has a non-standard dependency on %ecx for this CPUID level. */ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); if (eax & 0x1f) return ((eax >> 26) + 1); else diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 1d768b263269..9e0d5f83cb9f 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -128,7 +128,7 @@ static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le cpuid_count(4, index, &eax, &ebx, &ecx, &edx); cache_eax.full = eax; if (cache_eax.split.type == CACHE_TYPE_NULL) - return -1; + return -EIO; /* better error ? 
*/ this_leaf->eax.full = eax; this_leaf->ebx.full = ebx; @@ -305,6 +305,9 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf; unsigned long num_threads_sharing; +#ifdef CONFIG_X86_HT + struct cpuinfo_x86 *c = cpu_data + cpu; +#endif this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; @@ -314,10 +317,12 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index) #ifdef CONFIG_X86_HT else if (num_threads_sharing == smp_num_siblings) this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; -#endif + else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) + this_leaf->shared_cpu_map = cpu_core_map[cpu]; else - printk(KERN_INFO "Number of CPUs sharing cache didn't match " + printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " "any known set of CPUs\n"); +#endif } #else static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} @@ -334,6 +339,7 @@ static int __devinit detect_cache_attributes(unsigned int cpu) struct _cpuid4_info *this_leaf; unsigned long j; int retval; + cpumask_t oldmask; if (num_cache_leaves == 0) return -ENOENT; @@ -345,19 +351,26 @@ static int __devinit detect_cache_attributes(unsigned int cpu) memset(cpuid4_info[cpu], 0, sizeof(struct _cpuid4_info) * num_cache_leaves); + oldmask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (retval) + goto out; + /* Do cpuid and store the results */ + retval = 0; for (j = 0; j < num_cache_leaves; j++) { this_leaf = CPUID4_INFO_IDX(cpu, j); retval = cpuid4_cache_lookup(j, this_leaf); if (unlikely(retval < 0)) - goto err_out; + break; cache_shared_cpu_map_setup(cpu, j); } - return 0; + set_cpus_allowed(current, oldmask); -err_out: - free_cache_attributes(cpu); - return -ENOMEM; +out: + if (retval) + free_cache_attributes(cpu); + return retval; } #ifdef CONFIG_SYSFS diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index 64d91f73a0a4..169ac8e0db68 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -67,13 +67,6 @@ void __init get_mtrr_state(void) mtrr_state.enabled = (lo & 0xc00) >> 10; } -/* Free resources associated with a struct mtrr_state */ -void __init finalize_mtrr_state(void) -{ - kfree(mtrr_state.var_ranges); - mtrr_state.var_ranges = NULL; -} - /* Some BIOS's are fucked and don't set all MTRRs the same! */ void __init mtrr_state_warn(void) { @@ -334,6 +327,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, */ { unsigned long flags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; local_irq_save(flags); prepare_set(); @@ -342,11 +338,15 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, /* The invalid bit is kept in the mask, so we simply clear the relevant mask register to disable a range. 
*/ mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); + memset(vr, 0, sizeof(struct mtrr_var_range)); } else { - mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type, - (base & size_and_mask) >> (32 - PAGE_SHIFT)); - mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800, - (-size & size_and_mask) >> (32 - PAGE_SHIFT)); + vr->base_lo = base << PAGE_SHIFT | type; + vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); + vr->mask_lo = -size << PAGE_SHIFT | 0x800; + vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + + mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); + mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); } post_set(); diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index d66b09e0c820..dd4ebd6af7e4 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -332,6 +332,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = -EINVAL; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); /* Search for existing MTRR */ down(&main_lock); for (i = 0; i < num_var_ranges; ++i) { @@ -372,6 +374,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; out: up(&main_lock); + unlock_cpu_hotplug(); return error; } @@ -461,6 +464,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) return -ENXIO; max = num_var_ranges; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); down(&main_lock); if (reg < 0) { /* Search for existing MTRR */ @@ -501,6 +506,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) error = reg; out: up(&main_lock); + unlock_cpu_hotplug(); return error; } /** @@ -544,21 +550,9 @@ static void __init init_ifs(void) centaur_init_mtrr(); } -static void __init init_other_cpus(void) -{ - if (use_intel()) - get_mtrr_state(); - - /* bring up the other processors */ - set_mtrr(~0U,0,0,0); - - if (use_intel()) { - finalize_mtrr_state(); - mtrr_state_warn(); - } -} - - +/* The suspend/resume methods are only for CPUs without MTRR. CPUs using the + * generic MTRR driver don't require this. + */ struct mtrr_value { mtrr_type ltype; unsigned long lbase; @@ -567,7 +561,7 @@ struct mtrr_value { static struct mtrr_value * mtrr_state; -static int mtrr_save(struct sys_device * sysdev, u32 state) +static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { int i; int size = num_var_ranges * sizeof(struct mtrr_value); @@ -611,13 +605,13 @@ static struct sysdev_driver mtrr_sysdev_driver = { /** - * mtrr_init - initialize mtrrs on the boot CPU + * mtrr_bp_init - initialize mtrrs on the boot CPU * * This needs to be called early; before any of the other CPUs are * initialized (i.e. before smp_init()). * */ -static int __init mtrr_init(void) +void __init mtrr_bp_init(void) { init_ifs(); @@ -674,12 +668,48 @@ static int __init mtrr_init(void) if (mtrr_if) { set_num_var_ranges(); init_table(); - init_other_cpus(); - - return sysdev_driver_register(&cpu_sysdev_class, - &mtrr_sysdev_driver); + if (use_intel()) + get_mtrr_state(); } - return -ENXIO; } -subsys_initcall(mtrr_init);
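The lock_cpu_hotplug()/main_lock pairing added to mtrr_add_page() and mtrr_del_page() above exists because mtrr_ap_init() below replays the saved table on a booting AP without taking main_lock; writers therefore keep hotplug quiescent instead. A minimal sketch of the resulting locking order, mirroring only the calls visible in these hunks (do_table_update() is a hypothetical placeholder, not a kernel function):

	int writer(void)
	{
		int err;

		lock_cpu_hotplug();	/* no AP may boot and replay a half-written table */
		down(&main_lock);	/* serialize against other writers */
		err = do_table_update();
		up(&main_lock);
		unlock_cpu_hotplug();
		return err;
	}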
+void mtrr_ap_init(void) +{ + unsigned long flags; + + if (!mtrr_if || !use_intel()) + return; + /* + * Ideally we should hold main_lock here to avoid MTRR entries being + * changed, but this routine is called at CPU boot time and holding the + * lock there breaks it. This routine is called in two cases: 1. very + * early during software resume, when there are absolutely no MTRR + * entry changes; 2. CPU hot-add time. We let mtrr_add/del_page hold + * the cpuhotplug lock to prevent MTRR entry changes + */ + local_irq_save(flags); + + mtrr_if->set_all(); + + local_irq_restore(flags); +} + +static int __init mtrr_init_finalize(void) +{ + if (!mtrr_if) + return 0; + if (use_intel()) + mtrr_state_warn(); + else { + /* These CPUs don't have MTRR and seem not to support SMP. They + * have CPU-specific drivers, so we use a tricky method to + * support suspend/resume for them. + * TBD: is there any system with such a CPU that supports + * suspend/resume? If not, we should remove this code. + */ + sysdev_driver_register(&cpu_sysdev_class, + &mtrr_sysdev_driver); + } + return 0; +} +subsys_initcall(mtrr_init_finalize); diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h index de1351245599..99c9f2682041 100644 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h @@ -91,7 +91,6 @@ extern struct mtrr_ops * mtrr_if; extern unsigned int num_var_ranges; -void finalize_mtrr_state(void); void mtrr_state_warn(void); char *mtrr_attrib_to_str(int x); void mtrr_wrmsr(unsigned, unsigned, unsigned); diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index f57e5ee94943..fc426380366b 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -76,6 +76,12 @@ static void __init init_transmeta(struct cpuinfo_x86 *c) #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 ) c->x86 = 6; + +#ifdef CONFIG_SYSCTL + /* randomize_va_space slows us down enormously; + it probably triggers retranslation of x86->native bytecode */ + randomize_va_space = 0; +#endif } static void transmeta_identify(struct cpuinfo_x86 * c) diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index e5fab12f7926..913be77bb844 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -153,7 +153,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ - __asm__("hlt"); + halt(); for(;;); return 1; diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index a3cdf894302b..58516e2ac172 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -6,32 +6,28 @@ #include <linux/bootmem.h> -struct dmi_header { - u8 type; - u8 length; - u16 handle; -}; - -#undef DMI_DEBUG - -#ifdef DMI_DEBUG -#define dmi_printk(x) printk x -#else -#define dmi_printk(x) -#endif - static char * __init dmi_string(struct dmi_header *dm, u8 s) { u8 *bp = ((u8 *) dm) + dm->length; + char *str = ""; - if (!s) - return ""; - s--; - while (s > 0 && *bp) { - bp += strlen(bp) + 1; + if (s) { s--; - } - return bp; + while (s > 0 && *bp) { + bp += strlen(bp) + 1; + s--; + } + + if (*bp != 0) { + str = alloc_bootmem(strlen(bp) + 1); + if (str != NULL) + strcpy(str, bp); + else + printk(KERN_ERR "dmi_string: out of memory.\n"); + } + } + + return str; } /* @@ -84,69 +80,76 @@ static int __init dmi_checksum(u8 *buf) return sum == 0; } -static int __init dmi_iterate(void (*decode)(struct dmi_header *)) +static char *dmi_ident[DMI_STRING_MAX]; +static LIST_HEAD(dmi_devices); + +/* + * Save a DMI string + */ +static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) { - u8 buf[15]; - char __iomem *p, *q; + char *p, *d = (char*) dm; - /* - * no iounmap() for that ioremap(); it would be a no-op, but it's - * so early in setup that sucker gets confused into doing what - * it
shouldn't if we actually call it. - */ - p = ioremap(0xF0000, 0x10000); + if (dmi_ident[slot]) + return; + + p = dmi_string(dm, d[string]); if (p == NULL) - return -1; + return; - for (q = p; q < p + 0x10000; q += 16) { - memcpy_fromio(buf, q, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { - u16 num = (buf[13] << 8) | buf[12]; - u16 len = (buf[7] << 8) | buf[6]; - u32 base = (buf[11] << 24) | (buf[10] << 16) | - (buf[9] << 8) | buf[8]; + dmi_ident[slot] = p; +} - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. - */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); +static void __init dmi_save_devices(struct dmi_header *dm) +{ + int i, count = (dm->length - sizeof(struct dmi_header)) / 2; + struct dmi_device *dev; + + for (i = 0; i < count; i++) { + char *d = ((char *) dm) + (i * 2); - dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", - num, len)); - dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base)); + /* Skip disabled device */ + if ((*d & 0x80) == 0) + continue; - if (dmi_table(base,len, num, decode) == 0) - return 0; + dev = alloc_bootmem(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR "dmi_save_devices: out of memory.\n"); + break; } + + dev->type = *d++ & 0x7f; + dev->name = dmi_string(dm, *d); + dev->device_data = NULL; + + list_add(&dev->list, &dmi_devices); } - return -1; } -static char *dmi_ident[DMI_STRING_MAX]; - -/* - * Save a DMI string - */ -static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) +static void __init dmi_save_ipmi_device(struct dmi_header *dm) { - char *d = (char*)dm; - char *p = dmi_string(dm, d[string]); + struct dmi_device *dev; + void * data; - if (p == NULL || *p == 0) + data = alloc_bootmem(dm->length); + if (data == NULL) { + printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; - if (dmi_ident[slot]) + } + + memcpy(data, dm, dm->length); + + dev = alloc_bootmem(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; + } - dmi_ident[slot] = alloc_bootmem(strlen(p) + 1); - if(dmi_ident[slot]) - strcpy(dmi_ident[slot], p); - else - printk(KERN_ERR "dmi_save_ident: out of memory.\n"); + dev->type = DMI_DEV_TYPE_IPMI; + dev->name = "IPMI controller"; + dev->device_data = data; + + list_add(&dev->list, &dmi_devices); } /* @@ -156,42 +159,69 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) */ static void __init dmi_decode(struct dmi_header *dm) { - u8 *data __attribute__((__unused__)) = (u8 *)dm; - switch(dm->type) { - case 0: - dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4]))); + case 0: /* BIOS Information */ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); - dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_BIOS_VERSION, 5); - dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8]))); dmi_save_ident(dm, DMI_BIOS_DATE, 8); break; - case 1: - dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4]))); + case 1: /* System Information */ dmi_save_ident(dm, DMI_SYS_VENDOR, 4); - dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); - dmi_printk(("Version: %s\n", dmi_string(dm, data[6]))); dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); - dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7]))); dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); break; - case 2: - 
dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4]))); + case 2: /* Base Board Information */ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); - dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_BOARD_NAME, 5); - dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6]))); dmi_save_ident(dm, DMI_BOARD_VERSION, 6); break; + case 10: /* Onboard Devices Information */ + dmi_save_devices(dm); + break; + case 38: /* IPMI Device Information */ + dmi_save_ipmi_device(dm); } } void __init dmi_scan_machine(void) { - if (dmi_iterate(dmi_decode)) - printk(KERN_INFO "DMI not present.\n"); + u8 buf[15]; + char __iomem *p, *q; + + /* + * no iounmap() for that ioremap(); it would be a no-op, but it's + * so early in setup that sucker gets confused into doing what + * it shouldn't if we actually call it. + */ + p = ioremap(0xF0000, 0x10000); + if (p == NULL) + goto out; + + for (q = p; q < p + 0x10000; q += 16) { + memcpy_fromio(buf, q, 15); + if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + u16 num = (buf[13] << 8) | buf[12]; + u16 len = (buf[7] << 8) | buf[6]; + u32 base = (buf[11] << 24) | (buf[10] << 16) | + (buf[9] << 8) | buf[8]; + + /* + * DMI version 0.0 means that the real version is taken from + * the SMBIOS version, which we don't know at this point. + */ + if (buf[14] != 0) + printk(KERN_INFO "DMI %d.%d present.\n", + buf[14] >> 4, buf[14] & 0xF); + else + printk(KERN_INFO "DMI present.\n"); + + if (dmi_table(base,len, num, dmi_decode) == 0) + return; + } + } + +out: printk(KERN_INFO "DMI not present.\n"); } @@ -218,9 +248,9 @@ int dmi_check_system(struct dmi_system_id *list) /* No match */ goto fail; } + count++; if (d->callback && d->callback(d)) break; - count++; fail: d++; } @@ -240,3 +270,32 @@ char *dmi_get_system_info(int field) return dmi_ident[field]; } EXPORT_SYMBOL(dmi_get_system_info); + +/** + * dmi_find_device - find onboard device by type/name + * @type: device type or %DMI_DEV_TYPE_ANY to match all device types + * @desc: device name string or %NULL to match all + * @from: previous device found in search, or %NULL for new search. + * + * Iterates through the list of known onboard devices. If a device is + * found with a matching @vendor and @device, a pointer to its device + * structure is returned. Otherwise, %NULL is returned. + * A new search is initiated by passing %NULL to the @from argument. + * If @from is not %NULL, searches continue from next device. + */ +struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from) +{ + struct list_head *d, *head = from ? 
&from->list : &dmi_devices; + + for(d = head->next; d != &dmi_devices; d = d->next) { + struct dmi_device *dev = list_entry(d, struct dmi_device, list); + + if (((type == DMI_DEV_TYPE_ANY) || (dev->type == type)) && + ((name == NULL) || (strcmp(dev->name, name) == 0))) + return dev; + } + + return NULL; +} +EXPORT_SYMBOL(dmi_find_device); diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c index 789af3e9fb1f..5edb1d379add 100644 --- a/arch/i386/kernel/doublefault.c +++ b/arch/i386/kernel/doublefault.c @@ -20,7 +20,7 @@ static void doublefault_fn(void) struct Xgt_desc_struct gdt_desc = {0, 0}; unsigned long gdt, tss; - __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); + store_gdt(&gdt_desc); gdt = gdt_desc.address; printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 385883ea8c19..ecad519fd395 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void) * directory. If I have PSE, I just need to duplicate one entry in * page directory. */ - __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); + cr4 = read_cr4(); if (cr4 & X86_CR4_PSE) { efi_bak_pg_dir_pointer[0].pgd = @@ -104,8 +104,7 @@ static void efi_call_phys_prelog(void) local_flush_tlb(); cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); - __asm__ __volatile__("lgdt %0":"=m" - (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]))); + load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])); } static void efi_call_phys_epilog(void) @@ -114,8 +113,8 @@ static void efi_call_phys_epilog(void) cpu_gdt_descr[0].address = (unsigned long) __va(cpu_gdt_descr[0].address); - __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); - __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); + load_gdt(&cpu_gdt_descr[0]); + cr4 = read_cr4(); if (cr4 & X86_CR4_PSE) { swapper_pg_dir[pgd_index(0)].pgd = @@ -233,22 +232,23 @@ void __init efi_map_memmap(void) { memmap.map = NULL; - memmap.map = (efi_memory_desc_t *) - bt_ioremap((unsigned long) memmap.phys_map, - (memmap.nr_map * sizeof(efi_memory_desc_t))); - + memmap.map = bt_ioremap((unsigned long) memmap.phys_map, + (memmap.nr_map * memmap.desc_size)); if (memmap.map == NULL) printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); + + memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); } #if EFI_DEBUG static void __init print_efi_memmap(void) { efi_memory_desc_t *md; + void *p; int i; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { + md = p; printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " "range=[0x%016llx-0x%016llx) (%lluMB)\n", i, md->type, md->attribute, md->phys_addr, @@ -271,10 +271,10 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) } prev, curr; efi_memory_desc_t *md; unsigned long start, end; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->num_pages == 0) || (!is_available_memory(md))) continue; @@ -325,6 +325,7 @@ void __init efi_init(void) memmap.phys_map = EFI_MEMMAP; memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; memmap.desc_version = EFI_MEMDESC_VERSION; + memmap.desc_size = EFI_MEMDESC_SIZE; efi.systab = (efi_system_table_t *) boot_ioremap((unsigned long) efi_phys.systab, @@ -428,22 +429,30 @@ void __init efi_init(void) printk(KERN_ERR PFX "Could not map 
the runtime service table!\n"); /* Map the EFI memory map for use until paging_init() */ - - memmap.map = (efi_memory_desc_t *) - boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); - + memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); if (memmap.map == NULL) printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); - if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) { - printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't " - "match the one from EFI!\n"); - } + memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + #if EFI_DEBUG print_efi_memmap(); #endif } +static inline void __init check_range_for_systab(efi_memory_desc_t *md) +{ + if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) && + ((unsigned long)efi_phys.systab < md->phys_addr + + ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) { + unsigned long addr; + + addr = md->virt_addr - md->phys_addr + + (unsigned long)efi_phys.systab; + efi.systab = (efi_system_table_t *)addr; + } +} + /* * This function will switch the EFI runtime services to virtual mode. * Essentially, look through the EFI memmap and map every region that @@ -457,43 +466,32 @@ void __init efi_enter_virtual_mode(void) { efi_memory_desc_t *md; efi_status_t status; - int i; + void *p; efi.systab = NULL; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; - if (md->attribute & EFI_MEMORY_RUNTIME) { - md->virt_addr = - (unsigned long)ioremap(md->phys_addr, - md->num_pages << EFI_PAGE_SHIFT); - if (!(unsigned long)md->virt_addr) { - printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", - (unsigned long)md->phys_addr); - } + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; - if (((unsigned long)md->phys_addr <= - (unsigned long)efi_phys.systab) && - ((unsigned long)efi_phys.systab < - md->phys_addr + - ((unsigned long)md->num_pages << - EFI_PAGE_SHIFT))) { - unsigned long addr; - - addr = md->virt_addr - md->phys_addr + - (unsigned long)efi_phys.systab; - efi.systab = (efi_system_table_t *)addr; - } + md->virt_addr = (unsigned long)ioremap(md->phys_addr, + md->num_pages << EFI_PAGE_SHIFT); + if (!(unsigned long)md->virt_addr) { + printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", + (unsigned long)md->phys_addr); } + /* update the virtual address of the EFI system table */ + check_range_for_systab(md); } if (!efi.systab) BUG(); status = phys_efi_set_virtual_address_map( - sizeof(efi_memory_desc_t) * memmap.nr_map, - sizeof(efi_memory_desc_t), + memmap.desc_size * memmap.nr_map, + memmap.desc_size, memmap.desc_version, memmap.phys_map); @@ -533,10 +531,10 @@ efi_initialize_iomem_resources(struct resource *code_resource, { struct resource *res; efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 0x100000000ULL) @@ -613,10 +611,10 @@ efi_initialize_iomem_resources(struct resource *code_resource, u32 efi_mem_type(unsigned long phys_addr) { efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) return md->type; @@ -627,10 +625,10 @@ u32 efi_mem_type(unsigned long phys_addr) u64 
efi_mem_attributes(unsigned long phys_addr) { efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) return md->attribute; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index a991d4e5edd2..9e24f7b207ee 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -203,7 +203,7 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -226,9 +226,9 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) - # system call tracing in operation + # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -319,7 +319,7 @@ work_notifysig: # deal with pending signals and # vm86-space xorl %edx, %edx call do_notify_resume - jmp restore_all + jmp resume_userspace ALIGN work_notifysig_v86: @@ -329,7 +329,7 @@ work_notifysig_v86: movl %eax, %esp xorl %edx, %edx call do_notify_resume - jmp restore_all + jmp resume_userspace # perform syscall exit tracing ALIGN @@ -338,6 +338,9 @@ syscall_trace_entry: movl %esp, %eax xorl %edx,%edx call do_syscall_trace + cmpl $0, %eax + jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, + # so must skip actual syscall movl ORIG_EAX(%esp), %eax cmpl $(nr_syscalls), %eax jnae syscall_call @@ -504,7 +507,7 @@ label: \ pushl $__KERNEL_CS; \ pushl $sysenter_past_esp -ENTRY(debug) +KPROBE_ENTRY(debug) cmpl $sysenter_entry,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) @@ -515,7 +518,7 @@ debug_stack_correct: movl %esp,%eax # pt_regs pointer call do_debug jmp ret_from_exception - + .previous .text /* * NMI is doubly nasty. 
It can happen _while_ we're handling * a debug fault, and the debug fault hasn't yet been able to @@ -588,13 +591,14 @@ nmi_16bit_stack: .long 1b,iret_exc .previous -ENTRY(int3) +KPROBE_ENTRY(int3) pushl $-1 # mark this as an int SAVE_ALL xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 jmp ret_from_exception + .previous .text ENTRY(overflow) pushl $0 @@ -628,17 +632,19 @@ ENTRY(stack_segment) pushl $do_stack_segment jmp error_code -ENTRY(general_protection) +KPROBE_ENTRY(general_protection) pushl $do_general_protection jmp error_code + .previous .text ENTRY(alignment_check) pushl $do_alignment_check jmp error_code -ENTRY(page_fault) +KPROBE_ENTRY(page_fault) pushl $do_page_fault jmp error_code + .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 4477bb107098..e437fb367498 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -17,7 +17,7 @@ #include <asm/desc.h> #include <asm/cache.h> #include <asm/thread_info.h> -#include <asm/asm_offsets.h> +#include <asm/asm-offsets.h> #include <asm/setup.h> /* @@ -77,6 +77,32 @@ ENTRY(startup_32) subl %edi,%ecx shrl $2,%ecx rep ; stosl +/* + * Copy bootup parameters out of the way. + * Note: %esi still has the pointer to the real-mode data. + * With kexec as the boot loader, the parameter segment might be loaded beyond + * the kernel image and might not even be addressable by the early boot page + * tables (the kexec-on-panic case). Hence, copy out the parameters before + * initializing the page tables. + */ + movl $(boot_params - __PAGE_OFFSET),%edi + movl $(PARAM_SIZE/4),%ecx + cld + rep + movsl + movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi + andl %esi,%esi + jnz 2f # New command line protocol + cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR + jne 1f + movzwl OLD_CL_OFFSET,%esi + addl $(OLD_CL_BASE_ADDR),%esi +2: + movl $(saved_command_line - __PAGE_OFFSET),%edi + movl $(COMMAND_LINE_SIZE/4),%ecx + rep + movsl +1: /* * Initialize page tables. This creates a PDE and a set of page @@ -214,28 +240,6 @@ ENTRY(startup_32_smp) */ call setup_idt -/* - * Copy bootup parameters out of the way. - * Note: %esi still has the pointer to the real-mode data. - */ - movl $boot_params,%edi - movl $(PARAM_SIZE/4),%ecx - cld - rep - movsl - movl boot_params+NEW_CL_POINTER,%esi - andl %esi,%esi - jnz 2f # New command line protocol - cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR - jne 1f - movzwl OLD_CL_OFFSET,%esi - addl $(OLD_CL_BASE_ADDR),%esi -2: - movl $saved_command_line,%edi - movl $(COMMAND_LINE_SIZE/4),%ecx - rep - movsl -1: checkCPUtype: movl $-1,X86_CPUID # -1 for no CPUID initially diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c index b817168d9c62..d75524758daf 100644 --- a/arch/i386/kernel/i387.c +++ b/arch/i386/kernel/i387.c @@ -82,17 +82,6 @@ void kernel_fpu_begin(void) } EXPORT_SYMBOL_GPL(kernel_fpu_begin); -void restore_fpu( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - asm volatile( "fxrstor %0" - : : "m" (tsk->thread.i387.fxsave) ); - } else { - asm volatile( "frstor %0" - : : "m" (tsk->thread.i387.fsave) ); - } -} - /* * FPU tag word conversions. */ diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c new file mode 100644 index 000000000000..c36d1c006c2f --- /dev/null +++ b/arch/i386/kernel/i8237.c @@ -0,0 +1,67 @@ +/* + * i8237.c: 8237A DMA controller suspend functions. + * + * Written by Pierre Ossman, 2005.
+ */ + +#include <linux/init.h> +#include <linux/sysdev.h> + +#include <asm/dma.h> + +/* + * This module just handles suspend/resume issues with the + * 8237A DMA controller (used for ISA and LPC). + * Allocation is handled in kernel/dma.c and normal usage is + * in asm/dma.h. + */ + +static int i8237A_resume(struct sys_device *dev) +{ + unsigned long flags; + int i; + + flags = claim_dma_lock(); + + dma_outb(DMA1_RESET_REG, 0); + dma_outb(DMA2_RESET_REG, 0); + + for (i = 0;i < 8;i++) { + set_dma_addr(i, 0x000000); + /* DMA count is a bit weird so this is not 0 */ + set_dma_count(i, 1); + } + + /* Enable cascade DMA or channel 0-3 won't work */ + enable_dma(4); + + release_dma_lock(flags); + + return 0; +} + +static int i8237A_suspend(struct sys_device *dev, pm_message_t state) +{ + return 0; +} + +static struct sysdev_class i8237_sysdev_class = { + set_kset_name("i8237"), + .suspend = i8237A_suspend, + .resume = i8237A_resume, +}; + +static struct sys_device device_i8237A = { + .id = 0, + .cls = &i8237_sysdev_class, +}; + +static int __init i8237A_init_sysfs(void) +{ + int error = sysdev_class_register(&i8237_sysdev_class); + if (!error) + error = sysdev_register(&device_i8237A); + return error; +} + +device_initcall(i8237A_init_sysfs); diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 6578f40bd501..378313b0cce9 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -33,6 +33,7 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/sysdev.h> + #include <asm/io.h> #include <asm/smp.h> #include <asm/desc.h> @@ -59,6 +60,8 @@ int sis_apic_bug = -1; */ int nr_ioapic_registers[MAX_IO_APICS]; +int disable_timer_pin_1 __initdata; + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -77,7 +80,7 @@ static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #ifdef CONFIG_PCI_MSI #define vector_to_irq(vector) \ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) @@ -222,13 +225,21 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) { unsigned long flags; int pin; struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; + cpumask_t tmp; + cpus_and(tmp, cpumask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(cpumask, tmp, CPU_MASK_ALL); + apicid_value = cpu_mask_to_apicid(cpumask); /* Prepare to do the io_apic_write */ apicid_value = apicid_value << 24; @@ -242,6 +253,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = irq_2_pin + entry->next; } + set_irq_info(irq, cpumask); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -259,7 +271,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # define Dprintk(x...) 
# endif -cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; #define IRQBALANCE_CHECK_ARCH -999 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; @@ -328,12 +339,7 @@ static inline void balance_irq(int cpu, int irq) cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); if (cpu != new_cpu) { - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(irq, cpumask_of_cpu(new_cpu)); } } @@ -528,16 +534,12 @@ tryanotherirq: cpus_and(tmp, target_cpu_mask, allowed_mask); if (!cpus_empty(tmp)) { - irq_desc_t *desc = irq_desc + selected_irq; - unsigned long flags; Dprintk("irq = %d moved to cpu = %d\n", selected_irq, min_loaded); /* mark for change destination */ - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[selected_irq] = - cpumask_of_cpu(min_loaded); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); + /* Since we made a change, come back sooner to * check for more variation. */ @@ -568,12 +570,12 @@ static int balanced_irq(void *unused) /* push everything to CPU 0 to give us a starting point. */ for (i = 0 ; i < NR_IRQS ; i++) { - pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); + pending_irq_cpumask[i] = cpumask_of_cpu(0); + set_pending_irq(i, cpumask_of_cpu(0)); } for ( ; ; ) { - set_current_state(TASK_INTERRUPTIBLE); - time_remaining = schedule_timeout(time_remaining); + time_remaining = schedule_timeout_interruptible(time_remaining); try_to_freeze(); if (time_after(jiffies, prev_balance_time+balanced_irq_interval)) { @@ -647,20 +649,9 @@ int __init irqbalance_disable(char *str) __setup("noirqbalance", irqbalance_disable); -static inline void move_irq(int irq) -{ - /* note - we hold the desc->lock */ - if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { - set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); - cpus_clear(pending_irq_balance_cpumask[irq]); - } -} - late_initcall(balanced_irq_init); - -#else /* !CONFIG_IRQBALANCE */ -static inline void move_irq(int irq) { } #endif /* CONFIG_IRQBALANCE */ +#endif /* CONFIG_SMP */ #ifndef CONFIG_SMP void fastcall send_IPI_self(int vector) @@ -820,6 +811,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -838,6 +830,7 @@ void __init setup_ioapic_dest(void) } } +#endif /* * EISA Edge/Level control register, ELCR @@ -1127,7 +1120,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { @@ -1249,6 +1242,7 @@ static void __init setup_IO_APIC_irqs(void) spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1641,9 +1635,9 @@ void disable_IO_APIC(void) clear_IO_APIC(); /* - * If the i82559 is routed through an IOAPIC + * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode - * so legacy interrups can be delivered. + * so legacy interrupts can be delivered. */ pin = find_isa_irq_pin(0, mp_ExtINT); if (pin != -1) { @@ -1944,6 +1938,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); + move_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1958,6 +1953,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); + move_irq(vector); end_level_ioapic_irq(irq); } @@ -1975,14 +1971,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } #endif +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1992,7 +1991,7 @@ static void set_ioapic_affinity_vector (unsigned int vector, * edge-triggered handler, without risking IRQ storms and other ugly * races. */ -static struct hw_interrupt_type ioapic_edge_type = { +static struct hw_interrupt_type ioapic_edge_type __read_mostly = { .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, @@ -2000,10 +1999,12 @@ static struct hw_interrupt_type ioapic_edge_type = { .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; -static struct hw_interrupt_type ioapic_level_type = { +static struct hw_interrupt_type ioapic_level_type __read_mostly = { .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, @@ -2011,7 +2012,9 @@ static struct hw_interrupt_type ioapic_level_type = { .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -2074,7 +2077,7 @@ static void ack_lapic_irq (unsigned int irq) static void end_lapic_irq (unsigned int i) { /* nothing */ } -static struct hw_interrupt_type lapic_irq_type = { +static struct hw_interrupt_type lapic_irq_type __read_mostly = { .typename = "local-APIC-edge", .startup = NULL, /* startup_irq() not used for IRQ0 */ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ @@ -2210,6 +2213,8 @@ static inline void check_timer(void) setup_nmi(); enable_8259A_irq(0); } + if (disable_timer_pin_1 > 0) + clear_IO_APIC_pin(0, pin1); return; } clear_IO_APIC_pin(0, pin1); @@ -2421,7 +2426,7 @@ device_initcall(ioapic_init_sysfs); ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI int __init io_apic_get_unique_id (int ioapic, int apic_id) { @@ -2569,9 +2574,10 @@ int 
io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; } -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index 8b25160393c1..f2b37654777f 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused) volatile struct pt_regs * regs = (struct pt_regs *) &unused; unsigned int level = regs->ebx; unsigned int old = (regs->eflags >> 12) & 3; + struct thread_struct *t = &current->thread; if (level > 3) return -EINVAL; @@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused) if (!capable(CAP_SYS_RAWIO)) return -EPERM; } - regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12); - /* Make sure we return the long way (not sysenter) */ - set_thread_flag(TIF_IRET); + t->iopl = level << 12; + regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; + set_iopl_mask(t->iopl); return 0; } diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index a6d8c45961d3..6345b430b105 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -62,32 +62,32 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode) return 0; } -int arch_prepare_kprobe(struct kprobe *p) +int __kprobes arch_prepare_kprobe(struct kprobe *p) { return 0; } -void arch_copy_kprobe(struct kprobe *p) +void __kprobes arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; } -void arch_arm_kprobe(struct kprobe *p) +void __kprobes arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_disarm_kprobe(struct kprobe *p) +void __kprobes arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p) { } @@ -127,7 +127,8 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } -void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) { unsigned long *sara = (unsigned long *)&regs->esp; struct kretprobe_instance *ri; @@ -150,7 +151,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. */ -static int kprobe_handler(struct pt_regs *regs) +static int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0;
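The __kprobes annotations above move the kprobe machinery itself out of probe-able text: a breakpoint planted inside kprobe_handler() would recurse fatally. A sketch of the mechanism, assuming the definitions this series adds to include/linux/kprobes.h and the linker script (names as in that era's tree; treat this as illustrative, not the exact implementation):

	/* tag a function so the linker places it in .kprobes.text */
	#define __kprobes __attribute__((__section__(".kprobes.text")))

	extern char __kprobes_text_start[], __kprobes_text_end[];

	/* register_kprobe() can then refuse addresses in that section */
	static int in_kprobes_functions(unsigned long addr)
	{
		return addr >= (unsigned long)__kprobes_text_start &&
		       addr < (unsigned long)__kprobes_text_end;
	}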
@@ -176,7 +177,8 @@ static int kprobe_handler(struct pt_regs *regs) Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS) { + if (kprobe_status == KPROBE_HIT_SS && + *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; regs->eflags |= kprobe_saved_eflags; unlock_kprobes(); @@ -220,7 +222,10 @@ static int kprobe_handler(struct pt_regs *regs) * either a probepoint or a debugger breakpoint * at this address. In either case, no further * handling of this interrupt is appropriate. + * Back up over the (now missing) int3 and run + * the original instruction. */ + regs->eip -= sizeof(kprobe_opcode_t); ret = 1; } /* Not one of ours: let kernel handle it */ @@ -259,7 +264,7 @@ no_kprobe: /* * Called when we hit the probe point at kretprobe_trampoline */ -int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -338,7 +343,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { unsigned long *tos = (unsigned long *)&regs->esp; unsigned long next_eip = 0; @@ -444,8 +449,8 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) /* * Wrapper routine for handling exceptions. */ -int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, - void *data) +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; switch (val) { @@ -473,7 +478,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, return NOTIFY_DONE; } -int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -495,7 +500,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -void jprobe_return(void) +void __kprobes jprobe_return(void) { preempt_enable_no_resched(); asm volatile (" xchgl %%ebx,%%esp \n" @@ -506,7 +511,7 @@ void jprobe_return(void) (jprobe_saved_esp):"memory"); } -int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { u8 *addr = (u8 *) (regs->eip - 1); unsigned long stack_addr = (unsigned long)jprobe_saved_esp; diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index bb50afbee921..fe1ffa55587d 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -177,7 +177,7 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount) static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) { struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2, *lp; + __u32 entry_1, entry_2; int error; struct user_desc ldt_info; @@ -205,8 +205,6 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) goto out_unlock; } - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); -
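Dropping the raw lp stores funnels all LDT descriptor writes through write_ldt_entry() (see the install: hunk just below). A sketch of what that helper amounts to on bare metal, assuming it mirrors the include/asm-i386/desc.h definition this series introduces (a hypervisor port can substitute a hypercall here):

	static inline void write_ldt_entry(void *ldt, int entry,
					   __u32 entry_a, __u32 entry_b)
	{
		__u32 *lp = (__u32 *)((char *)ldt + entry * 8);

		*lp = entry_a;		/* low dword of the descriptor */
		*(lp + 1) = entry_b;	/* high dword */
	}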
/* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { if (oldmode || LDT_empty(&ldt_info)) { @@ -223,8 +221,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) /* Install the new entry ... */ install: - *lp = entry_1; - *(lp+1) = entry_2; + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); error = 0; out_unlock: diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 52ed18d8b511..a912fed48482 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -16,13 +16,8 @@ #include <asm/io.h> #include <asm/apic.h> #include <asm/cpufeature.h> - -static inline unsigned long read_cr3(void) -{ - unsigned long cr3; - asm volatile("movl %%cr3,%0": "=r"(cr3)); - return cr3; -} +#include <asm/desc.h> +#include <asm/system.h> #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) @@ -90,34 +85,27 @@ static void identity_map_page(unsigned long address) } #endif - static void set_idt(void *newidt, __u16 limit) { - unsigned char curidt[6]; + struct Xgt_desc_struct curidt; /* ia32 supports unaligned loads & stores */ - (*(__u16 *)(curidt)) = limit; - (*(__u32 *)(curidt +2)) = (unsigned long)(newidt); + curidt.size = limit; + curidt.address = (unsigned long)newidt; - __asm__ __volatile__ ( - "lidt %0\n" - : "=m" (curidt) - ); + load_idt(&curidt); }; static void set_gdt(void *newgdt, __u16 limit) { - unsigned char curgdt[6]; + struct Xgt_desc_struct curgdt; /* ia32 supports unaligned loads & stores */ - (*(__u16 *)(curgdt)) = limit; - (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt); + curgdt.size = limit; + curgdt.address = (unsigned long)newgdt; - __asm__ __volatile__ ( - "lgdt %0\n" - : "=m" (curgdt) - ); + load_gdt(&curgdt); }; static void load_segments(void) diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index a77c612aad00..165f13158c60 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -164,7 +164,8 @@ static void collect_cpu_info (void *unused) } wrmsr(MSR_IA32_UCODE_REV, 0, 0); - __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); + /* see notes above for revision 1.07. Apparent chip bug */ + serialize_cpu(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", @@ -377,7 +378,9 @@ static void do_update_one (void * unused) (unsigned long) uci->mc->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); - __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); + /* see notes above for revision 1.07.
Apparent chip bug */ + serialize_cpu(); + /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index af917f609c7d..15949fd08109 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -65,6 +65,8 @@ int nr_ioapics; int pic_mode; unsigned long mp_lapic_addr; +unsigned int def_to_bigsmp = 0; + /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; /* Internal processor count */ @@ -121,7 +123,7 @@ static int MP_valid_apicid(int apicid, int version) static void __init MP_processor_info (struct mpc_config_processor *m) { int ver, apicid; - physid_mask_t tmp; + physid_mask_t phys_cpu; if (!(m->mpc_cpuflag & CPU_ENABLED)) return; @@ -192,27 +194,36 @@ static void __init MP_processor_info (struct mpc_config_processor *m) " Processor ignored.\n", maxcpus); return; } - num_processors++; ver = m->mpc_apicver; if (!MP_valid_apicid(apicid, ver)) { printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n", m->mpc_apicid, MAX_APICS); - --num_processors; return; } - tmp = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp); - + cpu_set(num_processors, cpu_possible_map); + num_processors++; + phys_cpu = apicid_to_cpu_present(apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); + /* * Validate version */ if (ver == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + m->mpc_apicid); ver = 0x10; } apic_version[m->mpc_apicid] = ver; + if ((num_processors > 8) && + APIC_XAPIC(ver) && + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) + def_to_bigsmp = 1; + else + def_to_bigsmp = 0; + bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; } @@ -653,8 +664,6 @@ void __init get_smp_config (void) struct intel_mp_floating *mpf = mpf_found; /* - * ACPI may be used to obtain the entire SMP configuration or just to - * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that * ACPI supports both logical (e.g. Hyper-Threading) and physical * processors, where MPS only supports physical. 
*/ @@ -810,7 +819,7 @@ void __init find_smp_config (void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI void __init mp_register_lapic_address ( u64 address) @@ -856,7 +865,7 @@ void __init mp_register_lapic ( MP_processor_info(&processor); } -#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT)) +#ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 @@ -1071,11 +1080,9 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) */ static int gsi_to_irq[MAX_GSI_NUM]; -#ifdef CONFIG_ACPI_BUS /* Don't set up the ACPI SCI because it's already set up */ if (acpi_fadt.sci_int == gsi) return gsi; -#endif ioapic = mp_find_ioapic(gsi); if (ioapic < 0) { @@ -1116,7 +1123,13 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) */ int irq = gsi; if (gsi < MAX_GSI_NUM) { - gsi = pci_irq++; + if (gsi > 15) + gsi = pci_irq++; + /* + * Don't assign IRQ used by ACPI SCI + */ + if (gsi == acpi_fadt.sci_int) + gsi = pci_irq++; gsi_to_irq[irq] = gsi; } else { printk(KERN_ERR "GSI %u is too high\n", gsi); @@ -1130,5 +1143,5 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) return gsi; } -#endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/ -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_X86_IO_APIC */ +#endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index b2f03c39a6fe..03100d6fc5d6 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -46,23 +46,13 @@ static struct class *msr_class; -/* Note: "err" is handled in a funny way below. Otherwise one version - of gcc or another breaks. 
*/ - static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx) { int err; - asm volatile ("1: wrmsr\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %4,%0\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" " .long 1b,3b\n" ".previous":"=&bDS" (err) - :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0)); - + err = wrmsr_safe(reg, eax, edx); + if (err) + err = -EIO; return err; } @@ -70,18 +60,9 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx) { int err; - asm volatile ("1: rdmsr\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %4,%0\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,3b\n" - ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx) - :"c"(reg), "i"(-EIO), "0"(0)); - + err = rdmsr_safe(reg, eax, edx); + if (err) + err = -EIO; return err; } diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index da6c46d667cb..0178457db721 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -195,7 +195,7 @@ static void disable_lapic_nmi_watchdog(void) wrmsr(MSR_P6_EVNTSEL0, 0, 0); break; case 15: - if (boot_cpu_data.x86_model > 0x3) + if (boot_cpu_data.x86_model > 0x4) break; wrmsr(MSR_P4_IQ_CCCR0, 0, 0); @@ -432,7 +432,7 @@ void setup_apic_nmi_watchdog (void) setup_p6_watchdog(); break; case 15: - if (boot_cpu_data.x86_model > 0x3) + if (boot_cpu_data.x86_model > 0x4) return; if (!setup_p4_watchdog()) @@ -478,6 +478,11 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); } extern void die_nmi(struct pt_regs *, const char *msg); @@ -501,8 +506,11 @@ void nmi_watchdog_tick (struct pt_regs * regs) */ alert_counter[cpu]++; if (alert_counter[cpu] == 5*nmi_hz) + /* + * die_nmi will return ONLY if NOTIFY_STOP happens.. 
+ */ die_nmi(regs, "NMI Watchdog detected LOCKUP"); - } else { + last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; } diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c index e51edf0a6564..5f5b075f860a 100644 --- a/arch/i386/kernel/numaq.c +++ b/arch/i386/kernel/numaq.c @@ -31,6 +31,7 @@ #include <linux/nodemask.h> #include <asm/numaq.h> #include <asm/topology.h> +#include <asm/processor.h> #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -77,3 +78,11 @@ int __init get_memcfg_numaq(void) smp_dump_qct(); return 1; } + +static int __init numaq_tsc_disable(void) +{ + printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); + tsc_disable = 1; + return 0; +} +core_initcall(numaq_tsc_disable); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index ba243a4cc119..b45cbf93d439 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -164,7 +164,7 @@ static inline void play_dead(void) */ local_irq_disable(); while (1) - __asm__ __volatile__("hlt":::"memory"); + halt(); } #else static inline void play_dead(void) @@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs) printk(" DS: %04x ES: %04x\n", 0xffff & regs->xds,0xffff & regs->xes); - __asm__("movl %%cr0, %0": "=r" (cr0)); - __asm__("movl %%cr2, %0": "=r" (cr2)); - __asm__("movl %%cr3, %0": "=r" (cr3)); - /* This could fault if %cr4 does not exist */ - __asm__("1: movl %%cr4, %0 \n" - "2: \n" - ".section __ex_table,\"a\" \n" - ".long 1b,2b \n" - ".previous \n" - : "=r" (cr4): "0" (0)); + cr0 = read_cr0(); + cr2 = read_cr2(); + cr3 = read_cr3(); + if (current_cpu_data.x86 > 4) { + cr4 = read_cr4(); + } printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); show_trace(NULL, &regs->esp); } @@ -682,41 +678,56 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas __unlazy_fpu(prev_p); /* - * Reload esp0, LDT and the page table pointer: + * Reload esp0. */ load_esp0(tss, next); /* - * Load the per-thread Thread-Local Storage descriptor. + * Save away %fs and %gs. No need to save %es and %ds, as + * those are always kernel segments while inside the kernel. + * Doing this before setting the new TLS descriptors avoids + * the situation where we temporarily have non-reloadable + * segments in %fs and %gs. This could be an issue if the + * NMI handler ever used %fs or %gs (it does not today), or + * if the kernel is running inside of a hypervisor layer. */ - load_TLS(next, cpu); + savesegment(fs, prev->fs); + savesegment(gs, prev->gs); /* - * Save away %fs and %gs. No need to save %es and %ds, as - * those are always kernel segments while inside the kernel. + * Load the per-thread Thread-Local Storage descriptor. */ - asm volatile("mov %%fs,%0":"=m" (prev->fs)); - asm volatile("mov %%gs,%0":"=m" (prev->gs)); + load_TLS(next, cpu); /* * Restore %fs and %gs if needed. + * + * Glibc normally makes %fs be zero, and %gs is one of + * the TLS segments. */ - if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) { + if (unlikely(prev->fs | next->fs)) loadsegment(fs, next->fs); + + if (prev->gs | next->gs) loadsegment(gs, next->gs); - }
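With sys_iopl() now recording the level in thread.iopl (ioport.c earlier in this patch) and the context-switch path just below restoring it per task via set_iopl_mask(), the userspace contract is unchanged. A minimal sketch of a consumer, using the long-standing glibc interface (needs CAP_SYS_RAWIO):

	#include <stdio.h>
	#include <sys/io.h>

	int main(void)
	{
		if (iopl(3) < 0) {	/* raise this task's I/O privilege level */
			perror("iopl");
			return 1;
		}
		outb(0x00, 0x80);	/* harmless write to the POST diagnostic port */
		return 0;
	}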
+ */ + if (unlikely(prev->iopl != next->iopl)) + set_iopl_mask(next->iopl); /* * Now maybe reload the debug registers */ if (unlikely(next->debugreg[7])) { - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); + set_debugreg(next->debugreg[0], 0); + set_debugreg(next->debugreg[1], 1); + set_debugreg(next->debugreg[2], 2); + set_debugreg(next->debugreg[3], 3); /* no 4 and 5 */ - set_debugreg(current->thread.debugreg[6], 6); - set_debugreg(current->thread.debugreg[7], 7); + set_debugreg(next->debugreg[6], 6); + set_debugreg(next->debugreg[7], 7); } if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) @@ -913,6 +924,8 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; + memset(&info, 0, sizeof(info)); + desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; info.entry_number = idx; diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 0da59b42843c..7b6368bf8974 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -271,6 +271,8 @@ static void clear_singlestep(struct task_struct *child) void ptrace_disable(struct task_struct *child) { clear_singlestep(child); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); } /* @@ -509,15 +511,20 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) } break; + case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ ret = -EIO; if (!valid_signal(data)) break; - if (request == PTRACE_SYSCALL) { + if (request == PTRACE_SYSEMU) { + set_tsk_thread_flag(child, TIF_SYSCALL_EMU); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + } else if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } - else { + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); + } else { + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } child->exit_code = data; @@ -542,10 +549,17 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) wake_up_process(child); break; + case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */ case PTRACE_SINGLESTEP: /* set the trap flag. 
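
PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP exist for User-Mode Linux: the tracer gets a single stop per syscall, with the syscall itself suppressed in the kernel, instead of the entry/exit stop pair that PTRACE_SYSCALL costs. A hedged user-space sketch of such a tracer loop; the request value 31 matches the i386 numbering, and emulate_syscall() is a hypothetical stand-in for the tracer's own syscall emulation:

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>

#ifndef PTRACE_SYSEMU
#define PTRACE_SYSEMU 31                /* not yet in all libc headers */
#endif

extern long emulate_syscall(struct user_regs_struct *regs); /* hypothetical */

static void sysemu_loop(pid_t child)
{
        struct user_regs_struct regs;
        int status;

        for (;;) {
                ptrace(PTRACE_SYSEMU, child, 0, 0);
                waitpid(child, &status, 0);
                if (WIFEXITED(status))
                        break;
                ptrace(PTRACE_GETREGS, child, 0, &regs);
                /* regs.orig_eax holds the number of the skipped syscall */
                regs.eax = emulate_syscall(&regs);
                ptrace(PTRACE_SETREGS, child, 0, &regs);
        }
}
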
*/ ret = -EIO; if (!valid_signal(data)) break; + + if (request == PTRACE_SYSEMU_SINGLESTEP) + set_tsk_thread_flag(child, TIF_SYSCALL_EMU); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_singlestep(child); child->exit_code = data; @@ -678,27 +692,58 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) * - triggered by current->work.syscall_trace */ __attribute__((regparm(3))) -void do_syscall_trace(struct pt_regs *regs, int entryexit) +int do_syscall_trace(struct pt_regs *regs, int entryexit) { - /* do the secure computing check first */ - secure_computing(regs->orig_eax); + int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); + /* + * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall + * interception + */ + int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); + int ret = 0; - if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax); + /* do the secure computing check first */ + if (!entryexit) + secure_computing(regs->orig_eax); + + if (unlikely(current->audit_context)) { + if (entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), + regs->eax); + /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only + * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is + * not used, entry.S will call us only on syscall exit, not + * entry; so when TIF_SYSCALL_AUDIT is used we must avoid + * calling send_sigtrap() on syscall entry. + * + * Note that when PTRACE_SYSEMU_SINGLESTEP is used, + * is_singlestep is false, despite his name, so we will still do + * the correct thing. + */ + else if (is_singlestep) + goto out; + } if (!(current->ptrace & PT_PTRACED)) goto out; + /* If a process stops on the 1st tracepoint with SYSCALL_TRACE + * and then is resumed with SYSEMU_SINGLESTEP, it will come in + * here. We have to check this and return */ + if (is_sysemu && entryexit) + return 0; + /* Fake a debug trap */ - if (test_thread_flag(TIF_SINGLESTEP)) + if (is_singlestep) send_sigtrap(current, regs, 0); - if (!test_thread_flag(TIF_SYSCALL_TRACE)) + if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) goto out; /* the 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + /* Note that the debugger could change the result of test_thread_flag!*/ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 
0x80:0)); /* * this isn't the same as continuing with a signal, but it will do @@ -709,9 +754,17 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } - out: + ret = is_sysemu; +out: if (unlikely(current->audit_context) && !entryexit) audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax, regs->ebx, regs->ecx, regs->edx, regs->esi); - + if (ret == 0) + return 0; + + regs->orig_eax = -1; /* force skip of syscall restarting */ + if (unlikely(current->audit_context)) + audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), + regs->eax); + return 1; } diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index b3e584849961..350ea6680f63 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -11,8 +11,10 @@ #include <linux/mc146818rtc.h> #include <linux/efi.h> #include <linux/dmi.h> +#include <linux/ctype.h> #include <asm/uaccess.h> #include <asm/apic.h> +#include <asm/desc.h> #include "mach_reboot.h" #include <linux/reboot_fixups.h> @@ -27,8 +29,6 @@ static int reboot_thru_bios; #ifdef CONFIG_SMP static int reboot_cpu = -1; -/* shamelessly grabbed from lib/vsprintf.c for readability */ -#define is_digit(c) ((c) >= '0' && (c) <= '9') #endif static int __init reboot_setup(char *str) { @@ -48,9 +48,9 @@ static int __init reboot_setup(char *str) break; #ifdef CONFIG_SMP case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ - if (is_digit(*(str+1))) { + if (isdigit(*(str+1))) { reboot_cpu = (int) (*(str+1) - '0'); - if (is_digit(*(str+2))) + if (isdigit(*(str+2))) reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); } /* we will leave sorting out the final value @@ -242,13 +242,13 @@ void machine_real_restart(unsigned char *code, int length) /* Set up the IDT for real mode. */ - __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt)); + load_idt(&real_mode_idt); /* Set up a GDT from which we can load segment descriptors for real mode. The GDT is not used in real mode; it is just needed here to prepare the descriptors. */ - __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt)); + load_gdt(&real_mode_gdt); /* Load the data segment registers, and thus the descriptors ready for real mode. The base address of each segment is 0x100, 16 times the @@ -284,7 +284,7 @@ void machine_shutdown(void) reboot_cpu_id = 0; /* See if there has been given a command line override */ - if ((reboot_cpu_id != -1) && (reboot_cpu < NR_CPUS) && + if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && cpu_isset(reboot_cpu, cpu_online_map)) { reboot_cpu_id = reboot_cpu; } @@ -311,14 +311,12 @@ void machine_shutdown(void) #endif } -void machine_restart(char * __unused) +void machine_emergency_restart(void) { - machine_shutdown(); - if (!reboot_thru_bios) { if (efi_enabled) { efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); - __asm__ __volatile__("lidt %0": :"m" (no_idt)); + load_idt(&no_idt); __asm__ __volatile__("int3"); } /* rebooting needs to touch the page at absolute addr 0 */ @@ -327,7 +325,7 @@ void machine_restart(char * __unused) mach_reboot_fixups(); /* for board specific fixups */ mach_reboot(); /* That didn't work - force a triple fault.. 
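
The triple-fault fallback underneath this comment deserves a gloss: with an IDT whose limit is zero, the CPU cannot deliver any exception, so the int3 raises a breakpoint that escalates to a general protection fault, then to a double fault, and the failed double-fault delivery resets the processor. In the new load_idt() spelling the whole mechanism reduces to this sketch (the pseudo-descriptor type is an assumption):

/* An IDT with limit 0: nothing can be delivered through it. */
static struct Xgt_desc_struct no_idt = { 0, 0 };

static void triple_fault(void)
{
        load_idt(&no_idt);
        asm volatile("int3");   /* #BP -> #GP -> #DF -> CPU reset */
}
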
*/ - __asm__ __volatile__("lidt %0": :"m" (no_idt)); + load_idt(&no_idt); __asm__ __volatile__("int3"); } } @@ -337,23 +335,22 @@ void machine_restart(char * __unused) machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); } -EXPORT_SYMBOL(machine_restart); +void machine_restart(char * __unused) +{ + machine_shutdown(); + machine_emergency_restart(); +} void machine_halt(void) { } -EXPORT_SYMBOL(machine_halt); - void machine_power_off(void) { - lapic_shutdown(); + machine_shutdown(); - if (efi_enabled) - efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); if (pm_power_off) pm_power_off(); } -EXPORT_SYMBOL(machine_power_off); diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c index 469f496e55c0..7455ab643943 100644 --- a/arch/i386/kernel/semaphore.c +++ b/arch/i386/kernel/semaphore.c @@ -13,171 +13,9 @@ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> */ #include <linux/config.h> -#include <linux/sched.h> -#include <linux/err.h> -#include <linux/init.h> #include <asm/semaphore.h> /* - * Semaphores are implemented using a two-way counter: - * The "count" variable is decremented for each process - * that tries to acquire the semaphore, while the "sleeping" - * variable is a count of such acquires. - * - * Notably, the inline "up()" and "down()" functions can - * efficiently test if they need to do any extra work (up - * needs to do something only if count was negative before - * the increment operation. - * - * "sleeping" and the contention routine ordering is protected - * by the spinlock in the semaphore's waitqueue head. - * - * Note that these functions are only called when there is - * contention on the lock, and as such all this is the - * "non-critical" part of the whole semaphore business. The - * critical part is the inline stuff in <asm/semaphore.h> - * where we want to avoid any extra jumps and calls. - */ - -/* - * Logic: - * - only on a boundary condition do we need to care. When we go - * from a negative count to a non-negative, we wake people up. - * - when we go from a non-negative count to a negative do we - * (a) synchronize with the "sleeper" count and (b) make sure - * that we're on the wakeup list before we synchronize so that - * we cannot lose wakeup events. - */ - -static fastcall void __attribute_used__ __up(struct semaphore *sem) -{ - wake_up(&sem->wait); -} - -static fastcall void __attribute_used__ __sched __down(struct semaphore * sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_UNINTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * the wait_queue_head. 
- */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_UNINTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - tsk->state = TASK_RUNNING; -} - -static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_INTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * With signals pending, this turns into - * the trylock failure case - we won't be - * sleeping, and we* can't get the lock as - * it has contention. Just correct the count - * and exit. - */ - if (signal_pending(current)) { - retval = -EINTR; - sem->sleepers = 0; - atomic_add(sleepers, &sem->count); - break; - } - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * wait_queue_head. The "-1" is because we're - * still hoping to get the semaphore. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_INTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - - tsk->state = TASK_RUNNING; - return retval; -} - -/* - * Trylock failed - make sure we correct for - * having decremented the count. - * - * We could have done the trylock with a - * single "cmpxchg" without failure cases, - * but then it wouldn't work on a 386. - */ -static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem) -{ - int sleepers; - unsigned long flags; - - spin_lock_irqsave(&sem->wait.lock, flags); - sleepers = sem->sleepers + 1; - sem->sleepers = 0; - - /* - * Add "everybody else" and us into it. They aren't - * playing, because we own the spinlock in the - * wait_queue_head. - */ - if (!atomic_add_negative(sleepers, &sem->count)) { - wake_up_locked(&sem->wait); - } - - spin_unlock_irqrestore(&sem->wait.lock, flags); - return 1; -} - - -/* * The semaphore operations have a special calling sequence that * allow us to do a simpler in-line version of them. 
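
Nothing in this deletion changes semaphore semantics: the inline fast paths in <asm/semaphore.h> still branch to __down(), __down_interruptible(), __down_trylock() and __up() on contention, and the removed bodies appear to be consolidated into a generic copy shared with other architectures (lib/semaphore-sleepers.c in this kernel generation) rather than dropped. Callers are unaffected; a trivial usage sketch with hypothetical names:

static DECLARE_MUTEX(conf_sem);         /* counting semaphore, count 1 */

static int update_config(void)
{
        if (down_interruptible(&conf_sem))
                return -EINTR;          /* interrupted while sleeping */
        /* ... exclusive section ... */
        up(&conf_sem);
        return 0;
}
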
These routines * need to convert that sequence back into the C sequence when diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 7306353c520e..dc39ca6a7eca 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -82,19 +82,19 @@ EXPORT_SYMBOL(efi_enabled); /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; -#ifdef CONFIG_ACPI_INTERPRETER +#ifdef CONFIG_ACPI int acpi_disabled = 0; #else int acpi_disabled = 1; #endif EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI int __initdata acpi_force = 0; extern acpi_interrupt_flags acpi_sci_flags; #endif @@ -139,6 +139,7 @@ struct sys_desc_table_struct { unsigned char table[0]; }; struct edid_info edid_info; +EXPORT_SYMBOL_GPL(edid_info); struct ist_info ist_info; #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) @@ -370,12 +371,16 @@ static void __init limit_regions(unsigned long long size) int i; if (efi_enabled) { - for (i = 0; i < memmap.nr_map; i++) { - current_addr = memmap.map[i].phys_addr + - (memmap.map[i].num_pages << 12); - if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) { + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map, i = 0; p < memmap.map_end; + p += memmap.desc_size, i++) { + md = p; + current_addr = md->phys_addr + (md->num_pages << 12); + if (md->type == EFI_CONVENTIONAL_MEMORY) { if (current_addr >= size) { - memmap.map[i].num_pages -= + md->num_pages -= (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); memmap.nr_map = i + 1; return; @@ -794,7 +799,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) } #endif -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* "acpi=off" disables both ACPI table parsing and interpreter */ else if (!memcmp(from, "acpi=off", 8)) { disable_acpi(); @@ -846,11 +851,16 @@ static void __init parse_cmdline_early (char ** cmdline_p) #endif #ifdef CONFIG_X86_LOCAL_APIC + if (!memcmp(from, "disable_timer_pin_1", 19)) + disable_timer_pin_1 = 1; + if (!memcmp(from, "enable_timer_pin_1", 18)) + disable_timer_pin_1 = -1; + /* disable IO-APIC */ else if (!memcmp(from, "noapic", 6)) disable_ioapic_setup(); #endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* CONFIG_ACPI_BOOT */ +#endif /* CONFIG_ACPI */ #ifdef CONFIG_X86_LOCAL_APIC /* enable local APIC */ @@ -1295,7 +1305,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat */ static void __init register_memory(void) { - unsigned long gapstart, gapsize; + unsigned long gapstart, gapsize, round; unsigned long long last; int i; @@ -1340,14 +1350,14 @@ static void __init register_memory(void) } /* - * Start allocating dynamic PCI memory a bit into the gap, - * aligned up to the nearest megabyte. - * - * Question: should we try to pad it up a bit (do something - * like " + (gapsize >> 3)" in there too?). We now have the - * technology. + * See how much we want to round up: start off with + * rounding to the next 1MB area. 
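
The replacement rounding logic just below is denser than it looks: round is doubled until it is at least gapsize/16 (with a 1MB floor), so the allocation start is padded by roughly a sixteenth of the gap, and because round stays a power of two, -round in two's complement is exactly the mask that rounds (gapstart + round) down to a multiple of round. A worked example with assumed values, runnable standalone:

#include <stdio.h>

int main(void)
{
        /* assume a 512MB PCI hole starting at 3.5GB */
        unsigned long gapstart = 0xe0000000UL, gapsize = 0x20000000UL;
        unsigned long round = 0x100000UL;       /* 1MB floor */

        while ((gapsize >> 4) > round)
                round += round;                 /* ends at 0x2000000, 32MB */

        /* -round == ~round + 1: all bit positions >= round are set */
        printf("%#lx\n", (gapstart + round) & -round);  /* 0xe2000000 */
        return 0;
}
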
*/ - pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", pci_mem_start, gapstart, gapsize); @@ -1414,7 +1424,7 @@ static struct nop { This runs before SMP is initialized to avoid SMP problems with self modifying code. This implies that assymetric systems where APs have less capabilities than the boot processor are not handled. - In this case boot with "noreplacement". */ + Tough. Make sure you disable such features by hand. */ void apply_alternatives(void *start, void *end) { struct alt_instr *a; @@ -1442,24 +1452,12 @@ void apply_alternatives(void *start, void *end) } } -static int no_replacement __initdata = 0; - void __init alternative_instructions(void) { extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - if (no_replacement) - return; apply_alternatives(__alt_instructions, __alt_instructions_end); } -static int __init noreplacement_setup(char *s) -{ - no_replacement = 1; - return 0; -} - -__setup("noreplacement", noreplacement_setup); - static char * __init machine_specific_memory_setup(void); #ifdef CONFIG_MCA @@ -1587,14 +1585,20 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* * Parse the ACPI tables for possible boot-time SMP configuration. */ acpi_boot_table_init(); acpi_boot_init(); -#endif +#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) + if (def_to_bigsmp) + printk(KERN_WARNING "More than 8 CPUs detected and " + "CONFIG_X86_PC cannot handle it.\nUse " + "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); +#endif +#endif #ifdef CONFIG_X86_LOCAL_APIC if (smp_found_config) get_smp_config(); diff --git a/arch/i386/kernel/sigframe.h b/arch/i386/kernel/sigframe.h index d21b14f5c25c..0b2221711dad 100644 --- a/arch/i386/kernel/sigframe.h +++ b/arch/i386/kernel/sigframe.h @@ -1,6 +1,6 @@ struct sigframe { - char *pretcode; + char __user *pretcode; int sig; struct sigcontext sc; struct _fpstate fpstate; @@ -10,10 +10,10 @@ struct sigframe struct rt_sigframe { - char *pretcode; + char __user *pretcode; int sig; - struct siginfo *pinfo; - void *puc; + struct siginfo __user *pinfo; + void __user *puc; struct siginfo info; struct ucontext uc; struct _fpstate fpstate; diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 89ef7adc63a4..61eb0c8a6e47 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -278,9 +278,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, int tmp, err = 0; tmp = 0; - __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + savesegment(gs, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); + savesegment(fs, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->fs); err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); @@ -577,10 +577,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, else ret = setup_frame(sig, ka, oldset, regs); - if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { + if (ret) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - sigaddset(¤t->blocked,sig); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } @@ -603,7 +604,9 @@ int fastcall do_signal(struct 
pt_regs *regs, sigset_t *oldset) * We want the common case to go fast, which * is why we may in certain cases get here from * kernel mode. Just return without doing anything - * if so. + * if so. vm86 regs switched out by assembly code + * before reaching here, so testing against kernel + * CS suffices. */ if (!user_mode(regs)) return 1; diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index cec4bde67161..48b55db3680f 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy) local_irq_disable(); disable_local_APIC(); if (cpu_data[smp_processor_id()].hlt_works_ok) - for(;;) __asm__("hlt"); + for(;;) halt(); for (;;); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index d66bf489a2e9..5f0a95d76a4f 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -68,26 +68,28 @@ EXPORT_SYMBOL(smp_num_siblings); #endif /* Package ID of each logical CPU */ -int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(phys_proc_id); /* Core ID of each logical CPU */ -int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); -cpumask_t cpu_sibling_map[NR_CPUS]; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); -cpumask_t cpu_core_map[NR_CPUS]; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* bitmap of online cpus */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there @@ -100,7 +102,7 @@ static int __devinitdata tsc_sync_disabled; struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); -u8 x86_cpu_to_apicid[NR_CPUS] = +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0xff }; EXPORT_SYMBOL(x86_cpu_to_apicid); @@ -200,7 +202,7 @@ static void __devinit smp_store_cpu_info(int id) goto valid_k7; /* If we get here, it's not a certified SMP capable AMD system. */ - tainted |= TAINT_UNSAFE_SMP; + add_taint(TAINT_UNSAFE_SMP); } valid_k7: @@ -550,10 +552,10 @@ extern struct { #ifdef CONFIG_NUMA /* which logical CPUs are on which nodes */ -cpumask_t node_2_cpu_mask[MAX_NUMNODES] = +cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly = { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; /* which node each logical CPU is on */ -int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; +int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; EXPORT_SYMBOL(cpu_2_node); /* set up a mapping between cpu and node. */ @@ -581,7 +583,7 @@ static inline void unmap_cpu_to_node(int cpu) #endif /* CONFIG_NUMA */ -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = BAD_APICID }; static void map_cpu_to_logical_apicid(void) { @@ -1017,8 +1019,8 @@ int __devinit smp_prepare_cpu(int cpu) tsc_sync_disabled = 1; /* init low mem mapping */ - memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + KERNEL_PGD_PTRS); flush_tlb_all(); schedule_work(&task); wait_for_completion(&done); @@ -1265,6 +1267,7 @@ void __devinit smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callout_map); cpu_set(smp_processor_id(), cpu_present_map); + cpu_set(smp_processor_id(), cpu_possible_map); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } @@ -1327,8 +1330,7 @@ void __cpu_die(unsigned int cpu) printk ("CPU %d is now offline\n", cpu); return; } - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ/10); + msleep(100); } printk(KERN_ERR "CPU %u didn't die...\n", cpu); } diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 7b3b27d64409..516bf5653b02 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -213,12 +213,18 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c node_end_pfn[nid] = memory_chunk->end_pfn; } +static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ + +int pxm_to_node(int pxm) +{ + return pxm_to_nid_map[pxm]; +} + /* Parse the ACPI Static Resource Affinity Table */ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) { u8 *start, *end, *p; int i, j, nid; - u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */ start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 3db9a04aec6e..9b21a31d4f4e 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -251,7 +251,7 @@ ENTRY(sys_call_table) .long sys_io_submit .long sys_io_cancel .long sys_fadvise64 /* 250 */ - .long sys_set_zone_reclaim + .long sys_ni_syscall .long sys_exit_group .long sys_lookup_dcookie .long sys_epoll_create @@ -291,3 +291,6 @@ ENTRY(sys_call_table) .long sys_keyctl .long sys_ioprio_set .long sys_ioprio_get /* 290 */ + .long sys_inotify_init + .long sys_inotify_add_watch + .long sys_inotify_rm_watch diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 2854c357377f..2883a4d4f01f 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -struct timer_opts *cur_timer = &timer_none; +struct timer_opts *cur_timer __read_mostly = &timer_none; /* * This is a special lock that is owned by the CPU and holds the index @@ -194,10 +194,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; @@ -252,8 +249,7 @@ EXPORT_SYMBOL(profile_pc); * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static inline void do_timer_interrupt(int irq, 
struct pt_regs *regs) { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { @@ -307,7 +303,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) cur_timer->mark_offset(); - do_timer_interrupt(irq, NULL, regs); + do_timer_interrupt(irq, regs); write_sequnlock(&xtime_lock); return IRQ_HANDLED; @@ -333,8 +329,7 @@ EXPORT_SYMBOL(get_cmos_time); static void sync_cmos_clock(unsigned long dummy); -static struct timer_list sync_cmos_timer = - TIMER_INITIALIZER(sync_cmos_clock, 0, 0); +static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); static void sync_cmos_clock(unsigned long dummy) { @@ -348,7 +343,7 @@ static void sync_cmos_clock(unsigned long dummy) * This code is run on a timer. If the clock is set, that timer * may not expire at the correct time. Thus, we adjust... */ - if ((time_status & STA_UNSYNC) != 0) + if (!ntp_synced()) /* * Not synced, exit, do not restart a timer (if one is * running, let it run out). @@ -383,6 +378,7 @@ void notify_arch_cmos_timer(void) static long clock_cmos_diff, sleep_start; +static struct timer_opts *last_timer; static int timer_suspend(struct sys_device *dev, pm_message_t state) { /* @@ -391,6 +387,10 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state) clock_cmos_diff = -get_cmos_time(); clock_cmos_diff += get_seconds(); sleep_start = get_cmos_time(); + last_timer = cur_timer; + cur_timer = &timer_none; + if (last_timer->suspend) + last_timer->suspend(state); return 0; } @@ -404,6 +404,7 @@ static int timer_resume(struct sys_device *dev) if (is_hpet_enabled()) hpet_reenable(); #endif + setup_pit_timer(); sec = get_cmos_time() + clock_cmos_diff; sleep_length = (get_cmos_time() - sleep_start) * HZ; write_seqlock_irqsave(&xtime_lock, flags); @@ -412,6 +413,11 @@ static int timer_resume(struct sys_device *dev) write_sequnlock_irqrestore(&xtime_lock, flags); jiffies += sleep_length; wall_jiffies += sleep_length; + if (last_timer->resume) + last_timer->resume(); + cur_timer = last_timer; + last_timer = NULL; + touch_softlockup_watchdog(); return 0; } diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d766e0963ac1..d973a8b681fd 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -18,8 +18,8 @@ #include "mach_timer.h" #include <asm/hpet.h> -static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */ -static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ +static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ +static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ static unsigned long hpet_last; /* hpet counter value at last tick*/ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ @@ -136,6 +136,8 @@ static void delay_hpet(unsigned long loops) } while ((hpet_end - hpet_start) < (loops)); } +static struct timer_opts timer_hpet; + static int __init init_hpet(char* override) { unsigned long result, remain; @@ -163,6 +165,8 @@ static int __init init_hpet(char* override) } set_cyc2ns_scale(cpu_khz/1000); } + /* set this only when cpu_has_tsc */ + timer_hpet.read_timer = read_timer_tsc; } /* @@ -177,16 +181,29 @@ static int __init init_hpet(char* override) return 0; } +static int hpet_resume(void) +{ + write_seqlock(&monotonic_lock); + /* Assume this is the last mark offset time */ + rdtsc(last_tsc_low, last_tsc_high); + + if (hpet_use_timer) + hpet_last = 
hpet_readl(HPET_T0_CMP) - hpet_tick; + else + hpet_last = hpet_readl(HPET_COUNTER); + write_sequnlock(&monotonic_lock); + return 0; +} /************************************************************/ /* tsc timer_opts struct */ -static struct timer_opts timer_hpet = { +static struct timer_opts timer_hpet __read_mostly = { .name = "hpet", .mark_offset = mark_offset_hpet, .get_offset = get_offset_hpet, .monotonic_clock = monotonic_clock_hpet, .delay = delay_hpet, - .read_timer = read_timer_tsc, + .resume = hpet_resume, }; struct init_timer_opts __initdata timer_hpet_init = { diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c index 06de036a820c..eddb64038234 100644 --- a/arch/i386/kernel/timers/timer_pit.c +++ b/arch/i386/kernel/timers/timer_pit.c @@ -175,30 +175,3 @@ void setup_pit_timer(void) outb(LATCH >> 8 , PIT_CH0); /* MSB */ spin_unlock_irqrestore(&i8253_lock, flags); } - -static int timer_resume(struct sys_device *dev) -{ - setup_pit_timer(); - return 0; -} - -static struct sysdev_class timer_sysclass = { - set_kset_name("timer_pit"), - .resume = timer_resume, -}; - -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int __init init_timer_sysfs(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(init_timer_sysfs); - diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index 4ef20e663498..264edaaac315 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -186,6 +186,14 @@ static void mark_offset_pmtmr(void) } } +static int pmtmr_resume(void) +{ + write_seqlock(&monotonic_lock); + /* Assume this is the last mark offset time */ + offset_tick = read_pmtmr(); + write_sequnlock(&monotonic_lock); + return 0; +} static unsigned long long monotonic_clock_pmtmr(void) { @@ -247,6 +255,7 @@ static struct timer_opts timer_pmtmr = { .monotonic_clock = monotonic_clock_pmtmr, .delay = delay_pmtmr, .read_timer = read_timer_tsc, + .resume = pmtmr_resume, }; struct init_timer_opts __initdata timer_pmtmr_init = { diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 8f4e4d5bc560..6dd470cc9f72 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -543,6 +543,19 @@ static int __init init_tsc(char* override) return -ENODEV; } +static int tsc_resume(void) +{ + write_seqlock(&monotonic_lock); + /* Assume this is the last mark offset time */ + rdtsc(last_tsc_low, last_tsc_high); +#ifdef CONFIG_HPET_TIMER + if (is_hpet_enabled() && hpet_use_timer) + hpet_last = hpet_readl(HPET_COUNTER); +#endif + write_sequnlock(&monotonic_lock); + return 0; +} + #ifndef CONFIG_X86_TSC /* disable flag for tsc. 
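
Taken together, the time.c and timers/ hunks give every timer source a suspend/resume pair: timer_suspend() parks cur_timer on timer_none so any tick during the transition is harmless, and each source's resume hook (tsc_resume() above, hpet_resume() and pmtmr_resume() likewise) re-primes the last-offset state that mark_offset() depends on before the source is switched back in. Condensed to its skeleton, eliding the CMOS clock arithmetic:

static struct timer_opts *last_timer;

static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
        last_timer = cur_timer;
        cur_timer = &timer_none;        /* park: ticks become no-ops */
        if (last_timer->suspend)
                last_timer->suspend(state);
        return 0;
}

static int timer_resume(struct sys_device *dev)
{
        setup_pit_timer();              /* PIT programming was lost in sleep */
        if (last_timer->resume)
                last_timer->resume();
        cur_timer = last_timer;
        last_timer = NULL;
        return 0;
}
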
Takes effect by clearing the TSC cpu flag * in cpu/common.c */ @@ -573,6 +586,7 @@ static struct timer_opts timer_tsc = { .monotonic_clock = monotonic_clock_tsc, .delay = delay_tsc, .read_timer = read_timer_tsc, + .resume = tsc_resume, }; struct init_timer_opts __initdata timer_tsc_init = { diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index a61f33d06ea3..09a58cb6daa7 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -210,7 +210,7 @@ void show_registers(struct pt_regs *regs) unsigned short ss; esp = (unsigned long) (®s->esp); - ss = __KERNEL_DS; + savesegment(ss, ss); if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; @@ -267,9 +267,6 @@ static void handle_BUG(struct pt_regs *regs) char c; unsigned long eip; - if (user_mode(regs)) - goto no_bug; /* Not in kernel */ - eip = regs->eip; if (eip < PAGE_OFFSET) @@ -366,8 +363,9 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e die(str, regs, err); } -static void do_trap(int trapnr, int signr, char *str, int vm86, - struct pt_regs * regs, long error_code, siginfo_t *info) +static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, + struct pt_regs * regs, long error_code, + siginfo_t *info) { struct task_struct *tsk = current; tsk->thread.error_code = error_code; @@ -463,7 +461,8 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) -fastcall void do_general_protection(struct pt_regs * regs, long error_code) +fastcall void __kprobes do_general_protection(struct pt_regs * regs, + long error_code) { int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); @@ -568,6 +567,10 @@ static DEFINE_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == + NOTIFY_STOP) + return; + spin_lock(&nmi_print_lock); /* * We are in trouble anyway, lets at least try @@ -656,7 +659,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) ++nmi_count(cpu); - if (!nmi_callback(regs, cpu)) + if (!rcu_dereference(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); @@ -664,7 +667,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { - nmi_callback = callback; + rcu_assign_pointer(nmi_callback, callback); } EXPORT_SYMBOL_GPL(set_nmi_callback); @@ -675,7 +678,7 @@ void unset_nmi_callback(void) EXPORT_SYMBOL_GPL(unset_nmi_callback); #ifdef CONFIG_KPROBES -fastcall void do_int3(struct pt_regs *regs, long error_code) +fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) @@ -709,7 +712,7 @@ fastcall void do_int3(struct pt_regs *regs, long error_code) * find every occurrence of the TF bit that could be saved away even * by user code) */ -fastcall void do_debug(struct pt_regs * regs, long error_code) +fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code) { unsigned int condition; struct task_struct *tsk = current; @@ -803,15 +806,17 @@ void math_error(void __user *eip) */ cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { + switch (swd & ~cwd & 0x3f) { case 0x000: default: break; case 0x001: /* Invalid Op */ - case 0x041: /* Stack Fault */ - case 0x241: /* Stack Fault | Direction */ + /* + * swd & 
0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ info.si_code = FPE_FLTINV; - /* Should we clear the SF or let user space do it ???? */ break; case 0x002: /* Denormalize */ case 0x010: /* Underflow */ @@ -1006,7 +1011,7 @@ void __init trap_init_f00f_bug(void) * it uses the read-only mapped virtual address. */ idt_descr.address = fix_to_virt(FIX_F00F_IDT); - __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); + load_idt(&idt_descr); } #endif diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index ec0f68ce6886..16b485009622 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -294,8 +294,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk */ info->regs32->eax = 0; tsk->thread.saved_esp0 = tsk->thread.esp0; - asm volatile("mov %%fs,%0":"=m" (tsk->thread.saved_fs)); - asm volatile("mov %%gs,%0":"=m" (tsk->thread.saved_gs)); + savesegment(fs, tsk->thread.saved_fs); + savesegment(gs, tsk->thread.saved_gs); tss = &per_cpu(init_tss, get_cpu()); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; @@ -542,7 +542,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) unsigned char opcode; unsigned char __user *csp; unsigned char __user *ssp; - unsigned short ip, sp; + unsigned short ip, sp, orig_flags; int data32, pref_done; #define CHECK_IF_IN_TRAP \ @@ -551,8 +551,12 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) #define VM86_FAULT_RETURN do { \ if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ return_to_32bit(regs, VM86_PICRETURN); \ + if (orig_flags & TF_MASK) \ + handle_vm86_trap(regs, 0, 1); \ return; } while (0) + orig_flags = *(unsigned short *)®s->eflags; + csp = (unsigned char __user *) (regs->cs << 4); ssp = (unsigned char __user *) (regs->ss << 4); sp = SP(regs); diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 7e01a528a83a..4710195b6b74 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -22,6 +22,7 @@ SECTIONS *(.text) SCHED_TEXT LOCK_TEXT + KPROBES_TEXT *(.fixup) *(.gnu.warning) } = 0x9090 @@ -57,6 +58,9 @@ SECTIONS *(.data.cacheline_aligned) } + /* rarely changed data like cpu maps */ + . = ALIGN(32); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } _edata = .; /* End of data section */ . = ALIGN(THREAD_SIZE); /* init_task */ @@ -140,12 +144,7 @@ SECTIONS *(.exitcall.exit) } - /* Stabs debugging sections. 
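
The new .data.read_mostly output section is the other half of the __read_mostly annotations scattered through smpboot.c, time.c and the timers above: data that is read hot but written rarely gets grouped away from frequently written neighbours, so it stops sharing (and bouncing) cache lines with them. The macro itself is a one-liner; a sketch matching this era's <asm-i386/cache.h>:

#define __read_mostly __attribute__((__section__(".data.read_mostly")))

/* hypothetical use: read on every tick, written once during boot */
static int ticks_per_jiffy __read_mostly;
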
*/ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } + STABS_DEBUG + + DWARF_DEBUG } diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S index c8fcf75b9be3..fadb5bc3c374 100644 --- a/arch/i386/kernel/vsyscall-sigreturn.S +++ b/arch/i386/kernel/vsyscall-sigreturn.S @@ -7,7 +7,7 @@ */ #include <asm/unistd.h> -#include <asm/asm_offsets.h> +#include <asm/asm-offsets.h> /* XXX @@ -15,7 +15,7 @@ */ .text - .org __kernel_vsyscall+32 + .org __kernel_vsyscall+32,0x90 .globl __kernel_sigreturn .type __kernel_sigreturn,@function __kernel_sigreturn: @@ -35,6 +35,7 @@ __kernel_rt_sigreturn: int $0x80 .LEND_rt_sigreturn: .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + .balign 32 .previous .section .eh_frame,"a",@progbits diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index a7977707c8e5..98699ca6e52d 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S @@ -3,7 +3,7 @@ * object prelinked to its virtual address, and with only one read-only * segment (that fits in one page). This script controls its layout. */ -#include <asm/asm_offsets.h> +#include <asm/asm-offsets.h> SECTIONS {
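
Finally, the asm_offsets.h to asm-offsets.h renames track the kbuild move to a single generated-header name across architectures. For reference, the generator (arch/i386/kernel/asm-offsets.c) produces those constants by compiling marker asm statements and scraping the compiler output; the mechanism is roughly:

#include <linux/stddef.h>
#include <asm/processor.h>

#define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))

void foo(void)
{
        /* becomes "#define TSS_sysenter_esp0 <offset>" in asm-offsets.h */
        DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0));
}

The added fill operand in ".org __kernel_vsyscall+32,0x90" is in the same tidying spirit: the padding between the vsyscall entry point and the sigreturn trampoline now disassembles as NOPs instead of zero bytes.
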
