Merge branch 'x86/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (821 commits)
  x86: make 64bit hpet_set_mapping to use ioremap too, v2
  x86: get x86_phys_bits early
  x86: max_low_pfn_mapped fix #4
  x86: change _node_to_cpumask_ptr to return const ptr
  x86: I/O APIC: remove an IRQ2-mask hack
  x86: fix numaq_tsc_disable calling
  x86, e820: remove end_user_pfn
  x86: max_low_pfn_mapped fix, #3
  x86: max_low_pfn_mapped fix, #2
  x86: max_low_pfn_mapped fix, #1
  x86_64: fix delayed signals
  x86: remove conflicting nx6325 and nx6125 quirks
  x86: Recover timer_ack lost in the merge of the NMI watchdog
  x86: I/O APIC: Never configure IRQ2
  x86: L-APIC: Always fully configure IRQ0
  x86: L-APIC: Set IRQ0 as edge-triggered
  x86: merge dwarf2 headers
  x86: use AS_CFI instead of UNWIND_INFO
  x86: use ignore macro instead of hash comment
  x86: use matching CFI_ENDPROC
  ...
Commit a3da5bf84a
@@ -0,0 +1,71 @@
What:		/sys/firmware/memmap/
Date:		June 2008
Contact:	Bernhard Walle <bwalle@suse.de>
Description:
		On all platforms, the firmware provides a memory map which the
		kernel reads. The resources from that memory map are registered
		in the kernel resource tree and exposed to userspace via
		/proc/iomem (together with other resources).

		However, on most architectures that firmware-provided memory
		map is modified afterwards by the kernel itself, either because
		the kernel merges that memory map with other information or
		just because the user overwrites that memory map via command
		line.

		kexec needs the raw firmware-provided memory map to setup the
		parameter segment of the kernel that should be booted with
		kexec. Also, the raw memory map is useful for debugging. For
		that reason, /sys/firmware/memmap is an interface that provides
		the raw memory map to userspace.

		The structure is as follows: Under /sys/firmware/memmap there
		are subdirectories with the number of the entry as their name:

			/sys/firmware/memmap/0
			/sys/firmware/memmap/1
			/sys/firmware/memmap/2
			/sys/firmware/memmap/3
			...

		The maximum depends on the number of memory map entries provided
		by the firmware. The order is just the order that the firmware
		provides.

		Each directory contains three files:

		start	: The start address (as hexadecimal number with the
			  '0x' prefix).
		end	: The end address, inclusive (regardless whether the
			  firmware provides inclusive or exclusive ranges).
		type	: Type of the entry as string. See below for a list of
			  valid types.

		So, for example:

			/sys/firmware/memmap/0/start
			/sys/firmware/memmap/0/end
			/sys/firmware/memmap/0/type
			/sys/firmware/memmap/1/start
			...

		Currently following types exist:

		  - System RAM
		  - ACPI Tables
		  - ACPI Non-volatile Storage
		  - reserved

		Following shell snippet can be used to display that memory
		map in a human-readable format:

		-------------------- 8< ----------------------------------------
		  #!/bin/bash
		  cd /sys/firmware/memmap
		  for dir in * ; do
		      start=$(cat $dir/start)
		      end=$(cat $dir/end)
		      type=$(cat $dir/type)
		      printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type"
		  done
		-------------------- >8 ----------------------------------------
@@ -109,7 +109,7 @@ There are two possible methods of using Kdump.
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
    only with the architectures which support a relocatable kernel. As
-   of today i386 and ia64 architectures support relocatable kernel.
+   of today, i386, x86_64 and ia64 architectures support relocatable kernel.

    Building a relocatable kernel is advantageous from the point of view that
    one does not have to build a second kernel for capturing the dump. But
@@ -271,6 +271,17 @@ and is between 256 and 4096 characters. It is defined in the file
	aic79xx=	[HW,SCSI]
			See Documentation/scsi/aic79xx.txt.

+	amd_iommu=	[HW,X86-64]
+			Pass parameters to the AMD IOMMU driver in the system.
+			Possible values are:
+			isolate - enable device isolation (each device, as far
+				  as possible, will get its own protection
+				  domain)
+	amd_iommu_size= [HW,X86-64]
+			Define the size of the aperture for the AMD IOMMU
+			driver. Possible values are:
+			'32M', '64M' (default), '128M', '256M', '512M', '1G'
+
	amijoy.map=	[HW,JOY] Amiga joystick support
			Map of devices attached to JOY0DAT and JOY1DAT
			Format: <a>,<b>

@@ -599,6 +610,29 @@ and is between 256 and 4096 characters. It is defined in the file
			See drivers/char/README.epca and
			Documentation/digiepca.txt.

+	disable_mtrr_cleanup [X86]
+	enable_mtrr_cleanup  [X86]
+			The kernel tries to adjust MTRR layout from continuous
+			to discrete, to make X server driver able to add WB
+			entry later. This parameter enables/disables that.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			Used for mtrr cleanup. It is the largest continuous chunk
+			that could hold holes aka. UC entries.
+
+	mtrr_gran_size=nn[KMG] [X86]
+			Used for mtrr cleanup. It is granularity of mtrr block.
+			Default is 1.
+			Large value could prevent small alignment from
+			using up MTRRs.
+
+	mtrr_spare_reg_nr=n [X86]
+			Format: <integer>
+			Range: 0,7 : spare reg number
+			Default : 1
+			Used for mtrr cleanup. It is spare mtrr entries number.
+			Set to 2 or more if your graphical card needs more.
+
	disable_mtrr_trim [X86, Intel and AMD only]
			By default the kernel will trim any uncacheable
			memory out of your available memory pool based on

@@ -2116,6 +2150,9 @@ and is between 256 and 4096 characters. It is defined in the file
	usbhid.mousepoll=
			[USBHID] The interval which mice are to be polled at.

+	add_efi_memmap	[EFI; x86-32,X86-64] Include EFI memory map in
+			kernel's map of available physical RAM.
+
	vdso=		[X86-32,SH,x86-64]
			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
			vdso=1: enable VDSO (default)
@@ -10,7 +10,7 @@ us to generate 'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt
which get executed even if the system is otherwise locked up hard).
This can be used to debug hard kernel lockups. By executing periodic
NMI interrupts, the kernel can monitor whether any CPU has locked up,
and print out debugging messages if so.

In order to use the NMI watchdog, you need to have APIC support in your
kernel. For SMP kernels, APIC support gets compiled in automatically. For

@@ -22,8 +22,7 @@ CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain
kernel debugging options, such as Kernel Stack Meter or Kernel Tracer,
may implicitly disable the NMI watchdog.]

-For x86-64, the needed APIC is always compiled in, and the NMI watchdog is
-always enabled with I/O-APIC mode (nmi_watchdog=1).
+For x86-64, the needed APIC is always compiled in.

Using local APIC (nmi_watchdog=2) needs the first performance register, so
you can't use it for other purposes (such as high precision performance

@@ -63,16 +62,15 @@ when the system is idle), but if your system locks up on anything but the
"hlt", then you are out of luck -- the event will not happen at all and the
watchdog won't trigger. This is a shortcoming of the local APIC watchdog
-- unfortunately there is no "clock ticks" event that would work all the
time. The I/O APIC watchdog is driven externally and has no such shortcoming.
But its NMI frequency is much higher, resulting in a more significant hit
to the overall system performance.

-NOTE: starting with 2.4.2-ac18 the NMI-oopser is disabled by default,
-you have to enable it with a boot time parameter. Prior to 2.4.2-ac18
-the NMI-oopser is enabled unconditionally on x86 SMP boxes.
-
-On x86-64 the NMI oopser is on by default. On 64bit Intel CPUs
-it uses IO-APIC by default and on AMD it uses local APIC.
+On x86 nmi_watchdog is disabled by default so you have to enable it with
+a boot time parameter.
+
+NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally
+on x86 SMP boxes.

[ feel free to send bug reports, suggestions and patches to
  Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing
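As a quick, hedged illustration (not part of the patch): one way to see whether the watchdog is actually generating NMIs is to sample the NMI line of /proc/interrupts twice and compare the totals. The sketch below assumes the usual /proc/interrupts layout:

/*
 * Sketch: sample the NMI counts from /proc/interrupts twice; when the NMI
 * watchdog is active, the per-CPU counts should keep increasing.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static long nmi_total(void)
{
	FILE *f = fopen("/proc/interrupts", "r");
	char line[1024];
	long total = -1;

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		if (strncmp(line, "NMI:", 4) == 0) {
			char *p = line + 4;
			long n;
			int used;

			total = 0;
			while (sscanf(p, "%ld%n", &n, &used) == 1) {
				total += n;	/* sum the per-CPU columns */
				p += used;
			}
			break;
		}
	}
	fclose(f);
	return total;
}

int main(void)
{
	long before = nmi_total();

	sleep(5);
	printf("NMI count went from %ld to %ld over 5 seconds\n",
	       before, nmi_total());
	return 0;
}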
@@ -1,17 +1,14 @@
-		     THE LINUX/I386 BOOT PROTOCOL
-		     ----------------------------
+		     THE LINUX/x86 BOOT PROTOCOL
+		     ---------------------------

-		    H. Peter Anvin <hpa@zytor.com>
-			Last update 2007-05-23
-
-On the i386 platform, the Linux kernel uses a rather complicated boot
+On the x86 platform, the Linux kernel uses a rather complicated boot
convention. This has evolved partially due to historical aspects, as
well as the desire in the early days to have the kernel itself be a
bootable image, the complicated PC memory model and due to changed
expectations in the PC industry caused by the effective demise of
real-mode DOS as a mainstream operating system.

-Currently, the following versions of the Linux/i386 boot protocol exist.
+Currently, the following versions of the Linux/x86 boot protocol exist.

Old kernels:	zImage/Image support only. Some very early kernels
		may not even support a command line.
@@ -372,10 +369,17 @@ Protocol: 2.00+
	- If 0, the protected-mode code is loaded at 0x10000.
	- If 1, the protected-mode code is loaded at 0x100000.

+  Bit 5 (write): QUIET_FLAG
+	- If 0, print early messages.
+	- If 1, suppress early messages.
+		This requests to the kernel (decompressor and early
+		kernel) to not write early messages that require
+		accessing the display hardware directly.
+
  Bit 6 (write): KEEP_SEGMENTS
	Protocol: 2.07+
-	- if 0, reload the segment registers in the 32bit entry point.
-	- if 1, do not reload the segment registers in the 32bit entry point.
+	- If 0, reload the segment registers in the 32bit entry point.
+	- If 1, do not reload the segment registers in the 32bit entry point.
	Assume that %cs %ds %ss %es are all set to flat segments with
	a base of 0 (or the equivalent for their environment).
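To make the loadflags bits above concrete, here is a small hypothetical sketch (not part of the patch) of a boot loader setting QUIET_FLAG and KEEP_SEGMENTS before handing control to the kernel; how the setup header is located in memory is deliberately left out, and only the two bits under discussion are shown:

/*
 * Hypothetical boot-loader fragment: set the write-only loadflags bits
 * described above.
 */
#include <stdint.h>

#define QUIET_FLAG	(1 << 5)	/* bit 5: suppress early messages */
#define KEEP_SEGMENTS	(1 << 6)	/* bit 6: do not reload segment registers,
					   honoured on boot protocol 2.07+ */

static void configure_loadflags(uint8_t *loadflags, int user_wants_quiet)
{
	if (user_wants_quiet)
		*loadflags |= QUIET_FLAG;	/* kernel skips early display output */

	/* This loader already leaves %cs/%ds/%ss/%es as flat segments. */
	*loadflags |= KEEP_SEGMENTS;
}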
@@ -504,7 +508,7 @@ Protocol: 2.06+
  maximum size was 255.

Field name:	hardware_subarch
-Type:		write
+Type:		write (optional, defaults to x86/PC)
Offset/size:	0x23c/4
Protocol:	2.07+

@@ -520,11 +524,13 @@ Protocol: 2.07+
  0x00000002	Xen

Field name:	hardware_subarch_data
-Type:		write
+Type:		write (subarch-dependent)
Offset/size:	0x240/8
Protocol:	2.07+

  A pointer to data that is specific to hardware subarch
+  This field is currently unused for the default x86/PC environment,
+  do not modify.

Field name:	payload_offset
Type:		read
@@ -545,6 +551,34 @@ Protocol: 2.08+

  The length of the payload.

+Field name:	setup_data
+Type:		write (special)
+Offset/size:	0x250/8
+Protocol:	2.09+
+
+  The 64-bit physical pointer to NULL terminated single linked list of
+  struct setup_data. This is used to define a more extensible boot
+  parameters passing mechanism. The definition of struct setup_data is
+  as follow:
+
+  struct setup_data {
+	  u64 next;
+	  u32 type;
+	  u32 len;
+	  u8  data[0];
+  };
+
+  Where, the next is a 64-bit physical pointer to the next node of
+  linked list, the next field of the last node is 0; the type is used
+  to identify the contents of data; the len is the length of data
+  field; the data holds the real payload.
+
+  This list may be modified at a number of points during the bootup
+  process. Therefore, when modifying this list one should always make
+  sure to consider the case where the linked list already contains
+  entries.
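A brief sketch (again editorial, not part of the patch) of how a boot loader might append one node to this list while honouring the warning about pre-existing entries; MY_SETUP_TYPE and the identity physical-to-virtual mapping are assumptions made for the example:

/*
 * Hypothetical boot-loader fragment: append one node to the setup_data
 * list.  MY_SETUP_TYPE is an invented type value, and the code assumes the
 * loader can dereference physical addresses directly.
 */
#include <stdint.h>

struct setup_data {
	uint64_t next;	/* physical address of the next node, 0 at the tail */
	uint32_t type;	/* identifies the contents of data[] */
	uint32_t len;	/* length of data[] in bytes */
	uint8_t  data[];
};

#define MY_SETUP_TYPE	0x1000	/* illustrative type value only */

static void append_setup_data(uint64_t *setup_data_field,
			      struct setup_data *node, uint64_t node_phys)
{
	uint64_t *slot = setup_data_field;	/* the header field at 0x250 */

	node->next = 0;
	node->type = MY_SETUP_TYPE;
	/* node->len and node->data[] are filled in by the caller */

	/* The list may already contain entries, so walk to the last node. */
	while (*slot)
		slot = &((struct setup_data *)(uintptr_t)*slot)->next;

	*slot = node_phys;	/* link the new node at the tail */
}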
**** THE IMAGE CHECKSUM

From boot protocol version 2.08 onwards the CRC-32 is calculated over

@@ -553,6 +587,7 @@ initial remainder of 0xffffffff. The checksum is appended to the
file; therefore the CRC of the file up to the limit specified in the
syssize field of the header is always 0.


**** THE KERNEL COMMAND LINE

The kernel command line has become an important way for the boot
@@ -584,28 +619,6 @@ command line is entered using the following protocol:
	covered by setup_move_size, so you may need to adjust this
	field.

-Field name:	setup_data
-Type:		write (obligatory)
-Offset/size:	0x250/8
-Protocol:	2.09+
-
-  The 64-bit physical pointer to NULL terminated single linked list of
-  struct setup_data. This is used to define a more extensible boot
-  parameters passing mechanism. The definition of struct setup_data is
-  as follow:
-
-  struct setup_data {
-	  u64 next;
-	  u32 type;
-	  u32 len;
-	  u8  data[0];
-  };
-
-  Where, the next is a 64-bit physical pointer to the next node of
-  linked list, the next field of the last node is 0; the type is used
-  to identify the contents of data; the len is the length of data
-  field; the data holds the real payload.
-

**** MEMORY LAYOUT OF THE REAL-MODE CODE
@@ -11,9 +11,8 @@ ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole
ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space
ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
-ffffffff80000000 - ffffffff82800000 (=40 MB)   kernel text mapping, from phys 0
-... unused hole ...
-ffffffff88000000 - fffffffffff00000 (=1919 MB) module mapping space
+ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
+ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space

The direct mapping covers all memory in the system up to the highest
memory address (this means in some cases it can also include PCI memory
@@ -36,3 +36,7 @@ Mechanics:
  services.
	noefi		turn off all EFI runtime services
	reboot_type=k	turn off EFI reboot runtime service
+- If the EFI memory map has additional entries not in the E820 map,
+  you can include those entries in the kernel's memory map of available
+  physical RAM by using the following kernel command line parameter.
+	add_efi_memmap	include EFI memory map of available physical RAM
@@ -376,6 +376,12 @@ L: linux-geode@lists.infradead.org (moderated for non-subscribers)
W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
S:	Supported

+AMD IOMMU (AMD-VI)
+P:	Joerg Roedel
+M:	joerg.roedel@amd.com
+L:	iommu@lists.linux-foundation.org
+S:	Supported
+
AMS (Apple Motion Sensor) DRIVER
P:	Stelian Pop
M:	stelian@popies.net
arch/x86/Kconfig: 282 lines changed
@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
|
|||
def_bool y
|
||||
|
||||
config HAVE_SETUP_PER_CPU_AREA
|
||||
def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
|
||||
def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
|
||||
|
||||
config HAVE_CPUMASK_OF_CPU_MAP
|
||||
def_bool X86_64_SMP
|
||||
|
@ -181,12 +181,12 @@ config X86_64_SMP
|
|||
config X86_HT
|
||||
bool
|
||||
depends on SMP
|
||||
depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || X86_64
|
||||
depends on (X86_32 && !X86_VOYAGER) || X86_64
|
||||
default y
|
||||
|
||||
config X86_BIOS_REBOOT
|
||||
bool
|
||||
depends on !X86_VISWS && !X86_VOYAGER
|
||||
depends on !X86_VOYAGER
|
||||
default y
|
||||
|
||||
config X86_TRAMPOLINE
|
||||
|
@ -230,6 +230,26 @@ config SMP
|
|||
|
||||
If you don't know what to do here, say N.
|
||||
|
||||
config X86_FIND_SMP_CONFIG
|
||||
def_bool y
|
||||
depends on X86_MPPARSE || X86_VOYAGER
|
||||
|
||||
if ACPI
|
||||
config X86_MPPARSE
|
||||
def_bool y
|
||||
bool "Enable MPS table"
|
||||
depends on X86_LOCAL_APIC
|
||||
help
|
||||
For old smp systems that do not have proper acpi support. Newer systems
|
||||
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
|
||||
endif
|
||||
|
||||
if !ACPI
|
||||
config X86_MPPARSE
|
||||
def_bool y
|
||||
depends on X86_LOCAL_APIC
|
||||
endif
|
||||
|
||||
choice
|
||||
prompt "Subarchitecture Type"
|
||||
default X86_PC
|
||||
|
@ -251,7 +271,7 @@ config X86_ELAN
|
|||
|
||||
config X86_VOYAGER
|
||||
bool "Voyager (NCR)"
|
||||
depends on X86_32 && (SMP || BROKEN)
|
||||
depends on X86_32 && (SMP || BROKEN) && !PCI
|
||||
help
|
||||
Voyager is an MCA-based 32-way capable SMP architecture proprietary
|
||||
to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based.
|
||||
|
@ -261,16 +281,27 @@ config X86_VOYAGER
|
|||
If you do not specifically know you have a Voyager based machine,
|
||||
say N here, otherwise the kernel you build will not be bootable.
|
||||
|
||||
config X86_GENERICARCH
|
||||
bool "Generic architecture"
|
||||
depends on X86_32
|
||||
help
|
||||
This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
|
||||
subarchitectures. It is intended for a generic binary kernel.
|
||||
If you select them all, the kernel will probe them one by one and will
fall back to the default.
|
||||
|
||||
if X86_GENERICARCH
|
||||
|
||||
config X86_NUMAQ
|
||||
bool "NUMAQ (IBM/Sequent)"
|
||||
depends on SMP && X86_32
|
||||
depends on SMP && X86_32 && PCI && X86_MPPARSE
|
||||
select NUMA
|
||||
help
|
||||
This option is used for getting Linux to run on a (IBM/Sequent) NUMA
|
||||
multiquad box. This changes the way that processors are bootstrapped,
|
||||
and uses Clustered Logical APIC addressing mode instead of Flat Logical.
|
||||
You will need a new lynxer.elf file to flash your firmware with - send
|
||||
email to <Martin.Bligh@us.ibm.com>.
|
||||
This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
|
||||
NUMA multiquad box. This changes the way that processors are
|
||||
bootstrapped, and uses Clustered Logical APIC addressing mode instead
|
||||
of Flat Logical. You will need a new lynxer.elf file to flash your
|
||||
firmware with - send email to <Martin.Bligh@us.ibm.com>.
|
||||
|
||||
config X86_SUMMIT
|
||||
bool "Summit/EXA (IBM x440)"
|
||||
|
@ -279,46 +310,21 @@ config X86_SUMMIT
|
|||
This option is needed for IBM systems that use the Summit/EXA chipset.
|
||||
In particular, it is needed for the x440.
|
||||
|
||||
If you don't have one of these computers, you should say N here.
|
||||
If you want to build a NUMA kernel, you must select ACPI.
|
||||
|
||||
config X86_BIGSMP
|
||||
bool "Support for other sub-arch SMP systems with more than 8 CPUs"
|
||||
depends on X86_32 && SMP
|
||||
help
|
||||
This option is needed for the systems that have more than 8 CPUs
|
||||
and if the system is not of any sub-arch type above.
|
||||
|
||||
If you don't have such a system, you should say N here.
|
||||
|
||||
config X86_VISWS
|
||||
bool "SGI 320/540 (Visual Workstation)"
|
||||
depends on X86_32
|
||||
help
|
||||
The SGI Visual Workstation series is an IA32-based workstation
|
||||
based on SGI systems chips with some legacy PC hardware attached.
|
||||
|
||||
Say Y here to create a kernel to run on the SGI 320 or 540.
|
||||
|
||||
A kernel compiled for the Visual Workstation will not run on PCs
|
||||
and vice versa. See <file:Documentation/sgi-visws.txt> for details.
|
||||
|
||||
config X86_GENERICARCH
|
||||
bool "Generic architecture (Summit, bigsmp, ES7000, default)"
|
||||
depends on X86_32
|
||||
help
|
||||
This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
|
||||
It is intended for a generic binary kernel.
|
||||
If you want a NUMA kernel, select ACPI. We need SRAT for NUMA.
|
||||
|
||||
config X86_ES7000
|
||||
bool "Support for Unisys ES7000 IA32 series"
|
||||
depends on X86_32 && SMP
|
||||
help
|
||||
Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
|
||||
supposed to run on an IA32-based Unisys ES7000 system.
|
||||
Only choose this option if you have such a system, otherwise you
|
||||
should say N here.
|
||||
|
||||
config X86_BIGSMP
|
||||
bool "Support for big SMP systems with more than 8 CPUs"
|
||||
depends on X86_32 && SMP
|
||||
help
|
||||
This option is needed for the systems that have more than 8 CPUs
|
||||
and if the system is not of any sub-arch type above.
|
||||
|
||||
endif
|
||||
|
||||
config X86_RDC321X
|
||||
bool "RDC R-321x SoC"
|
||||
|
@ -337,7 +343,7 @@ config X86_RDC321X
|
|||
config X86_VSMP
|
||||
bool "Support for ScaleMP vSMP"
|
||||
select PARAVIRT
|
||||
depends on X86_64
|
||||
depends on X86_64 && PCI
|
||||
help
|
||||
Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
|
||||
supposed to run on these EM64T-based machines. Only choose this option
|
||||
|
@ -345,6 +351,18 @@ config X86_VSMP
|
|||
|
||||
endchoice
|
||||
|
||||
config X86_VISWS
|
||||
bool "SGI 320/540 (Visual Workstation)"
|
||||
depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT
|
||||
help
|
||||
The SGI Visual Workstation series is an IA32-based workstation
|
||||
based on SGI systems chips with some legacy PC hardware attached.
|
||||
|
||||
Say Y here to create a kernel to run on the SGI 320 or 540.
|
||||
|
||||
A kernel compiled for the Visual Workstation will run on general
|
||||
PCs as well. See <file:Documentation/sgi-visws.txt> for details.
|
||||
|
||||
config SCHED_NO_NO_OMIT_FRAME_POINTER
|
||||
def_bool y
|
||||
prompt "Single-depth WCHAN output"
|
||||
|
@ -373,7 +391,7 @@ config VMI
|
|||
bool "VMI Guest support"
|
||||
select PARAVIRT
|
||||
depends on X86_32
|
||||
depends on !(X86_VISWS || X86_VOYAGER)
|
||||
depends on !X86_VOYAGER
|
||||
help
|
||||
VMI provides a paravirtualized interface to the VMware ESX server
|
||||
(it could be used by other hypervisors in theory too, but is not
|
||||
|
@ -384,7 +402,7 @@ config KVM_CLOCK
|
|||
bool "KVM paravirtualized clock"
|
||||
select PARAVIRT
|
||||
select PARAVIRT_CLOCK
|
||||
depends on !(X86_VISWS || X86_VOYAGER)
|
||||
depends on !X86_VOYAGER
|
||||
help
|
||||
Turning on this option will allow you to run a paravirtualized clock
|
||||
when running over the KVM hypervisor. Instead of relying on a PIT
|
||||
|
@ -395,7 +413,7 @@ config KVM_CLOCK
|
|||
config KVM_GUEST
|
||||
bool "KVM Guest support"
|
||||
select PARAVIRT
|
||||
depends on !(X86_VISWS || X86_VOYAGER)
|
||||
depends on !X86_VOYAGER
|
||||
help
|
||||
This option enables various optimizations for running under the KVM
|
||||
hypervisor.
|
||||
|
@ -404,7 +422,7 @@ source "arch/x86/lguest/Kconfig"
|
|||
|
||||
config PARAVIRT
|
||||
bool "Enable paravirtualization code"
|
||||
depends on !(X86_VISWS || X86_VOYAGER)
|
||||
depends on !X86_VOYAGER
|
||||
help
|
||||
This changes the kernel so it can modify itself when it is run
|
||||
under a hypervisor, potentially improving performance significantly
|
||||
|
@ -417,51 +435,33 @@ config PARAVIRT_CLOCK
|
|||
|
||||
endif
|
||||
|
||||
config MEMTEST_BOOTPARAM
|
||||
bool "Memtest boot parameter"
|
||||
config PARAVIRT_DEBUG
|
||||
bool "paravirt-ops debugging"
|
||||
depends on PARAVIRT && DEBUG_KERNEL
|
||||
help
|
||||
Enable to debug paravirt_ops internals. Specifically, BUG if
|
||||
a paravirt_op is missing when it is called.
|
||||
|
||||
config MEMTEST
|
||||
bool "Memtest"
|
||||
depends on X86_64
|
||||
default y
|
||||
help
|
||||
This option adds a kernel parameter 'memtest', which allows memtest
|
||||
to be disabled at boot. If this option is selected, memtest
|
||||
functionality can be disabled with memtest=0 on the kernel
|
||||
command line. The purpose of this option is to allow a single
|
||||
kernel image to be distributed with memtest built in, but not
|
||||
necessarily enabled.
|
||||
|
||||
to be set.
|
||||
memtest=0, mean disabled; -- default
|
||||
memtest=1, mean do 1 test pattern;
|
||||
...
|
||||
memtest=4, mean do 4 test patterns.
|
||||
If you are unsure how to answer this question, answer Y.
|
||||
|
||||
config MEMTEST_BOOTPARAM_VALUE
|
||||
int "Memtest boot parameter default value (0-4)"
|
||||
depends on MEMTEST_BOOTPARAM
|
||||
range 0 4
|
||||
default 0
|
||||
help
|
||||
This option sets the default value for the kernel parameter
|
||||
'memtest', which allows memtest to be disabled at boot. If this
|
||||
option is set to 0 (zero), the memtest kernel parameter will
|
||||
default to 0, disabling memtest at bootup. If this option is
|
||||
set to 4, the memtest kernel parameter will default to 4,
|
||||
enabling memtest at bootup, and use that as pattern number.
|
||||
|
||||
If you are unsure how to answer this question, answer 0.
|
||||
|
||||
config ACPI_SRAT
|
||||
def_bool y
|
||||
depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
|
||||
select ACPI_NUMA
|
||||
|
||||
config HAVE_ARCH_PARSE_SRAT
|
||||
def_bool y
|
||||
depends on ACPI_SRAT
|
||||
|
||||
config X86_SUMMIT_NUMA
|
||||
def_bool y
|
||||
depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH)
|
||||
depends on X86_32 && NUMA && X86_GENERICARCH
|
||||
|
||||
config X86_CYCLONE_TIMER
|
||||
def_bool y
|
||||
depends on X86_32 && X86_SUMMIT || X86_GENERICARCH
|
||||
depends on X86_GENERICARCH
|
||||
|
||||
config ES7000_CLUSTERED_APIC
|
||||
def_bool y
|
||||
|
@ -549,6 +549,21 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
|
|||
Calgary anyway, pass 'iommu=calgary' on the kernel command line.
|
||||
If unsure, say Y.
|
||||
|
||||
config AMD_IOMMU
|
||||
bool "AMD IOMMU support"
|
||||
select SWIOTLB
|
||||
depends on X86_64 && PCI && ACPI
|
||||
help
|
||||
With this option you can enable support for AMD IOMMU hardware in
|
||||
your system. An IOMMU is a hardware component which provides
|
||||
remapping of DMA memory accesses from devices. With an AMD IOMMU you
|
||||
can isolate the DMA memory of different devices and protect the
|
||||
system from misbehaving device drivers or hardware.
|
||||
|
||||
You can find out if your system has an AMD IOMMU if you look into
|
||||
your BIOS for an option to enable it or if you have an IVRS ACPI
|
||||
table.
|
||||
|
||||
# need this always selected by IOMMU for the VIA workaround
|
||||
config SWIOTLB
|
||||
bool
|
||||
|
@ -560,21 +575,36 @@ config SWIOTLB
|
|||
3 GB of memory. If unsure, say Y.
|
||||
|
||||
config IOMMU_HELPER
|
||||
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
|
||||
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
|
||||
config MAXSMP
|
||||
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
|
||||
depends on X86_64 && SMP
|
||||
default n
|
||||
help
|
||||
Configure maximum number of CPUS and NUMA Nodes for this architecture.
|
||||
If unsure, say N.
|
||||
|
||||
if MAXSMP
|
||||
config NR_CPUS
|
||||
int "Maximum number of CPUs (2-255)"
|
||||
range 2 255
|
||||
int
|
||||
default "4096"
|
||||
endif
|
||||
|
||||
if !MAXSMP
|
||||
config NR_CPUS
|
||||
int "Maximum number of CPUs (2-4096)"
|
||||
range 2 4096
|
||||
depends on SMP
|
||||
default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
|
||||
default "8"
|
||||
help
|
||||
This allows you to specify the maximum number of CPUs which this
|
||||
kernel will support. The maximum supported value is 255 and the
|
||||
kernel will support. The maximum supported value is 4096 and the
|
||||
minimum value which makes sense is 2.
|
||||
|
||||
This is purely to save memory - each supported CPU adds
|
||||
approximately eight kilobytes to the kernel image.
|
||||
endif
|
||||
|
||||
config SCHED_SMT
|
||||
bool "SMT (Hyperthreading) scheduler support"
|
||||
|
@ -598,7 +628,7 @@ source "kernel/Kconfig.preempt"
|
|||
|
||||
config X86_UP_APIC
|
||||
bool "Local APIC support on uniprocessors"
|
||||
depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
|
||||
depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH)
|
||||
help
|
||||
A local APIC (Advanced Programmable Interrupt Controller) is an
|
||||
integrated interrupt controller in the CPU. If you have a single-CPU
|
||||
|
@ -623,11 +653,11 @@ config X86_UP_IOAPIC
|
|||
|
||||
config X86_LOCAL_APIC
|
||||
def_bool y
|
||||
depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH))
|
||||
depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
|
||||
|
||||
config X86_IO_APIC
|
||||
def_bool y
|
||||
depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH))
|
||||
depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
|
||||
|
||||
config X86_VISWS_APIC
|
||||
def_bool y
|
||||
|
@ -681,7 +711,7 @@ config X86_MCE_NONFATAL
|
|||
|
||||
config X86_MCE_P4THERMAL
|
||||
bool "check for P4 thermal throttling interrupt."
|
||||
depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS
|
||||
depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
|
||||
help
|
||||
Enabling this feature will cause a message to be printed when the P4
|
||||
enters thermal throttling.
|
||||
|
@ -911,9 +941,9 @@ config X86_PAE
|
|||
config NUMA
|
||||
bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
|
||||
depends on SMP
|
||||
depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL)
|
||||
depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
|
||||
default n if X86_PC
|
||||
default y if (X86_NUMAQ || X86_SUMMIT)
|
||||
default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
|
||||
help
|
||||
Enable NUMA (Non Uniform Memory Access) support.
|
||||
The kernel will try to allocate memory used by a CPU on the
|
||||
|
@ -965,13 +995,25 @@ config NUMA_EMU
|
|||
into virtual nodes when booted with "numa=fake=N", where N is the
|
||||
number of nodes. This is only useful for debugging.
|
||||
|
||||
if MAXSMP
|
||||
|
||||
config NODES_SHIFT
|
||||
int "Max num nodes shift(1-9)"
|
||||
range 1 9 if X86_64
|
||||
int
|
||||
default "9"
|
||||
endif
|
||||
|
||||
if !MAXSMP
|
||||
config NODES_SHIFT
|
||||
int "Maximum NUMA Nodes (as a power of 2)"
|
||||
range 1 9 if X86_64
|
||||
default "6" if X86_64
|
||||
default "4" if X86_NUMAQ
|
||||
default "3"
|
||||
depends on NEED_MULTIPLE_NODES
|
||||
help
|
||||
Specify the maximum number of NUMA Nodes available on the target
|
||||
system. Increases memory reserved to accommodate various tables.
|
||||
endif
|
||||
|
||||
config HAVE_ARCH_BOOTMEM_NODE
|
||||
def_bool y
|
||||
|
@ -1090,6 +1132,40 @@ config MTRR
|
|||
|
||||
See <file:Documentation/mtrr.txt> for more information.
|
||||
|
||||
config MTRR_SANITIZER
|
||||
def_bool y
|
||||
prompt "MTRR cleanup support"
|
||||
depends on MTRR
|
||||
help
|
||||
Convert MTRR layout from continuous to discrete, so some X driver
|
||||
could add WB entries.
|
||||
|
||||
Say N here if you see bootup problems (boot crash, boot hang,
|
||||
spontaneous reboots).
|
||||
|
||||
Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
|
||||
could be used to send largest mtrr entry size for continuous block
|
||||
to hold holes (aka. UC entries)
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config MTRR_SANITIZER_ENABLE_DEFAULT
|
||||
int "MTRR cleanup enable value (0-1)"
|
||||
range 0 1
|
||||
default "0"
|
||||
depends on MTRR_SANITIZER
|
||||
help
|
||||
Enable mtrr cleanup default value
|
||||
|
||||
config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
|
||||
int "MTRR cleanup spare reg num (0-7)"
|
||||
range 0 7
|
||||
default "1"
|
||||
depends on MTRR_SANITIZER
|
||||
help
|
||||
mtrr cleanup spare entries default, it can be changed via
|
||||
mtrr_spare_reg_nr=
|
||||
|
||||
config X86_PAT
|
||||
bool
|
||||
prompt "x86 PAT support"
|
||||
|
@ -1190,7 +1266,6 @@ config KEXEC
|
|||
|
||||
config CRASH_DUMP
|
||||
bool "kernel crash dumps (EXPERIMENTAL)"
|
||||
depends on EXPERIMENTAL
|
||||
depends on X86_64 || (X86_32 && HIGHMEM)
|
||||
help
|
||||
Generate crash dump after being started by kexec.
|
||||
|
@ -1339,7 +1414,7 @@ config X86_APM_BOOT
|
|||
|
||||
menuconfig APM
|
||||
tristate "APM (Advanced Power Management) BIOS support"
|
||||
depends on X86_32 && PM_SLEEP && !X86_VISWS
|
||||
depends on X86_32 && PM_SLEEP
|
||||
---help---
|
||||
APM is a BIOS specification for saving power using several different
|
||||
techniques. This is mostly useful for battery powered laptops with
|
||||
|
@ -1475,8 +1550,7 @@ endmenu
|
|||
menu "Bus options (PCI etc.)"
|
||||
|
||||
config PCI
|
||||
bool "PCI support" if !X86_VISWS && !X86_VSMP
|
||||
depends on !X86_VOYAGER
|
||||
bool "PCI support"
|
||||
default y
|
||||
select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
|
||||
help
|
||||
|
@ -1487,7 +1561,7 @@ config PCI
|
|||
|
||||
choice
|
||||
prompt "PCI access mode"
|
||||
depends on X86_32 && PCI && !X86_VISWS
|
||||
depends on X86_32 && PCI
|
||||
default PCI_GOANY
|
||||
---help---
|
||||
On PCI systems, the BIOS can be used to detect the PCI devices and
|
||||
|
@ -1524,12 +1598,12 @@ endchoice
|
|||
|
||||
config PCI_BIOS
|
||||
def_bool y
|
||||
depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
|
||||
depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY)
|
||||
|
||||
# x86-64 doesn't support PCI BIOS access from long mode so always go direct.
|
||||
config PCI_DIRECT
|
||||
def_bool y
|
||||
depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC) || X86_VISWS)
|
||||
depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC))
|
||||
|
||||
config PCI_MMCONFIG
|
||||
def_bool y
|
||||
|
@ -1589,7 +1663,7 @@ if X86_32
|
|||
|
||||
config ISA
|
||||
bool "ISA support"
|
||||
depends on !(X86_VOYAGER || X86_VISWS)
|
||||
depends on !X86_VOYAGER
|
||||
help
|
||||
Find out whether you have ISA slots on your motherboard. ISA is the
|
||||
name of a bus system, i.e. the way the CPU talks to the other stuff
|
||||
|
@ -1616,7 +1690,7 @@ config EISA
|
|||
source "drivers/eisa/Kconfig"
|
||||
|
||||
config MCA
|
||||
bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
|
||||
bool "MCA support" if !X86_VOYAGER
|
||||
default y if X86_VOYAGER
|
||||
help
|
||||
MicroChannel Architecture is found in some IBM PS/2 machines and
|
||||
|
|
|
@ -344,7 +344,7 @@ config X86_F00F_BUG
|
|||
|
||||
config X86_WP_WORKS_OK
|
||||
def_bool y
|
||||
depends on X86_32 && !M386
|
||||
depends on !M386
|
||||
|
||||
config X86_INVLPG
|
||||
def_bool y
|
||||
|
@ -399,6 +399,10 @@ config X86_TSC
|
|||
def_bool y
|
||||
depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
|
||||
|
||||
config X86_CMPXCHG64
|
||||
def_bool y
|
||||
depends on X86_PAE || X86_64
|
||||
|
||||
# this should be set for all -march=.. options where the compiler
|
||||
# generates cmov.
|
||||
config X86_CMOV
|
||||
|
|
|
@ -20,6 +20,14 @@ config NONPROMISC_DEVMEM
|
|||
|
||||
If in doubt, say Y.
|
||||
|
||||
config X86_VERBOSE_BOOTUP
|
||||
bool "Enable verbose x86 bootup info messages"
|
||||
default y
|
||||
help
|
||||
Enables the informational output from the decompression stage
|
||||
(e.g. bzImage) of the boot. If you disable this you will still
|
||||
see errors. Disable this if you want silent bootup.
|
||||
|
||||
config EARLY_PRINTK
|
||||
bool "Early printk" if EMBEDDED
|
||||
default y
|
||||
|
@ -60,7 +68,7 @@ config DEBUG_PAGEALLOC
|
|||
config DEBUG_PER_CPU_MAPS
|
||||
bool "Debug access to per_cpu maps"
|
||||
depends on DEBUG_KERNEL
|
||||
depends on X86_64_SMP
|
||||
depends on X86_SMP
|
||||
default n
|
||||
help
|
||||
Say Y to verify that the per_cpu map being accessed has
|
||||
|
@ -129,15 +137,6 @@ config 4KSTACKS
|
|||
on the VM subsystem for higher order allocations. This option
|
||||
will also use IRQ stacks to compensate for the reduced stackspace.
|
||||
|
||||
config X86_FIND_SMP_CONFIG
|
||||
def_bool y
|
||||
depends on X86_LOCAL_APIC || X86_VOYAGER
|
||||
depends on X86_32
|
||||
|
||||
config X86_MPPARSE
|
||||
def_bool y
|
||||
depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64
|
||||
|
||||
config DOUBLEFAULT
|
||||
default y
|
||||
bool "Enable doublefault exception handler" if EMBEDDED
|
||||
|
|
|
@ -113,33 +113,11 @@ mcore-y := arch/x86/mach-default/
|
|||
mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-x86/mach-voyager
|
||||
mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/
|
||||
|
||||
# VISWS subarch support
|
||||
mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws
|
||||
mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/
|
||||
|
||||
# NUMAQ subarch support
|
||||
mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq
|
||||
mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/
|
||||
|
||||
# BIGSMP subarch support
|
||||
mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp
|
||||
mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/
|
||||
|
||||
#Summit subarch support
|
||||
mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit
|
||||
mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/
|
||||
|
||||
# generic subarchitecture
|
||||
mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
|
||||
fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
|
||||
mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/
|
||||
|
||||
|
||||
# ES7000 subarch support
|
||||
mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000
|
||||
fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/
|
||||
mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/
|
||||
|
||||
# RDC R-321x subarch support
|
||||
mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x
|
||||
mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/
|
||||
|
@ -160,6 +138,7 @@ KBUILD_AFLAGS += $(mflags-y)
|
|||
|
||||
head-y := arch/x86/kernel/head_$(BITS).o
|
||||
head-y += arch/x86/kernel/head$(BITS).o
|
||||
head-y += arch/x86/kernel/head.o
|
||||
head-y += arch/x86/kernel/init_task.o
|
||||
|
||||
libs-y += arch/x86/lib/
|
||||
|
@ -210,12 +189,12 @@ all: bzImage
|
|||
|
||||
# KBUILD_IMAGE specify target image being built
|
||||
KBUILD_IMAGE := $(boot)/bzImage
|
||||
zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage
|
||||
zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage
|
||||
|
||||
zImage bzImage: vmlinux
|
||||
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
|
||||
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
|
||||
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/bzImage
|
||||
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
|
||||
|
||||
compressed: zImage
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
|
@ -95,6 +95,9 @@ static void enable_a20_kbc(void)
|
|||
|
||||
outb(0xdf, 0x60); /* A20 on */
|
||||
empty_8042();
|
||||
|
||||
outb(0xff, 0x64); /* Null command, but UHCI wants it */
|
||||
empty_8042();
|
||||
}
|
||||
|
||||
static void enable_a20_fast(void)
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
.section ".text.head"
|
||||
|
@ -109,7 +110,7 @@ startup_32:
|
|||
|
||||
/* Enable PAE mode */
|
||||
xorl %eax, %eax
|
||||
orl $(1 << 5), %eax
|
||||
orl $(X86_CR4_PAE), %eax
|
||||
movl %eax, %cr4
|
||||
|
||||
/*
|
||||
|
@ -170,7 +171,7 @@ startup_32:
|
|||
pushl %eax
|
||||
|
||||
/* Enter paged protected Mode, activating Long Mode */
|
||||
movl $0x80000001, %eax /* Enable Paging and Protected mode */
|
||||
movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
|
||||
movl %eax, %cr0
|
||||
|
||||
/* Jump from 32bit compatibility mode into 64bit mode. */
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <asm/io.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/bootparam.h>
|
||||
|
||||
/* WARNING!!
|
||||
* This code is compiled with -fPIC and it is relocated dynamically
|
||||
|
@ -187,13 +188,8 @@ static void gzip_release(void **);
|
|||
/*
|
||||
* This is set up by the setup-routine at boot-time
|
||||
*/
|
||||
static unsigned char *real_mode; /* Pointer to real-mode data */
|
||||
|
||||
#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
|
||||
#ifndef STANDARD_MEMORY_BIOS_CALL
|
||||
#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
|
||||
#endif
|
||||
#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
|
||||
static struct boot_params *real_mode; /* Pointer to real-mode data */
|
||||
static int quiet;
|
||||
|
||||
extern unsigned char input_data[];
|
||||
extern int input_len;
|
||||
|
@ -206,7 +202,8 @@ static void free(void *where);
|
|||
static void *memset(void *s, int c, unsigned n);
|
||||
static void *memcpy(void *dest, const void *src, unsigned n);
|
||||
|
||||
static void putstr(const char *);
|
||||
static void __putstr(int, const char *);
|
||||
#define putstr(__x) __putstr(0, __x)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define memptr long
|
||||
|
@ -221,10 +218,6 @@ static char *vidmem;
|
|||
static int vidport;
|
||||
static int lines, cols;
|
||||
|
||||
#ifdef CONFIG_X86_NUMAQ
|
||||
void *xquad_portio;
|
||||
#endif
|
||||
|
||||
#include "../../../../lib/inflate.c"
|
||||
|
||||
static void *malloc(int size)
|
||||
|
@ -270,18 +263,24 @@ static void scroll(void)
|
|||
vidmem[i] = ' ';
|
||||
}
|
||||
|
||||
static void putstr(const char *s)
|
||||
static void __putstr(int error, const char *s)
|
||||
{
|
||||
int x, y, pos;
|
||||
char c;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0)
|
||||
#ifndef CONFIG_X86_VERBOSE_BOOTUP
|
||||
if (!error)
|
||||
return;
|
||||
#endif
|
||||
|
||||
x = RM_SCREEN_INFO.orig_x;
|
||||
y = RM_SCREEN_INFO.orig_y;
|
||||
#ifdef CONFIG_X86_32
|
||||
if (real_mode->screen_info.orig_video_mode == 0 &&
|
||||
lines == 0 && cols == 0)
|
||||
return;
|
||||
#endif
|
||||
|
||||
x = real_mode->screen_info.orig_x;
|
||||
y = real_mode->screen_info.orig_y;
|
||||
|
||||
while ((c = *s++) != '\0') {
|
||||
if (c == '\n') {
|
||||
|
@ -302,8 +301,8 @@ static void putstr(const char *s)
|
|||
}
|
||||
}
|
||||
|
||||
RM_SCREEN_INFO.orig_x = x;
|
||||
RM_SCREEN_INFO.orig_y = y;
|
||||
real_mode->screen_info.orig_x = x;
|
||||
real_mode->screen_info.orig_y = y;
|
||||
|
||||
pos = (x + cols * y) * 2; /* Update cursor position */
|
||||
outb(14, vidport);
|
||||
|
@ -366,9 +365,9 @@ static void flush_window(void)
|
|||
|
||||
static void error(char *x)
|
||||
{
|
||||
putstr("\n\n");
|
||||
putstr(x);
|
||||
putstr("\n\n -- System halted");
|
||||
__putstr(1, "\n\n");
|
||||
__putstr(1, x);
|
||||
__putstr(1, "\n\n -- System halted");
|
||||
|
||||
while (1)
|
||||
asm("hlt");
|
||||
|
@ -395,7 +394,8 @@ static void parse_elf(void *output)
|
|||
return;
|
||||
}
|
||||
|
||||
putstr("Parsing ELF... ");
|
||||
if (!quiet)
|
||||
putstr("Parsing ELF... ");
|
||||
|
||||
phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
|
||||
if (!phdrs)
|
||||
|
@ -430,7 +430,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
|
|||
{
|
||||
real_mode = rmode;
|
||||
|
||||
if (RM_SCREEN_INFO.orig_video_mode == 7) {
|
||||
if (real_mode->hdr.loadflags & QUIET_FLAG)
|
||||
quiet = 1;
|
||||
|
||||
if (real_mode->screen_info.orig_video_mode == 7) {
|
||||
vidmem = (char *) 0xb0000;
|
||||
vidport = 0x3b4;
|
||||
} else {
|
||||
|
@ -438,8 +441,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
|
|||
vidport = 0x3d4;
|
||||
}
|
||||
|
||||
lines = RM_SCREEN_INFO.orig_video_lines;
|
||||
cols = RM_SCREEN_INFO.orig_video_cols;
|
||||
lines = real_mode->screen_info.orig_video_lines;
|
||||
cols = real_mode->screen_info.orig_video_cols;
|
||||
|
||||
window = output; /* Output buffer (Normally at 1M) */
|
||||
free_mem_ptr = heap; /* Heap */
|
||||
|
@ -465,9 +468,11 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
|
|||
#endif
|
||||
|
||||
makecrc();
|
||||
putstr("\nDecompressing Linux... ");
|
||||
if (!quiet)
|
||||
putstr("\nDecompressing Linux... ");
|
||||
gunzip();
|
||||
parse_elf(output);
|
||||
putstr("done.\nBooting the kernel.\n");
|
||||
if (!quiet)
|
||||
putstr("done.\nBooting the kernel.\n");
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -10,16 +10,20 @@
|
|||
#define USE_BSD
|
||||
#include <endian.h>
|
||||
|
||||
#define MAX_SHDRS 100
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
static Elf32_Ehdr ehdr;
|
||||
static Elf32_Shdr shdr[MAX_SHDRS];
|
||||
static Elf32_Sym *symtab[MAX_SHDRS];
|
||||
static Elf32_Rel *reltab[MAX_SHDRS];
|
||||
static char *strtab[MAX_SHDRS];
|
||||
static unsigned long reloc_count, reloc_idx;
|
||||
static unsigned long *relocs;
|
||||
|
||||
struct section {
|
||||
Elf32_Shdr shdr;
|
||||
struct section *link;
|
||||
Elf32_Sym *symtab;
|
||||
Elf32_Rel *reltab;
|
||||
char *strtab;
|
||||
};
|
||||
static struct section *secs;
|
||||
|
||||
/*
|
||||
* Following symbols have been audited. There values are constant and do
|
||||
* not change if bzImage is loaded at a different physical address than
|
||||
|
@ -35,7 +39,7 @@ static int is_safe_abs_reloc(const char* sym_name)
|
|||
{
|
||||
int i;
|
||||
|
||||
for(i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) {
|
||||
for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) {
|
||||
if (!strcmp(sym_name, safe_abs_relocs[i]))
|
||||
/* Match found */
|
||||
return 1;
|
||||
|
@ -137,10 +141,10 @@ static const char *sec_name(unsigned shndx)
|
|||
{
|
||||
const char *sec_strtab;
|
||||
const char *name;
|
||||
sec_strtab = strtab[ehdr.e_shstrndx];
|
||||
sec_strtab = secs[ehdr.e_shstrndx].strtab;
|
||||
name = "<noname>";
|
||||
if (shndx < ehdr.e_shnum) {
|
||||
name = sec_strtab + shdr[shndx].sh_name;
|
||||
name = sec_strtab + secs[shndx].shdr.sh_name;
|
||||
}
|
||||
else if (shndx == SHN_ABS) {
|
||||
name = "ABSOLUTE";
|
||||
|
@ -159,7 +163,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym)
|
|||
name = sym_strtab + sym->st_name;
|
||||
}
|
||||
else {
|
||||
name = sec_name(shdr[sym->st_shndx].sh_name);
|
||||
name = sec_name(secs[sym->st_shndx].shdr.sh_name);
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
@ -244,29 +248,34 @@ static void read_ehdr(FILE *fp)
|
|||
static void read_shdrs(FILE *fp)
|
||||
{
|
||||
int i;
|
||||
if (ehdr.e_shnum > MAX_SHDRS) {
|
||||
die("%d section headers supported: %d\n",
|
||||
ehdr.e_shnum, MAX_SHDRS);
|
||||
Elf32_Shdr shdr;
|
||||
|
||||
secs = calloc(ehdr.e_shnum, sizeof(struct section));
|
||||
if (!secs) {
|
||||
die("Unable to allocate %d section headers\n",
|
||||
ehdr.e_shnum);
|
||||
}
|
||||
if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
|
||||
die("Seek to %d failed: %s\n",
|
||||
ehdr.e_shoff, strerror(errno));
|
||||
}
|
||||
if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) {
|
||||
die("Cannot read ELF section headers: %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
for(i = 0; i < ehdr.e_shnum; i++) {
|
||||
shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name);
|
||||
shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type);
|
||||
shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags);
|
||||
shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr);
|
||||
shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset);
|
||||
shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size);
|
||||
shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link);
|
||||
shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info);
|
||||
shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign);
|
||||
shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize);
|
||||
for (i = 0; i < ehdr.e_shnum; i++) {
|
||||
struct section *sec = &secs[i];
|
||||
if (fread(&shdr, sizeof shdr, 1, fp) != 1)
|
||||
die("Cannot read ELF section headers %d/%d: %s\n",
|
||||
i, ehdr.e_shnum, strerror(errno));
|
||||
sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name);
|
||||
sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type);
|
||||
sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags);
|
||||
sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr);
|
||||
sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset);
|
||||
sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size);
|
||||
sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link);
|
||||
sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info);
|
||||
sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign);
|
||||
sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize);
|
||||
if (sec->shdr.sh_link < ehdr.e_shnum)
|
||||
sec->link = &secs[sec->shdr.sh_link];
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -274,20 +283,22 @@ static void read_shdrs(FILE *fp)
|
|||
static void read_strtabs(FILE *fp)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < ehdr.e_shnum; i++) {
|
||||
if (shdr[i].sh_type != SHT_STRTAB) {
|
||||
for (i = 0; i < ehdr.e_shnum; i++) {
|
||||
struct section *sec = &secs[i];
|
||||
if (sec->shdr.sh_type != SHT_STRTAB) {
|
||||
continue;
|
||||
}
|
||||
strtab[i] = malloc(shdr[i].sh_size);
|
||||
if (!strtab[i]) {
|
||||
sec->strtab = malloc(sec->shdr.sh_size);
|
||||
if (!sec->strtab) {
|
||||
die("malloc of %d bytes for strtab failed\n",
|
||||
shdr[i].sh_size);
|
||||
sec->shdr.sh_size);
|
||||
}
|
||||
if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
|
||||
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
|
||||
die("Seek to %d failed: %s\n",
|
||||
shdr[i].sh_offset, strerror(errno));
|
||||
sec->shdr.sh_offset, strerror(errno));
|
||||
}
|
||||
if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
|
||||
if (fread(sec->strtab, 1, sec->shdr.sh_size, fp)
|
||||
!= sec->shdr.sh_size) {
|
||||
die("Cannot read symbol table: %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
|
@ -297,28 +308,31 @@ static void read_strtabs(FILE *fp)
|
|||
static void read_symtabs(FILE *fp)
|
||||
{
|
||||
int i,j;
|
||||
for(i = 0; i < ehdr.e_shnum; i++) {
|
||||
if (shdr[i].sh_type != SHT_SYMTAB) {
|
||||
for (i = 0; i < ehdr.e_shnum; i++) {
|
||||
struct section *sec = &secs[i];
|
||||
if (sec->shdr.sh_type != SHT_SYMTAB) {
|
||||
continue;
|
||||
}
|
||||
symtab[i] = malloc(shdr[i].sh_size);
|
||||
if (!symtab[i]) {
|
||||
sec->symtab = malloc(sec->shdr.sh_size);
|
||||
if (!sec->symtab) {
|
||||
die("malloc of %d bytes for symtab failed\n",
|
||||
shdr[i].sh_size);
|
||||
sec->shdr.sh_size);
|
||||
}
|
||||
if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
|
||||
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
|
||||
die("Seek to %d failed: %s\n",
|
||||
shdr[i].sh_offset, strerror(errno));
|
||||
sec->shdr.sh_offset, strerror(errno));
|
||||
}
|
||||
if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
|
||||
if (fread(sec->symtab, 1, sec->shdr.sh_size, fp)
|
||||
!= sec->shdr.sh_size) {
|
||||
die("Cannot read symbol table: %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) {
|
||||
symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name);
|
||||
symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value);
|
||||
symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size);
|
||||
symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx);
|
||||
for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) {
|
||||
Elf32_Sym *sym = &sec->symtab[j];
|
||||
sym->st_name = elf32_to_cpu(sym->st_name);
|
||||
sym->st_value = elf32_to_cpu(sym->st_value);
|
||||
sym->st_size = elf32_to_cpu(sym->st_size);
|
||||
sym->st_shndx = elf16_to_cpu(sym->st_shndx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -327,26 +341,29 @@ static void read_symtabs(FILE *fp)
|
|||
static void read_relocs(FILE *fp)
|
||||
{
|
||||
int i,j;
|
||||
for(i = 0; i < ehdr.e_shnum; i++) {
|
||||
if (shdr[i].sh_type != SHT_REL) {
|
||||
for (i = 0; i < ehdr.e_shnum; i++) {
|
||||
struct section *sec = &secs[i];
|
||||
if (sec->shdr.sh_type != SHT_REL) {
|
||||
continue;
|
||||
}
|
||||
reltab[i] = malloc(shdr[i].sh_size);
|
||||
if (!reltab[i]) {
|
||||
sec->reltab = malloc(sec->shdr.sh_size);
|
||||
if (!sec->reltab) {
|
||||
die("malloc of %d bytes for relocs failed\n",
|
||||
shdr[i].sh_size);
|
||||
sec->shdr.sh_size);
|
||||
}
|
||||
if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
|
||||
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
|
||||
die("Seek to %d failed: %s\n",
|
||||
shdr[i].sh_offset, strerror(errno));
|
||||
sec->shdr.sh_offset, strerror(errno));
|
||||
}
|
||||
if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
|
||||
if (fread(sec->reltab, 1, sec->shdr.sh_size, fp)
|
||||
!= sec->shdr.sh_size) {
|
||||
die("Cannot read symbol table: %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
|
||||
reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset);
|
||||
reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info);
|
||||
for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
|
||||
Elf32_Rel *rel = &sec->reltab[j];
|
||||
rel->r_offset = elf32_to_cpu(rel->r_offset);
|
||||
rel->r_info = elf32_to_cpu(rel->r_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -357,19 +374,21 @@ static void print_absolute_symbols(void)
|
|||
int i;
|
||||
printf("Absolute symbols\n");
|
||||
printf(" Num: Value Size Type Bind Visibility Name\n");
|
||||
for(i = 0; i < ehdr.e_shnum; i++) {
|
||||
for (i = 0; i < ehdr.e_shnum; i++) {
|
||||
struct section *sec = &secs[i];
|
||||
char *sym_strtab;
|
||||
Elf32_Sym *sh_symtab;
|
||||
int j;
|
||||
if (shdr[i].sh_type != SHT_SYMTAB) {
|
||||
|
||||
if (sec->shdr.sh_type != SHT_SYMTAB) {
|
||||
continue;
|
||||
}
|
||||
sh_symtab = symtab[i];
|
||||
sym_strtab = strtab[shdr[i].sh_link];
|
||||
for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) {
|
||||
sh_symtab = sec->symtab;
|
||||
sym_strtab = sec->link->strtab;
|
||||
for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) {
|
||||
Elf32_Sym *sym;
|
||||
const char *name;
|
||||
sym = &symtab[i][j];
|
||||
sym = &sec->symtab[j];
|
||||
name = sym_name(sym_strtab, sym);
|
||||
if (sym->st_shndx != SHN_ABS) {
|
||||
continue;
|
||||
|
@@ -389,26 +408,27 @@ static void print_absolute_relocs(void)
 {
 	int i, printed = 0;
 
-	for(i = 0; i < ehdr.e_shnum; i++) {
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		struct section *sec = &secs[i];
+		struct section *sec_applies, *sec_symtab;
 		char *sym_strtab;
 		Elf32_Sym *sh_symtab;
-		unsigned sec_applies, sec_symtab;
 		int j;
-		if (shdr[i].sh_type != SHT_REL) {
+		if (sec->shdr.sh_type != SHT_REL) {
 			continue;
 		}
-		sec_symtab = shdr[i].sh_link;
-		sec_applies = shdr[i].sh_info;
-		if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+		sec_symtab = sec->link;
+		sec_applies = &secs[sec->shdr.sh_info];
+		if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) {
 			continue;
 		}
-		sh_symtab = symtab[sec_symtab];
-		sym_strtab = strtab[shdr[sec_symtab].sh_link];
-		for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+		sh_symtab = sec_symtab->symtab;
+		sym_strtab = sec_symtab->link->strtab;
+		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
 			Elf32_Rel *rel;
 			Elf32_Sym *sym;
 			const char *name;
-			rel = &reltab[i][j];
+			rel = &sec->reltab[j];
 			sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
 			name = sym_name(sym_strtab, sym);
 			if (sym->st_shndx != SHN_ABS) {
@@ -456,26 +476,28 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
 {
 	int i;
 	/* Walk through the relocations */
-	for(i = 0; i < ehdr.e_shnum; i++) {
+	for (i = 0; i < ehdr.e_shnum; i++) {
 		char *sym_strtab;
 		Elf32_Sym *sh_symtab;
-		unsigned sec_applies, sec_symtab;
+		struct section *sec_applies, *sec_symtab;
 		int j;
-		if (shdr[i].sh_type != SHT_REL) {
+		struct section *sec = &secs[i];
+
+		if (sec->shdr.sh_type != SHT_REL) {
 			continue;
 		}
-		sec_symtab = shdr[i].sh_link;
-		sec_applies = shdr[i].sh_info;
-		if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+		sec_symtab = sec->link;
+		sec_applies = &secs[sec->shdr.sh_info];
+		if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) {
 			continue;
 		}
-		sh_symtab = symtab[sec_symtab];
-		sym_strtab = strtab[shdr[sec_symtab].sh_link];
-		for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+		sh_symtab = sec_symtab->symtab;
+		sym_strtab = sec->link->strtab;
+		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
 			Elf32_Rel *rel;
 			Elf32_Sym *sym;
 			unsigned r_type;
-			rel = &reltab[i][j];
+			rel = &sec->reltab[j];
 			sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
 			r_type = ELF32_R_TYPE(rel->r_info);
 			/* Don't visit relocations to absolute symbols */
@@ -539,7 +561,7 @@ static void emit_relocs(int as_text)
 		 */
 		printf(".section \".data.reloc\",\"a\"\n");
 		printf(".balign 4\n");
-		for(i = 0; i < reloc_count; i++) {
+		for (i = 0; i < reloc_count; i++) {
 			printf("\t .long 0x%08lx\n", relocs[i]);
 		}
 		printf("\n");
@@ -550,7 +572,7 @@ static void emit_relocs(int as_text)
 		/* Print a stop */
 		printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
 		/* Now print each relocation */
-		for(i = 0; i < reloc_count; i++) {
+		for (i = 0; i < reloc_count; i++) {
 			buf[0] = (relocs[i] >> 0) & 0xff;
 			buf[1] = (relocs[i] >> 8) & 0xff;
 			buf[2] = (relocs[i] >> 16) & 0xff;
@@ -577,7 +599,7 @@ int main(int argc, char **argv)
 	show_absolute_relocs = 0;
 	as_text = 0;
 	fname = NULL;
-	for(i = 1; i < argc; i++) {
+	for (i = 1; i < argc; i++) {
 		char *arg = argv[i];
 		if (*arg == '-') {
 			if (strcmp(argv[1], "--abs-syms") == 0) {

@@ -28,6 +28,8 @@ static char *cpu_name(int level)
 	if (level == 64) {
 		return "x86-64";
 	} else {
+		if (level == 15)
+			level = 6;
 		sprintf(buf, "i%d86", level);
 		return buf;
 	}

@@ -165,6 +165,10 @@ void main(void)
 	/* Set the video mode */
 	set_video();
 
+	/* Parse command line for 'quiet' and pass it to decompressor. */
+	if (cmdline_find_option_bool("quiet"))
+		boot_params.hdr.loadflags |= QUIET_FLAG;
+
 	/* Do the last things and invoke protected mode */
 	go_to_protected_mode();
 }

@@ -13,6 +13,7 @@
  */
 
 #include "boot.h"
+#include <linux/kernel.h>
 
 #define SMAP 0x534d4150 /* ASCII "SMAP" */
 
@@ -53,7 +54,7 @@ static int detect_memory_e820(void)
 
 		count++;
 		desc++;
-	} while (next && count < E820MAX);
+	} while (next && count < ARRAY_SIZE(boot_params.e820_map));
 
 	return boot_params.e820_entries = count;
 }

@@ -33,6 +33,8 @@ protected_mode_jump:
 	movw %cs, %bx
 	shll $4, %ebx
 	addl %ebx, 2f
+	jmp 1f # Short jump to serialize on 386/486
+1:
 
 	movw $__BOOT_DS, %cx
 	movw $__BOOT_TSS, %di
@@ -40,8 +42,6 @@ protected_mode_jump:
 	movl %cr0, %edx
 	orb $X86_CR0_PE, %dl # Protected mode
 	movl %edx, %cr0
-	jmp 1f # Short jump to serialize on 386/486
-1:
 
 	# Transition to 32-bit mode
 	.byte 0x66, 0xea # ljmpl opcode

@@ -259,8 +259,7 @@ static int vga_probe(void)
 	return mode_count[adapter];
 }
 
-__videocard video_vga =
-{
+__videocard video_vga = {
 	.card_name = "VGA",
 	.probe = vga_probe,
 	.set_mode = vga_set_mode,

File diff suppressed because it is too large
File diff suppressed because it is too large
|
@ -61,6 +61,19 @@
|
|||
CFI_UNDEFINED r15
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
ENTRY(native_usergs_sysret32)
|
||||
swapgs
|
||||
sysretl
|
||||
ENDPROC(native_usergs_sysret32)
|
||||
|
||||
ENTRY(native_irq_enable_sysexit)
|
||||
swapgs
|
||||
sti
|
||||
sysexit
|
||||
ENDPROC(native_irq_enable_sysexit)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 32bit SYSENTER instruction entry.
|
||||
*
|
||||
|
@ -85,14 +98,14 @@ ENTRY(ia32_sysenter_target)
|
|||
CFI_SIGNAL_FRAME
|
||||
CFI_DEF_CFA rsp,0
|
||||
CFI_REGISTER rsp,rbp
|
||||
swapgs
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movq %gs:pda_kernelstack, %rsp
|
||||
addq $(PDA_STACKOFFSET),%rsp
|
||||
/*
|
||||
* No need to follow this irqs on/off section: the syscall
|
||||
* disabled irqs, here we enable it straight after entry:
|
||||
*/
|
||||
sti
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
movl %ebp,%ebp /* zero extension */
|
||||
pushq $__USER32_DS
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
|
@ -103,7 +116,7 @@ ENTRY(ia32_sysenter_target)
|
|||
pushfq
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
/*CFI_REL_OFFSET rflags,0*/
|
||||
movl 8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d
|
||||
movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
|
||||
CFI_REGISTER rip,r10
|
||||
pushq $__USER32_CS
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
|
@ -123,8 +136,9 @@ ENTRY(ia32_sysenter_target)
|
|||
.quad 1b,ia32_badarg
|
||||
.previous
|
||||
GET_THREAD_INFO(%r10)
|
||||
orl $TS_COMPAT,threadinfo_status(%r10)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
|
||||
orl $TS_COMPAT,TI_status(%r10)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
|
||||
TI_flags(%r10)
|
||||
CFI_REMEMBER_STATE
|
||||
jnz sysenter_tracesys
|
||||
sysenter_do_call:
|
||||
|
@ -134,11 +148,11 @@ sysenter_do_call:
|
|||
call *ia32_sys_call_table(,%rax,8)
|
||||
movq %rax,RAX-ARGOFFSET(%rsp)
|
||||
GET_THREAD_INFO(%r10)
|
||||
cli
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
|
||||
testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
|
||||
jnz int_ret_from_sys_call
|
||||
andl $~TS_COMPAT,threadinfo_status(%r10)
|
||||
andl $~TS_COMPAT,TI_status(%r10)
|
||||
/* clear IF, that popfq doesn't enable interrupts early */
|
||||
andl $~0x200,EFLAGS-R11(%rsp)
|
||||
movl RIP-R11(%rsp),%edx /* User %eip */
|
||||
|
@ -151,10 +165,7 @@ sysenter_do_call:
|
|||
CFI_ADJUST_CFA_OFFSET -8
|
||||
CFI_REGISTER rsp,rcx
|
||||
TRACE_IRQS_ON
|
||||
swapgs
|
||||
sti /* sti only takes effect after the next instruction */
|
||||
/* sysexit */
|
||||
.byte 0xf, 0x35
|
||||
ENABLE_INTERRUPTS_SYSEXIT32
|
||||
|
||||
sysenter_tracesys:
|
||||
CFI_RESTORE_STATE
|
||||
|
@ -200,7 +211,7 @@ ENTRY(ia32_cstar_target)
|
|||
CFI_DEF_CFA rsp,PDA_STACKOFFSET
|
||||
CFI_REGISTER rip,rcx
|
||||
/*CFI_REGISTER rflags,r11*/
|
||||
swapgs
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movl %esp,%r8d
|
||||
CFI_REGISTER rsp,r8
|
||||
movq %gs:pda_kernelstack,%rsp
|
||||
|
@ -208,7 +219,7 @@ ENTRY(ia32_cstar_target)
|
|||
* No need to follow this irqs on/off section: the syscall
|
||||
* disabled irqs and here we enable it straight after entry:
|
||||
*/
|
||||
sti
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
SAVE_ARGS 8,1,1
|
||||
movl %eax,%eax /* zero extension */
|
||||
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
|
||||
|
@ -230,8 +241,9 @@ ENTRY(ia32_cstar_target)
|
|||
.quad 1b,ia32_badarg
|
||||
.previous
|
||||
GET_THREAD_INFO(%r10)
|
||||
orl $TS_COMPAT,threadinfo_status(%r10)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
|
||||
orl $TS_COMPAT,TI_status(%r10)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
|
||||
TI_flags(%r10)
|
||||
CFI_REMEMBER_STATE
|
||||
jnz cstar_tracesys
|
||||
cstar_do_call:
|
||||
|
@ -241,11 +253,11 @@ cstar_do_call:
|
|||
call *ia32_sys_call_table(,%rax,8)
|
||||
movq %rax,RAX-ARGOFFSET(%rsp)
|
||||
GET_THREAD_INFO(%r10)
|
||||
cli
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
|
||||
testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
|
||||
jnz int_ret_from_sys_call
|
||||
andl $~TS_COMPAT,threadinfo_status(%r10)
|
||||
andl $~TS_COMPAT,TI_status(%r10)
|
||||
RESTORE_ARGS 1,-ARG_SKIP,1,1,1
|
||||
movl RIP-ARGOFFSET(%rsp),%ecx
|
||||
CFI_REGISTER rip,rcx
|
||||
|
@ -254,8 +266,7 @@ cstar_do_call:
|
|||
TRACE_IRQS_ON
|
||||
movl RSP-ARGOFFSET(%rsp),%esp
|
||||
CFI_RESTORE rsp
|
||||
swapgs
|
||||
sysretl
|
||||
USERGS_SYSRET32
|
||||
|
||||
cstar_tracesys:
|
||||
CFI_RESTORE_STATE
|
||||
|
@ -310,12 +321,12 @@ ENTRY(ia32_syscall)
|
|||
/*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
|
||||
/*CFI_REL_OFFSET cs,CS-RIP*/
|
||||
CFI_REL_OFFSET rip,RIP-RIP
|
||||
swapgs
|
||||
SWAPGS
|
||||
/*
|
||||
* No need to follow this irqs on/off section: the syscall
|
||||
* disabled irqs and here we enable it straight after entry:
|
||||
*/
|
||||
sti
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
movl %eax,%eax
|
||||
pushq %rax
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
|
@ -324,8 +335,9 @@ ENTRY(ia32_syscall)
|
|||
this could be a problem. */
|
||||
SAVE_ARGS 0,0,1
|
||||
GET_THREAD_INFO(%r10)
|
||||
orl $TS_COMPAT,threadinfo_status(%r10)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
|
||||
orl $TS_COMPAT,TI_status(%r10)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
|
||||
TI_flags(%r10)
|
||||
jnz ia32_tracesys
|
||||
ia32_do_syscall:
|
||||
cmpl $(IA32_NR_syscalls-1),%eax
|
||||
|
@ -370,13 +382,11 @@ quiet_ni_syscall:
|
|||
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
|
||||
PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
|
||||
PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
|
||||
PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
|
||||
PTREGSCALL stub32_execve, sys32_execve, %rcx
|
||||
PTREGSCALL stub32_fork, sys_fork, %rdi
|
||||
PTREGSCALL stub32_clone, sys32_clone, %rdx
|
||||
PTREGSCALL stub32_vfork, sys_vfork, %rdi
|
||||
PTREGSCALL stub32_iopl, sys_iopl, %rsi
|
||||
PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
|
||||
|
||||
ENTRY(ia32_ptregs_common)
|
||||
popq %r11
|
||||
|
@ -476,7 +486,7 @@ ia32_sys_call_table:
|
|||
.quad sys_ssetmask
|
||||
.quad sys_setreuid16 /* 70 */
|
||||
.quad sys_setregid16
|
||||
.quad stub32_sigsuspend
|
||||
.quad sys32_sigsuspend
|
||||
.quad compat_sys_sigpending
|
||||
.quad sys_sethostname
|
||||
.quad compat_sys_setrlimit /* 75 */
|
||||
|
@ -583,7 +593,7 @@ ia32_sys_call_table:
|
|||
.quad sys32_rt_sigpending
|
||||
.quad compat_sys_rt_sigtimedwait
|
||||
.quad sys32_rt_sigqueueinfo
|
||||
.quad stub32_rt_sigsuspend
|
||||
.quad sys_rt_sigsuspend
|
||||
.quad sys32_pread /* 180 */
|
||||
.quad sys32_pwrite
|
||||
.quad sys_chown16
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Makefile for the linux kernel.
|
||||
#
|
||||
|
||||
extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
|
||||
extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
|
||||
|
||||
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
|
||||
|
||||
|
@ -13,20 +13,21 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
|
|||
nostackp := $(call cc-option, -fno-stack-protector)
|
||||
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
|
||||
CFLAGS_hpet.o := $(nostackp)
|
||||
CFLAGS_tsc_64.o := $(nostackp)
|
||||
CFLAGS_tsc.o := $(nostackp)
|
||||
|
||||
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
|
||||
obj-y += traps_$(BITS).o irq_$(BITS).o
|
||||
obj-y += time_$(BITS).o ioport.o ldt.o
|
||||
obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o
|
||||
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
|
||||
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
|
||||
obj-$(CONFIG_X86_32) += probe_roms_32.o
|
||||
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
|
||||
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
|
||||
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
|
||||
obj-y += bootflag.o e820_$(BITS).o
|
||||
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
|
||||
obj-y += bootflag.o e820.o
|
||||
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
|
||||
obj-y += alternative.o i8253.o pci-nommu.o
|
||||
obj-$(CONFIG_X86_64) += bugs_64.o
|
||||
obj-y += tsc_$(BITS).o io_delay.o rtc.o
|
||||
obj-y += tsc.o io_delay.o rtc.o
|
||||
|
||||
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
|
||||
obj-y += process.o
|
||||
|
@ -53,7 +54,7 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o
|
|||
obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
|
||||
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
|
||||
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
|
||||
obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
|
||||
obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
|
||||
obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
|
||||
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
|
||||
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
|
||||
|
@ -64,7 +65,6 @@ obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
|
|||
obj-y += vsmp_64.o
|
||||
obj-$(CONFIG_KPROBES) += kprobes.o
|
||||
obj-$(CONFIG_MODULES) += module_$(BITS).o
|
||||
obj-$(CONFIG_ACPI_SRAT) += srat_32.o
|
||||
obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
|
||||
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
|
||||
obj-$(CONFIG_KGDB) += kgdb.o
|
||||
|
@ -94,12 +94,13 @@ obj-$(CONFIG_OLPC) += olpc.o
|
|||
###
|
||||
# 64 bit specific files
|
||||
ifeq ($(CONFIG_X86_64),y)
|
||||
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
|
||||
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
|
||||
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
|
||||
obj-$(CONFIG_AUDIT) += audit_64.o
|
||||
|
||||
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
|
||||
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
|
||||
obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
|
||||
obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
|
||||
|
||||
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include <asm/pgtable.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/genapic.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mpspec.h>
|
||||
#include <asm/smp.h>
|
||||
|
@ -106,21 +107,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
|
|||
*/
|
||||
enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/* rely on all ACPI tables being in the direct mapping */
|
||||
char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
|
||||
{
|
||||
if (!phys_addr || !size)
|
||||
return NULL;
|
||||
|
||||
if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
|
||||
return __va(phys_addr);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
|
||||
|
@ -139,11 +125,15 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
|
|||
unsigned long base, offset, mapped_size;
|
||||
int idx;
|
||||
|
||||
if (phys + size < 8 * 1024 * 1024)
|
||||
if (!phys || !size)
|
||||
return NULL;
|
||||
|
||||
if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
|
||||
return __va(phys);
|
||||
|
||||
offset = phys & (PAGE_SIZE - 1);
|
||||
mapped_size = PAGE_SIZE - offset;
|
||||
clear_fixmap(FIX_ACPI_END);
|
||||
set_fixmap(FIX_ACPI_END, phys);
|
||||
base = fix_to_virt(FIX_ACPI_END);
|
||||
|
||||
|
@ -155,13 +145,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
|
|||
if (--idx < FIX_ACPI_BEGIN)
|
||||
return NULL; /* cannot handle this */
|
||||
phys += PAGE_SIZE;
|
||||
clear_fixmap(idx);
|
||||
set_fixmap(idx, phys);
|
||||
mapped_size += PAGE_SIZE;
|
||||
}
|
||||
|
||||
return ((unsigned char *)base + offset);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PCI_MMCONFIG
|
||||
/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
|
||||
|
@ -338,8 +328,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
|
|||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
|
||||
struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
|
||||
|
||||
static int __init
|
||||
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
|
||||
{
|
||||
|
@ -514,8 +502,6 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
|
|||
* Make sure all (legacy) PCI IRQs are set as level-triggered.
|
||||
*/
|
||||
if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
|
||||
extern void eisa_set_level_irq(unsigned int irq);
|
||||
|
||||
if (triggering == ACPI_LEVEL_SENSITIVE)
|
||||
eisa_set_level_irq(gsi);
|
||||
}
|
||||
|
@ -860,6 +846,364 @@ static int __init acpi_parse_madt_lapic_entries(void)
|
|||
#endif /* CONFIG_X86_LOCAL_APIC */
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
#define MP_ISA_BUS 0
|
||||
|
||||
#ifdef CONFIG_X86_ES7000
|
||||
extern int es7000_plat;
|
||||
#endif
|
||||
|
||||
static struct {
|
||||
int apic_id;
|
||||
int gsi_base;
|
||||
int gsi_end;
|
||||
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
|
||||
} mp_ioapic_routing[MAX_IO_APICS];
|
||||
|
||||
static int mp_find_ioapic(int gsi)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
/* Find the IOAPIC that manages this GSI. */
|
||||
for (i = 0; i < nr_ioapics; i++) {
|
||||
if ((gsi >= mp_ioapic_routing[i].gsi_base)
|
||||
&& (gsi <= mp_ioapic_routing[i].gsi_end))
|
||||
return i;
|
||||
}
|
||||
|
||||
printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static u8 __init uniq_ioapic_id(u8 id)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
|
||||
!APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
|
||||
return io_apic_get_unique_id(nr_ioapics, id);
|
||||
else
|
||||
return id;
|
||||
#else
|
||||
int i;
|
||||
DECLARE_BITMAP(used, 256);
|
||||
bitmap_zero(used, 256);
|
||||
for (i = 0; i < nr_ioapics; i++) {
|
||||
struct mp_config_ioapic *ia = &mp_ioapics[i];
|
||||
__set_bit(ia->mp_apicid, used);
|
||||
}
|
||||
if (!test_bit(id, used))
|
||||
return id;
|
||||
return find_first_zero_bit(used, 256);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int bad_ioapic(unsigned long address)
|
||||
{
|
||||
if (nr_ioapics >= MAX_IO_APICS) {
|
||||
printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
|
||||
"(found %d)\n", MAX_IO_APICS, nr_ioapics);
|
||||
panic("Recompile kernel with bigger MAX_IO_APICS!\n");
|
||||
}
|
||||
if (!address) {
|
||||
printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
|
||||
" found in table, skipping!\n");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
if (bad_ioapic(address))
|
||||
return;
|
||||
|
||||
idx = nr_ioapics;
|
||||
|
||||
mp_ioapics[idx].mp_type = MP_IOAPIC;
|
||||
mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
|
||||
mp_ioapics[idx].mp_apicaddr = address;
|
||||
|
||||
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
|
||||
mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
|
||||
#ifdef CONFIG_X86_32
|
||||
mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
|
||||
#else
|
||||
mp_ioapics[idx].mp_apicver = 0;
|
||||
#endif
|
||||
/*
|
||||
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
|
||||
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
|
||||
*/
|
||||
mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
|
||||
mp_ioapic_routing[idx].gsi_base = gsi_base;
|
||||
mp_ioapic_routing[idx].gsi_end = gsi_base +
|
||||
io_apic_get_redir_entries(idx);
|
||||
|
||||
printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
|
||||
"GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
|
||||
mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
|
||||
mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
|
||||
|
||||
nr_ioapics++;
|
||||
}
|
||||
|
||||
static void assign_to_mp_irq(struct mp_config_intsrc *m,
|
||||
struct mp_config_intsrc *mp_irq)
|
||||
{
|
||||
memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
|
||||
}
|
||||
|
||||
static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
|
||||
struct mp_config_intsrc *m)
|
||||
{
|
||||
return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
|
||||
}
|
||||
|
||||
static void save_mp_irq(struct mp_config_intsrc *m)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mp_irq_entries; i++) {
|
||||
if (!mp_irq_cmp(&mp_irqs[i], m))
|
||||
return;
|
||||
}
|
||||
|
||||
assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
|
||||
if (++mp_irq_entries == MAX_IRQ_SOURCES)
|
||||
panic("Max # of irq sources exceeded!!\n");
|
||||
}
|
||||
|
||||
void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
|
||||
{
|
||||
int ioapic;
|
||||
int pin;
|
||||
struct mp_config_intsrc mp_irq;
|
||||
|
||||
/*
|
||||
* Convert 'gsi' to 'ioapic.pin'.
|
||||
*/
|
||||
ioapic = mp_find_ioapic(gsi);
|
||||
if (ioapic < 0)
|
||||
return;
|
||||
pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
|
||||
|
||||
/*
|
||||
* TBD: This check is for faulty timer entries, where the override
|
||||
* erroneously sets the trigger to level, resulting in a HUGE
|
||||
* increase of timer interrupts!
|
||||
*/
|
||||
if ((bus_irq == 0) && (trigger == 3))
|
||||
trigger = 1;
|
||||
|
||||
mp_irq.mp_type = MP_INTSRC;
|
||||
mp_irq.mp_irqtype = mp_INT;
|
||||
mp_irq.mp_irqflag = (trigger << 2) | polarity;
|
||||
mp_irq.mp_srcbus = MP_ISA_BUS;
|
||||
mp_irq.mp_srcbusirq = bus_irq; /* IRQ */
|
||||
mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
|
||||
mp_irq.mp_dstirq = pin; /* INTIN# */
|
||||
|
||||
save_mp_irq(&mp_irq);
|
||||
}
|
||||
|
||||
void __init mp_config_acpi_legacy_irqs(void)
|
||||
{
|
||||
int i;
|
||||
int ioapic;
|
||||
unsigned int dstapic;
|
||||
struct mp_config_intsrc mp_irq;
|
||||
|
||||
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
|
||||
/*
|
||||
* Fabricate the legacy ISA bus (bus #31).
|
||||
*/
|
||||
mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
|
||||
#endif
|
||||
set_bit(MP_ISA_BUS, mp_bus_not_pci);
|
||||
Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
|
||||
|
||||
#ifdef CONFIG_X86_ES7000
|
||||
/*
|
||||
* Older generations of ES7000 have no legacy identity mappings
|
||||
*/
|
||||
if (es7000_plat == 1)
|
||||
return;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Locate the IOAPIC that manages the ISA IRQs (0-15).
|
||||
*/
|
||||
ioapic = mp_find_ioapic(0);
|
||||
if (ioapic < 0)
|
||||
return;
|
||||
dstapic = mp_ioapics[ioapic].mp_apicid;
|
||||
|
||||
/*
|
||||
* Use the default configuration for the IRQs 0-15. Unless
|
||||
* overridden by (MADT) interrupt source override entries.
|
||||
*/
|
||||
for (i = 0; i < 16; i++) {
|
||||
int idx;
|
||||
|
||||
for (idx = 0; idx < mp_irq_entries; idx++) {
|
||||
struct mp_config_intsrc *irq = mp_irqs + idx;
|
||||
|
||||
/* Do we already have a mapping for this ISA IRQ? */
|
||||
if (irq->mp_srcbus == MP_ISA_BUS
|
||||
&& irq->mp_srcbusirq == i)
|
||||
break;
|
||||
|
||||
/* Do we already have a mapping for this IOAPIC pin */
|
||||
if (irq->mp_dstapic == dstapic &&
|
||||
irq->mp_dstirq == i)
|
||||
break;
|
||||
}
|
||||
|
||||
if (idx != mp_irq_entries) {
|
||||
printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
|
||||
continue; /* IRQ already used */
|
||||
}
|
||||
|
||||
mp_irq.mp_type = MP_INTSRC;
|
||||
mp_irq.mp_irqflag = 0; /* Conforming */
|
||||
mp_irq.mp_srcbus = MP_ISA_BUS;
|
||||
mp_irq.mp_dstapic = dstapic;
|
||||
mp_irq.mp_irqtype = mp_INT;
|
||||
mp_irq.mp_srcbusirq = i; /* Identity mapped */
|
||||
mp_irq.mp_dstirq = i;
|
||||
|
||||
save_mp_irq(&mp_irq);
|
||||
}
|
||||
}
|
||||
|
||||
int mp_register_gsi(u32 gsi, int triggering, int polarity)
|
||||
{
|
||||
int ioapic;
|
||||
int ioapic_pin;
|
||||
#ifdef CONFIG_X86_32
|
||||
#define MAX_GSI_NUM 4096
|
||||
#define IRQ_COMPRESSION_START 64
|
||||
|
||||
static int pci_irq = IRQ_COMPRESSION_START;
|
||||
/*
|
||||
* Mapping between Global System Interrupts, which
|
||||
* represent all possible interrupts, and IRQs
|
||||
* assigned to actual devices.
|
||||
*/
|
||||
static int gsi_to_irq[MAX_GSI_NUM];
|
||||
#else
|
||||
|
||||
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
|
||||
return gsi;
|
||||
#endif
|
||||
|
||||
/* Don't set up the ACPI SCI because it's already set up */
|
||||
if (acpi_gbl_FADT.sci_interrupt == gsi)
|
||||
return gsi;
|
||||
|
||||
ioapic = mp_find_ioapic(gsi);
|
||||
if (ioapic < 0) {
|
||||
printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
|
||||
return gsi;
|
||||
}
|
||||
|
||||
ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
if (ioapic_renumber_irq)
|
||||
gsi = ioapic_renumber_irq(ioapic, gsi);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Avoid pin reprogramming. PRTs typically include entries
|
||||
* with redundant pin->gsi mappings (but unique PCI devices);
|
||||
* we only program the IOAPIC on the first.
|
||||
*/
|
||||
if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
|
||||
printk(KERN_ERR "Invalid reference to IOAPIC pin "
|
||||
"%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
|
||||
ioapic_pin);
|
||||
return gsi;
|
||||
}
|
||||
if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
|
||||
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
|
||||
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
|
||||
#ifdef CONFIG_X86_32
|
||||
return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
|
||||
#else
|
||||
return gsi;
|
||||
#endif
|
||||
}
|
||||
|
||||
set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* For GSI >= 64, use IRQ compression
|
||||
*/
|
||||
if ((gsi >= IRQ_COMPRESSION_START)
|
||||
&& (triggering == ACPI_LEVEL_SENSITIVE)) {
|
||||
/*
|
||||
* For PCI devices assign IRQs in order, avoiding gaps
|
||||
* due to unused I/O APIC pins.
|
||||
*/
|
||||
int irq = gsi;
|
||||
if (gsi < MAX_GSI_NUM) {
|
||||
/*
|
||||
* Retain the VIA chipset work-around (gsi > 15), but
|
||||
* avoid a problem where the 8254 timer (IRQ0) is setup
|
||||
* via an override (so it's not on pin 0 of the ioapic),
|
||||
* and at the same time, the pin 0 interrupt is a PCI
|
||||
* type. The gsi > 15 test could cause these two pins
|
||||
* to be shared as IRQ0, and they are not shareable.
|
||||
* So test for this condition, and if necessary, avoid
|
||||
* the pin collision.
|
||||
*/
|
||||
gsi = pci_irq++;
|
||||
/*
|
||||
* Don't assign IRQ used by ACPI SCI
|
||||
*/
|
||||
if (gsi == acpi_gbl_FADT.sci_interrupt)
|
||||
gsi = pci_irq++;
|
||||
gsi_to_irq[irq] = gsi;
|
||||
} else {
|
||||
printk(KERN_ERR "GSI %u is too high\n", gsi);
|
||||
return gsi;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
|
||||
triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
|
||||
polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
|
||||
return gsi;
|
||||
}
|
||||
|
||||
int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
|
||||
u32 gsi, int triggering, int polarity)
|
||||
{
|
||||
#ifdef CONFIG_X86_MPPARSE
|
||||
struct mp_config_intsrc mp_irq;
|
||||
int ioapic;
|
||||
|
||||
if (!acpi_ioapic)
|
||||
return 0;
|
||||
|
||||
/* print the entry should happen on mptable identically */
|
||||
mp_irq.mp_type = MP_INTSRC;
|
||||
mp_irq.mp_irqtype = mp_INT;
|
||||
mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
|
||||
(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
|
||||
mp_irq.mp_srcbus = number;
|
||||
mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
|
||||
ioapic = mp_find_ioapic(gsi);
|
||||
mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
|
||||
mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
|
||||
|
||||
save_mp_irq(&mp_irq);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse IOAPIC related entries in MADT
|
||||
* returns 0 on success, < 0 on error
|
||||
|
@ -1009,8 +1353,6 @@ static void __init acpi_process_madt(void)
|
|||
return;
|
||||
}
|
||||
|
||||
#ifdef __i386__
|
||||
|
||||
static int __init disable_acpi_irq(const struct dmi_system_id *d)
|
||||
{
|
||||
if (!acpi_force) {
|
||||
|
@ -1060,6 +1402,16 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Force ignoring BIOS IRQ0 pin2 override
|
||||
*/
|
||||
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
|
||||
{
|
||||
pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
|
||||
acpi_skip_timer_override = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If your system is blacklisted here, but you find that acpi=force
|
||||
* works for you, please contact acpi-devel@sourceforge.net
|
||||
|
@ -1227,11 +1579,35 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
|
|||
DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
|
||||
},
|
||||
},
|
||||
/*
|
||||
* HP laptops which use a DSDT reporting as HP/SB400/10000,
|
||||
* which includes some code which overrides all temperature
|
||||
* trip points to 16C if the INTIN2 input of the I/O APIC
|
||||
* is enabled. This input is incorrectly designated the
|
||||
* ISA IRQ 0 via an interrupt source override even though
|
||||
* it is wired to the output of the master 8259A and INTIN0
|
||||
* is not connected at all. Force ignoring BIOS IRQ0 pin2
|
||||
* override in that cases.
|
||||
*/
|
||||
{
|
||||
.callback = dmi_ignore_irq0_timer_override,
|
||||
.ident = "HP NX6125 laptop",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"),
|
||||
},
|
||||
},
|
||||
{
|
||||
.callback = dmi_ignore_irq0_timer_override,
|
||||
.ident = "HP NX6325 laptop",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
|
||||
},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
#endif /* __i386__ */
|
||||
|
||||
/*
|
||||
* acpi_boot_table_init() and acpi_boot_init()
|
||||
* called from setup_arch(), always.
|
||||
|
@ -1259,9 +1635,7 @@ int __init acpi_boot_table_init(void)
|
|||
{
|
||||
int error;
|
||||
|
||||
#ifdef __i386__
|
||||
dmi_check_system(acpi_dmi_table);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If acpi_disabled, bail out
|
||||
|
@ -1386,6 +1760,20 @@ static int __init parse_pci(char *arg)
|
|||
}
|
||||
early_param("pci", parse_pci);
|
||||
|
||||
int __init acpi_mps_check(void)
|
||||
{
|
||||
#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE)
|
||||
/* mptable code is not built-in*/
|
||||
if (acpi_disabled || acpi_noirq) {
|
||||
printk(KERN_WARNING "MPS support code is not built-in.\n"
|
||||
"Using acpi=off or acpi=noirq or pci=noacpi "
|
||||
"may have problem\n");
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
static int __init parse_acpi_skip_timer_override(char *arg)
|
||||
{
|
||||
|
|
|
@ -86,7 +86,9 @@ int acpi_save_state_mem(void)
|
|||
saved_magic = 0x12345678;
|
||||
#else /* CONFIG_64BIT */
|
||||
header->trampoline_segment = setup_trampoline() >> 4;
|
||||
init_rsp = (unsigned long)temp_stack + 4096;
|
||||
#ifdef CONFIG_SMP
|
||||
stack_start.sp = temp_stack + 4096;
|
||||
#endif
|
||||
initial_code = (unsigned long)wakeup_long64;
|
||||
saved_magic = 0x123456789abcdef0;
|
||||
#endif /* CONFIG_64BIT */
|
||||
|
|
|
@ -0,0 +1,962 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
|
||||
* Author: Joerg Roedel <joerg.roedel@amd.com>
|
||||
* Leo Duran <leo.duran@amd.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/iommu-helper.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/gart.h>
|
||||
#include <asm/amd_iommu_types.h>
|
||||
#include <asm/amd_iommu.h>
|
||||
|
||||
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
|
||||
|
||||
#define to_pages(addr, size) \
|
||||
(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
|
||||
|
||||
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
|
||||
|
||||
struct command {
|
||||
u32 data[4];
|
||||
};
|
||||
|
||||
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
|
||||
struct unity_map_entry *e);
|
||||
|
||||
static int iommu_has_npcache(struct amd_iommu *iommu)
|
||||
{
|
||||
return iommu->cap & IOMMU_CAP_NPCACHE;
|
||||
}
|
||||
|
||||
static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
|
||||
{
|
||||
u32 tail, head;
|
||||
u8 *target;
|
||||
|
||||
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
|
||||
target = (iommu->cmd_buf + tail);
|
||||
memcpy_toio(target, cmd, sizeof(*cmd));
|
||||
tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
|
||||
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
|
||||
if (tail == head)
|
||||
return -ENOMEM;
|
||||
writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
ret = __iommu_queue_command(iommu, cmd);
|
||||
spin_unlock_irqrestore(&iommu->lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int iommu_completion_wait(struct amd_iommu *iommu)
|
||||
{
|
||||
int ret;
|
||||
struct command cmd;
|
||||
volatile u64 ready = 0;
|
||||
unsigned long ready_phys = virt_to_phys(&ready);
|
||||
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
|
||||
cmd.data[1] = HIGH_U32(ready_phys);
|
||||
cmd.data[2] = 1; /* value written to 'ready' */
|
||||
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
|
||||
|
||||
iommu->need_sync = 0;
|
||||
|
||||
ret = iommu_queue_command(iommu, &cmd);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (!ready)
|
||||
cpu_relax();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
|
||||
{
|
||||
struct command cmd;
|
||||
|
||||
BUG_ON(iommu == NULL);
|
||||
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
|
||||
cmd.data[0] = devid;
|
||||
|
||||
iommu->need_sync = 1;
|
||||
|
||||
return iommu_queue_command(iommu, &cmd);
|
||||
}
|
||||
|
||||
static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
|
||||
u64 address, u16 domid, int pde, int s)
|
||||
{
|
||||
struct command cmd;
|
||||
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
address &= PAGE_MASK;
|
||||
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
|
||||
cmd.data[1] |= domid;
|
||||
cmd.data[2] = LOW_U32(address);
|
||||
cmd.data[3] = HIGH_U32(address);
|
||||
if (s)
|
||||
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
|
||||
if (pde)
|
||||
cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
|
||||
|
||||
iommu->need_sync = 1;
|
||||
|
||||
return iommu_queue_command(iommu, &cmd);
|
||||
}
|
||||
|
||||
static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
|
||||
u64 address, size_t size)
|
||||
{
|
||||
int s = 0;
|
||||
unsigned pages = to_pages(address, size);
|
||||
|
||||
address &= PAGE_MASK;
|
||||
|
||||
if (pages > 1) {
|
||||
/*
|
||||
* If we have to flush more than one page, flush all
|
||||
* TLB entries for this domain
|
||||
*/
|
||||
address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
|
||||
s = 1;
|
||||
}
|
||||
|
||||
iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommu_map(struct protection_domain *dom,
|
||||
unsigned long bus_addr,
|
||||
unsigned long phys_addr,
|
||||
int prot)
|
||||
{
|
||||
u64 __pte, *pte, *page;
|
||||
|
||||
bus_addr = PAGE_ALIGN(bus_addr);
|
||||
phys_addr = PAGE_ALIGN(bus_addr);
|
||||
|
||||
/* only support 512GB address spaces for now */
|
||||
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
|
||||
return -EINVAL;
|
||||
|
||||
pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
|
||||
|
||||
if (!IOMMU_PTE_PRESENT(*pte)) {
|
||||
page = (u64 *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
*pte = IOMMU_L2_PDE(virt_to_phys(page));
|
||||
}
|
||||
|
||||
pte = IOMMU_PTE_PAGE(*pte);
|
||||
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
|
||||
|
||||
if (!IOMMU_PTE_PRESENT(*pte)) {
|
||||
page = (u64 *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
*pte = IOMMU_L1_PDE(virt_to_phys(page));
|
||||
}
|
||||
|
||||
pte = IOMMU_PTE_PAGE(*pte);
|
||||
pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
|
||||
|
||||
if (IOMMU_PTE_PRESENT(*pte))
|
||||
return -EBUSY;
|
||||
|
||||
__pte = phys_addr | IOMMU_PTE_P;
|
||||
if (prot & IOMMU_PROT_IR)
|
||||
__pte |= IOMMU_PTE_IR;
|
||||
if (prot & IOMMU_PROT_IW)
|
||||
__pte |= IOMMU_PTE_IW;
|
||||
|
||||
*pte = __pte;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommu_for_unity_map(struct amd_iommu *iommu,
|
||||
struct unity_map_entry *entry)
|
||||
{
|
||||
u16 bdf, i;
|
||||
|
||||
for (i = entry->devid_start; i <= entry->devid_end; ++i) {
|
||||
bdf = amd_iommu_alias_table[i];
|
||||
if (amd_iommu_rlookup_table[bdf] == iommu)
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommu_init_unity_mappings(struct amd_iommu *iommu)
|
||||
{
|
||||
struct unity_map_entry *entry;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(entry, &amd_iommu_unity_map, list) {
|
||||
if (!iommu_for_unity_map(iommu, entry))
|
||||
continue;
|
||||
ret = dma_ops_unity_map(iommu->default_dom, entry);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
|
||||
struct unity_map_entry *e)
|
||||
{
|
||||
u64 addr;
|
||||
int ret;
|
||||
|
||||
for (addr = e->address_start; addr < e->address_end;
|
||||
addr += PAGE_SIZE) {
|
||||
ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
|
||||
if (ret)
|
||||
return ret;
|
||||
/*
|
||||
* if unity mapping is in aperture range mark the page
|
||||
* as allocated in the aperture
|
||||
*/
|
||||
if (addr < dma_dom->aperture_size)
|
||||
__set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
|
||||
u16 devid)
|
||||
{
|
||||
struct unity_map_entry *e;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(e, &amd_iommu_unity_map, list) {
|
||||
if (!(devid >= e->devid_start && devid <= e->devid_end))
|
||||
continue;
|
||||
ret = dma_ops_unity_map(dma_dom, e);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long dma_mask_to_pages(unsigned long mask)
|
||||
{
|
||||
return (mask >> PAGE_SHIFT) +
|
||||
(PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static unsigned long dma_ops_alloc_addresses(struct device *dev,
|
||||
struct dma_ops_domain *dom,
|
||||
unsigned int pages)
|
||||
{
|
||||
unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
|
||||
unsigned long address;
|
||||
unsigned long size = dom->aperture_size >> PAGE_SHIFT;
|
||||
unsigned long boundary_size;
|
||||
|
||||
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
|
||||
PAGE_SIZE) >> PAGE_SHIFT;
|
||||
limit = limit < size ? limit : size;
|
||||
|
||||
if (dom->next_bit >= limit)
|
||||
dom->next_bit = 0;
|
||||
|
||||
address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
|
||||
0 , boundary_size, 0);
|
||||
if (address == -1)
|
||||
address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
|
||||
0, boundary_size, 0);
|
||||
|
||||
if (likely(address != -1)) {
|
||||
dom->next_bit = address + pages;
|
||||
address <<= PAGE_SHIFT;
|
||||
} else
|
||||
address = bad_dma_address;
|
||||
|
||||
WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
static void dma_ops_free_addresses(struct dma_ops_domain *dom,
|
||||
unsigned long address,
|
||||
unsigned int pages)
|
||||
{
|
||||
address >>= PAGE_SHIFT;
|
||||
iommu_area_free(dom->bitmap, address, pages);
|
||||
}
|
||||
|
||||
static u16 domain_id_alloc(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
int id;
|
||||
|
||||
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
|
||||
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
|
||||
BUG_ON(id == 0);
|
||||
if (id > 0 && id < MAX_DOMAIN_ID)
|
||||
__set_bit(id, amd_iommu_pd_alloc_bitmap);
|
||||
else
|
||||
id = 0;
|
||||
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
|
||||
unsigned long start_page,
|
||||
unsigned int pages)
|
||||
{
|
||||
unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
|
||||
|
||||
if (start_page + pages > last_page)
|
||||
pages = last_page - start_page;
|
||||
|
||||
set_bit_string(dom->bitmap, start_page, pages);
|
||||
}
|
||||
|
||||
static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
|
||||
{
|
||||
int i, j;
|
||||
u64 *p1, *p2, *p3;
|
||||
|
||||
p1 = dma_dom->domain.pt_root;
|
||||
|
||||
if (!p1)
|
||||
return;
|
||||
|
||||
for (i = 0; i < 512; ++i) {
|
||||
if (!IOMMU_PTE_PRESENT(p1[i]))
|
||||
continue;
|
||||
|
||||
p2 = IOMMU_PTE_PAGE(p1[i]);
|
||||
for (j = 0; j < 512; ++i) {
|
||||
if (!IOMMU_PTE_PRESENT(p2[j]))
|
||||
continue;
|
||||
p3 = IOMMU_PTE_PAGE(p2[j]);
|
||||
free_page((unsigned long)p3);
|
||||
}
|
||||
|
||||
free_page((unsigned long)p2);
|
||||
}
|
||||
|
||||
free_page((unsigned long)p1);
|
||||
}
|
||||
|
||||
static void dma_ops_domain_free(struct dma_ops_domain *dom)
|
||||
{
|
||||
if (!dom)
|
||||
return;
|
||||
|
||||
dma_ops_free_pagetable(dom);
|
||||
|
||||
kfree(dom->pte_pages);
|
||||
|
||||
kfree(dom->bitmap);
|
||||
|
||||
kfree(dom);
|
||||
}
|
||||
|
||||
static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
|
||||
unsigned order)
|
||||
{
|
||||
struct dma_ops_domain *dma_dom;
|
||||
unsigned i, num_pte_pages;
|
||||
u64 *l2_pde;
|
||||
u64 address;
|
||||
|
||||
/*
|
||||
* Currently the DMA aperture must be between 32 MB and 1GB in size
|
||||
*/
|
||||
if ((order < 25) || (order > 30))
|
||||
return NULL;
|
||||
|
||||
dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
|
||||
if (!dma_dom)
|
||||
return NULL;
|
||||
|
||||
spin_lock_init(&dma_dom->domain.lock);
|
||||
|
||||
dma_dom->domain.id = domain_id_alloc();
|
||||
if (dma_dom->domain.id == 0)
|
||||
goto free_dma_dom;
|
||||
dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
|
||||
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
|
||||
dma_dom->domain.priv = dma_dom;
|
||||
if (!dma_dom->domain.pt_root)
|
||||
goto free_dma_dom;
|
||||
dma_dom->aperture_size = (1ULL << order);
|
||||
dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
|
||||
GFP_KERNEL);
|
||||
if (!dma_dom->bitmap)
|
||||
goto free_dma_dom;
|
||||
/*
|
||||
* mark the first page as allocated so we never return 0 as
|
||||
* a valid dma-address. So we can use 0 as error value
|
||||
*/
|
||||
dma_dom->bitmap[0] = 1;
|
||||
dma_dom->next_bit = 0;
|
||||
|
||||
if (iommu->exclusion_start &&
|
||||
iommu->exclusion_start < dma_dom->aperture_size) {
|
||||
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
|
||||
int pages = to_pages(iommu->exclusion_start,
|
||||
iommu->exclusion_length);
|
||||
dma_ops_reserve_addresses(dma_dom, startpage, pages);
|
||||
}
|
||||
|
||||
num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
|
||||
dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
|
||||
GFP_KERNEL);
|
||||
if (!dma_dom->pte_pages)
|
||||
goto free_dma_dom;
|
||||
|
||||
l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
|
||||
if (l2_pde == NULL)
|
||||
goto free_dma_dom;
|
||||
|
||||
dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
|
||||
|
||||
for (i = 0; i < num_pte_pages; ++i) {
|
||||
dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!dma_dom->pte_pages[i])
|
||||
goto free_dma_dom;
|
||||
address = virt_to_phys(dma_dom->pte_pages[i]);
|
||||
l2_pde[i] = IOMMU_L1_PDE(address);
|
||||
}
|
||||
|
||||
return dma_dom;
|
||||
|
||||
free_dma_dom:
|
||||
dma_ops_domain_free(dma_dom);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct protection_domain *domain_for_device(u16 devid)
|
||||
{
|
||||
struct protection_domain *dom;
|
||||
unsigned long flags;
|
||||
|
||||
read_lock_irqsave(&amd_iommu_devtable_lock, flags);
|
||||
dom = amd_iommu_pd_table[devid];
|
||||
read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
|
||||
|
||||
return dom;
|
||||
}
|
||||
|
||||
static void set_device_domain(struct amd_iommu *iommu,
|
||||
struct protection_domain *domain,
|
||||
u16 devid)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
u64 pte_root = virt_to_phys(domain->pt_root);
|
||||
|
||||
pte_root |= (domain->mode & 0x07) << 9;
|
||||
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;
|
||||
|
||||
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
|
||||
amd_iommu_dev_table[devid].data[0] = pte_root;
|
||||
amd_iommu_dev_table[devid].data[1] = pte_root >> 32;
|
||||
amd_iommu_dev_table[devid].data[2] = domain->id;
|
||||
|
||||
amd_iommu_pd_table[devid] = domain;
|
||||
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
|
||||
|
||||
iommu_queue_inv_dev_entry(iommu, devid);
|
||||
|
||||
iommu->need_sync = 1;
|
||||
}
|
||||
|
||||
static int get_device_resources(struct device *dev,
|
||||
struct amd_iommu **iommu,
|
||||
struct protection_domain **domain,
|
||||
u16 *bdf)
|
||||
{
|
||||
struct dma_ops_domain *dma_dom;
|
||||
struct pci_dev *pcidev;
|
||||
u16 _bdf;
|
||||
|
||||
BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
|
||||
|
||||
pcidev = to_pci_dev(dev);
|
||||
_bdf = (pcidev->bus->number << 8) | pcidev->devfn;
|
||||
|
||||
if (_bdf >= amd_iommu_last_bdf) {
|
||||
*iommu = NULL;
|
||||
*domain = NULL;
|
||||
*bdf = 0xffff;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*bdf = amd_iommu_alias_table[_bdf];
|
||||
|
||||
*iommu = amd_iommu_rlookup_table[*bdf];
|
||||
if (*iommu == NULL)
|
||||
return 0;
|
||||
dma_dom = (*iommu)->default_dom;
|
||||
*domain = domain_for_device(*bdf);
|
||||
if (*domain == NULL) {
|
||||
*domain = &dma_dom->domain;
|
||||
set_device_domain(*iommu, *domain, *bdf);
|
||||
printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
|
||||
"device ", (*domain)->id);
|
||||
print_devid(_bdf, 1);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
|
||||
struct dma_ops_domain *dom,
|
||||
unsigned long address,
|
||||
phys_addr_t paddr,
|
||||
int direction)
|
||||
{
|
||||
u64 *pte, __pte;
|
||||
|
||||
WARN_ON(address > dom->aperture_size);
|
||||
|
||||
paddr &= PAGE_MASK;
|
||||
|
||||
pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
|
||||
pte += IOMMU_PTE_L0_INDEX(address);
|
||||
|
||||
__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
|
||||
|
||||
if (direction == DMA_TO_DEVICE)
|
||||
__pte |= IOMMU_PTE_IR;
|
||||
else if (direction == DMA_FROM_DEVICE)
|
||||
__pte |= IOMMU_PTE_IW;
|
||||
else if (direction == DMA_BIDIRECTIONAL)
|
||||
__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
|
||||
|
||||
WARN_ON(*pte);
|
||||
|
||||
*pte = __pte;
|
||||
|
||||
return (dma_addr_t)address;
|
||||
}
|
||||
|
||||
static void dma_ops_domain_unmap(struct amd_iommu *iommu,
|
||||
struct dma_ops_domain *dom,
|
||||
unsigned long address)
|
||||
{
|
||||
u64 *pte;
|
||||
|
||||
if (address >= dom->aperture_size)
|
||||
return;
|
||||
|
||||
WARN_ON(address & 0xfffULL || address > dom->aperture_size);
|
||||
|
||||
pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
|
||||
pte += IOMMU_PTE_L0_INDEX(address);
|
||||
|
||||
WARN_ON(!*pte);
|
||||
|
||||
*pte = 0ULL;
|
||||
}
|
||||
|
||||
static dma_addr_t __map_single(struct device *dev,
|
||||
struct amd_iommu *iommu,
|
||||
struct dma_ops_domain *dma_dom,
|
||||
phys_addr_t paddr,
|
||||
size_t size,
|
||||
int dir)
|
||||
{
|
||||
dma_addr_t offset = paddr & ~PAGE_MASK;
|
||||
dma_addr_t address, start;
|
||||
unsigned int pages;
|
||||
int i;
|
||||
|
||||
pages = to_pages(paddr, size);
|
||||
paddr &= PAGE_MASK;
|
||||
|
||||
address = dma_ops_alloc_addresses(dev, dma_dom, pages);
|
||||
if (unlikely(address == bad_dma_address))
|
||||
goto out;
|
||||
|
||||
start = address;
|
||||
for (i = 0; i < pages; ++i) {
|
||||
dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
|
||||
paddr += PAGE_SIZE;
|
||||
start += PAGE_SIZE;
|
||||
}
|
||||
address += offset;
|
||||
|
||||
out:
|
||||
return address;
|
||||
}
|
||||
|
||||
static void __unmap_single(struct amd_iommu *iommu,
|
||||
struct dma_ops_domain *dma_dom,
|
||||
dma_addr_t dma_addr,
|
||||
size_t size,
|
||||
int dir)
|
||||
{
|
||||
dma_addr_t i, start;
|
||||
unsigned int pages;
|
||||
|
||||
if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
|
||||
return;
|
||||
|
||||
pages = to_pages(dma_addr, size);
|
||||
dma_addr &= PAGE_MASK;
|
||||
start = dma_addr;
|
||||
|
||||
for (i = 0; i < pages; ++i) {
|
||||
dma_ops_domain_unmap(iommu, dma_dom, start);
|
||||
start += PAGE_SIZE;
|
||||
}
|
||||
|
||||
dma_ops_free_addresses(dma_dom, dma_addr, pages);
|
||||
}
|
||||
|
||||
static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
|
||||
size_t size, int dir)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct amd_iommu *iommu;
|
||||
struct protection_domain *domain;
|
||||
u16 devid;
|
||||
dma_addr_t addr;
|
||||
|
||||
get_device_resources(dev, &iommu, &domain, &devid);
|
||||
|
||||
if (iommu == NULL || domain == NULL)
|
||||
return (dma_addr_t)paddr;
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
|
||||
if (addr == bad_dma_address)
|
||||
goto out;
|
||||
|
||||
if (iommu_has_npcache(iommu))
|
||||
iommu_flush_pages(iommu, domain->id, addr, size);
|
||||
|
||||
if (iommu->need_sync)
|
||||
iommu_completion_wait(iommu);
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
static void unmap_single(struct device *dev, dma_addr_t dma_addr,
|
||||
size_t size, int dir)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct amd_iommu *iommu;
|
||||
struct protection_domain *domain;
|
||||
u16 devid;
|
||||
|
||||
if (!get_device_resources(dev, &iommu, &domain, &devid))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
|
||||
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
|
||||
|
||||
iommu_flush_pages(iommu, domain->id, dma_addr, size);
|
||||
|
||||
if (iommu->need_sync)
|
||||
iommu_completion_wait(iommu);
|
||||
|
||||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
}
|
||||
|
||||
static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
|
||||
int nelems, int dir)
|
||||
{
|
||||
struct scatterlist *s;
|
||||
int i;
|
||||
|
||||
for_each_sg(sglist, s, nelems, i) {
|
||||
s->dma_address = (dma_addr_t)sg_phys(s);
|
||||
s->dma_length = s->length;
|
||||
}
|
||||
|
||||
return nelems;
|
||||
}
|
||||
|
||||
static int map_sg(struct device *dev, struct scatterlist *sglist,
|
||||
int nelems, int dir)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct amd_iommu *iommu;
|
||||
struct protection_domain *domain;
|
||||
u16 devid;
|
||||
int i;
|
||||
struct scatterlist *s;
|
||||
phys_addr_t paddr;
|
||||
int mapped_elems = 0;
|
||||
|
||||
get_device_resources(dev, &iommu, &domain, &devid);
|
||||
|
||||
if (!iommu || !domain)
|
||||
return map_sg_no_iommu(dev, sglist, nelems, dir);
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
|
||||
for_each_sg(sglist, s, nelems, i) {
|
||||
paddr = sg_phys(s);
|
||||
|
||||
s->dma_address = __map_single(dev, iommu, domain->priv,
|
||||
paddr, s->length, dir);
|
||||
|
||||
if (s->dma_address) {
|
||||
s->dma_length = s->length;
|
||||
mapped_elems++;
|
||||
} else
|
||||
goto unmap;
|
||||
if (iommu_has_npcache(iommu))
|
||||
iommu_flush_pages(iommu, domain->id, s->dma_address,
|
||||
s->dma_length);
|
||||
}
|
||||
|
||||
if (iommu->need_sync)
|
||||
iommu_completion_wait(iommu);
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
|
||||
return mapped_elems;
|
||||
unmap:
|
||||
for_each_sg(sglist, s, mapped_elems, i) {
|
||||
if (s->dma_address)
|
||||
__unmap_single(iommu, domain->priv, s->dma_address,
|
||||
s->dma_length, dir);
|
||||
s->dma_address = s->dma_length = 0;
|
||||
}
|
||||
|
||||
mapped_elems = 0;
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
static void unmap_sg(struct device *dev, struct scatterlist *sglist,
|
||||
int nelems, int dir)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct amd_iommu *iommu;
|
||||
struct protection_domain *domain;
|
||||
struct scatterlist *s;
|
||||
u16 devid;
|
||||
int i;
|
||||
|
||||
if (!get_device_resources(dev, &iommu, &domain, &devid))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
|
||||
for_each_sg(sglist, s, nelems, i) {
|
||||
__unmap_single(iommu, domain->priv, s->dma_address,
|
||||
s->dma_length, dir);
|
||||
iommu_flush_pages(iommu, domain->id, s->dma_address,
|
||||
s->dma_length);
|
||||
s->dma_address = s->dma_length = 0;
|
||||
}
|
||||
|
||||
if (iommu->need_sync)
|
||||
iommu_completion_wait(iommu);
|
||||
|
||||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
}
|
||||
|
||||
static void *alloc_coherent(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_addr, gfp_t flag)
|
||||
{
|
||||
unsigned long flags;
|
||||
void *virt_addr;
|
||||
struct amd_iommu *iommu;
|
||||
struct protection_domain *domain;
|
||||
u16 devid;
|
||||
phys_addr_t paddr;
|
||||
|
||||
virt_addr = (void *)__get_free_pages(flag, get_order(size));
|
||||
if (!virt_addr)
|
||||
return 0;
|
||||
|
||||
memset(virt_addr, 0, size);
|
||||
paddr = virt_to_phys(virt_addr);
|
||||
|
||||
get_device_resources(dev, &iommu, &domain, &devid);
|
||||
|
||||
if (!iommu || !domain) {
|
||||
*dma_addr = (dma_addr_t)paddr;
|
||||
return virt_addr;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
|
||||
*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
|
||||
size, DMA_BIDIRECTIONAL);
|
||||
|
||||
if (*dma_addr == bad_dma_address) {
|
||||
free_pages((unsigned long)virt_addr, get_order(size));
|
||||
virt_addr = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (iommu_has_npcache(iommu))
|
||||
iommu_flush_pages(iommu, domain->id, *dma_addr, size);
|
||||
|
||||
if (iommu->need_sync)
|
||||
iommu_completion_wait(iommu);
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
|
||||
return virt_addr;
|
||||
}
|
||||
|
||||
static void free_coherent(struct device *dev, size_t size,
|
||||
void *virt_addr, dma_addr_t dma_addr)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct amd_iommu *iommu;
|
||||
struct protection_domain *domain;
|
||||
u16 devid;
|
||||
|
||||
get_device_resources(dev, &iommu, &domain, &devid);
|
||||
|
||||
if (!iommu || !domain)
|
||||
goto free_mem;
|
||||
|
||||
spin_lock_irqsave(&domain->lock, flags);
|
||||
|
||||
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
iommu_flush_pages(iommu, domain->id, dma_addr, size);
|
||||
|
||||
if (iommu->need_sync)
|
||||
iommu_completion_wait(iommu);
|
||||
|
||||
spin_unlock_irqrestore(&domain->lock, flags);
|
||||
|
||||
free_mem:
|
||||
free_pages((unsigned long)virt_addr, get_order(size));
|
||||
}
|
||||
|
||||
/*
|
||||
 * If the driver core informs the DMA layer when a driver grabs a device
|
||||
* we don't need to preallocate the protection domains anymore.
|
||||
* For now we have to.
|
||||
*/
|
||||
void prealloc_protection_domains(void)
|
||||
{
|
||||
struct pci_dev *dev = NULL;
|
||||
struct dma_ops_domain *dma_dom;
|
||||
struct amd_iommu *iommu;
|
||||
int order = amd_iommu_aperture_order;
|
||||
u16 devid;
|
||||
|
||||
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
|
||||
devid = (dev->bus->number << 8) | dev->devfn;
|
||||
if (devid >= amd_iommu_last_bdf)
|
||||
continue;
|
||||
devid = amd_iommu_alias_table[devid];
|
||||
if (domain_for_device(devid))
|
||||
continue;
|
||||
iommu = amd_iommu_rlookup_table[devid];
|
||||
if (!iommu)
|
||||
continue;
|
||||
dma_dom = dma_ops_domain_alloc(iommu, order);
|
||||
if (!dma_dom)
|
||||
continue;
|
||||
init_unity_mappings_for_device(dma_dom, devid);
|
||||
set_device_domain(iommu, &dma_dom->domain, devid);
|
||||
printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
|
||||
dma_dom->domain.id);
|
||||
print_devid(devid, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static struct dma_mapping_ops amd_iommu_dma_ops = {
|
||||
.alloc_coherent = alloc_coherent,
|
||||
.free_coherent = free_coherent,
|
||||
.map_single = map_single,
|
||||
.unmap_single = unmap_single,
|
||||
.map_sg = map_sg,
|
||||
.unmap_sg = unmap_sg,
|
||||
};
|
||||
|
||||
int __init amd_iommu_init_dma_ops(void)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
int order = amd_iommu_aperture_order;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(iommu, &amd_iommu_list, list) {
|
||||
iommu->default_dom = dma_ops_domain_alloc(iommu, order);
|
||||
if (iommu->default_dom == NULL)
|
||||
return -ENOMEM;
|
||||
ret = iommu_init_unity_mappings(iommu);
|
||||
if (ret)
|
||||
goto free_domains;
|
||||
}
|
||||
|
||||
if (amd_iommu_isolate)
|
||||
prealloc_protection_domains();
|
||||
|
||||
iommu_detected = 1;
|
||||
force_iommu = 1;
|
||||
bad_dma_address = 0;
|
||||
#ifdef CONFIG_GART_IOMMU
|
||||
gart_iommu_aperture_disabled = 1;
|
||||
gart_iommu_aperture = 0;
|
||||
#endif
|
||||
|
||||
dma_ops = &amd_iommu_dma_ops;
|
||||
|
||||
return 0;
|
||||
|
||||
free_domains:
|
||||
|
||||
list_for_each_entry(iommu, &amd_iommu_list, list) {
|
||||
if (iommu->default_dom)
|
||||
dma_ops_domain_free(iommu->default_dom);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,875 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
|
||||
* Author: Joerg Roedel <joerg.roedel@amd.com>
|
||||
* Leo Duran <leo.duran@amd.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/amd_iommu_types.h>
|
||||
#include <asm/amd_iommu.h>
|
||||
#include <asm/gart.h>
|
||||
|
||||
/*
|
||||
* definitions for the ACPI scanning code
|
||||
*/
|
||||
#define UPDATE_LAST_BDF(x) do {\
|
||||
if ((x) > amd_iommu_last_bdf) \
|
||||
amd_iommu_last_bdf = (x); \
|
||||
} while (0);
|
||||
|
||||
#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
|
||||
#define PCI_BUS(x) (((x) >> 8) & 0xff)
|
||||
#define IVRS_HEADER_LENGTH 48
|
||||
#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
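TBL_SIZE() rounds a per-device table up to a whole power-of-two number of pages. Below is a standalone userspace sketch of the same arithmetic, assuming a 32-byte device table entry and an example amd_iommu_last_bdf of 0x2000 (both values are chosen only for illustration and are not taken from this patch):

-------------------- 8< ----------------------------------------
/*
 * Illustrative userspace sketch of the TBL_SIZE() rounding; not part of
 * the patch. Entry size and last_bdf are example values only.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* smallest order such that (PAGE_SIZE << order) >= size, like the kernel helper */
static int get_order(unsigned long size)
{
	int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long last_bdf = 0x2000;	/* example: 16-bit bus/dev/fn space */
	unsigned long entry_size = 32;		/* assumed device table entry size */
	unsigned long tbl_size = 1UL << (PAGE_SHIFT + get_order(last_bdf * entry_size));

	/* 0x2000 * 32 bytes = 256 KB, already a power of two, so no rounding here */
	printf("table size: %lu KB\n", tbl_size >> 10);
	return 0;
}
-------------------- 8< ----------------------------------------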
|
||||
|
||||
#define ACPI_IVHD_TYPE 0x10
|
||||
#define ACPI_IVMD_TYPE_ALL 0x20
|
||||
#define ACPI_IVMD_TYPE 0x21
|
||||
#define ACPI_IVMD_TYPE_RANGE 0x22
|
||||
|
||||
#define IVHD_DEV_ALL 0x01
|
||||
#define IVHD_DEV_SELECT 0x02
|
||||
#define IVHD_DEV_SELECT_RANGE_START 0x03
|
||||
#define IVHD_DEV_RANGE_END 0x04
|
||||
#define IVHD_DEV_ALIAS 0x42
|
||||
#define IVHD_DEV_ALIAS_RANGE 0x43
|
||||
#define IVHD_DEV_EXT_SELECT 0x46
|
||||
#define IVHD_DEV_EXT_SELECT_RANGE 0x47
|
||||
|
||||
#define IVHD_FLAG_HT_TUN_EN 0x00
|
||||
#define IVHD_FLAG_PASSPW_EN 0x01
|
||||
#define IVHD_FLAG_RESPASSPW_EN 0x02
|
||||
#define IVHD_FLAG_ISOC_EN 0x03
|
||||
|
||||
#define IVMD_FLAG_EXCL_RANGE 0x08
|
||||
#define IVMD_FLAG_UNITY_MAP 0x01
|
||||
|
||||
#define ACPI_DEVFLAG_INITPASS 0x01
|
||||
#define ACPI_DEVFLAG_EXTINT 0x02
|
||||
#define ACPI_DEVFLAG_NMI 0x04
|
||||
#define ACPI_DEVFLAG_SYSMGT1 0x10
|
||||
#define ACPI_DEVFLAG_SYSMGT2 0x20
|
||||
#define ACPI_DEVFLAG_LINT0 0x40
|
||||
#define ACPI_DEVFLAG_LINT1 0x80
|
||||
#define ACPI_DEVFLAG_ATSDIS 0x10000000
|
||||
|
||||
struct ivhd_header {
|
||||
u8 type;
|
||||
u8 flags;
|
||||
u16 length;
|
||||
u16 devid;
|
||||
u16 cap_ptr;
|
||||
u64 mmio_phys;
|
||||
u16 pci_seg;
|
||||
u16 info;
|
||||
u32 reserved;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct ivhd_entry {
|
||||
u8 type;
|
||||
u16 devid;
|
||||
u8 flags;
|
||||
u32 ext;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct ivmd_header {
|
||||
u8 type;
|
||||
u8 flags;
|
||||
u16 length;
|
||||
u16 devid;
|
||||
u16 aux;
|
||||
u64 resv;
|
||||
u64 range_start;
|
||||
u64 range_length;
|
||||
} __attribute__((packed));
|
||||
|
||||
static int __initdata amd_iommu_detected;
|
||||
|
||||
u16 amd_iommu_last_bdf;
|
||||
struct list_head amd_iommu_unity_map;
|
||||
unsigned amd_iommu_aperture_order = 26;
|
||||
int amd_iommu_isolate;
|
||||
|
||||
struct list_head amd_iommu_list;
|
||||
struct dev_table_entry *amd_iommu_dev_table;
|
||||
u16 *amd_iommu_alias_table;
|
||||
struct amd_iommu **amd_iommu_rlookup_table;
|
||||
struct protection_domain **amd_iommu_pd_table;
|
||||
unsigned long *amd_iommu_pd_alloc_bitmap;
|
||||
|
||||
static u32 dev_table_size;
|
||||
static u32 alias_table_size;
|
||||
static u32 rlookup_table_size;
|
||||
|
||||
static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
|
||||
{
|
||||
u64 start = iommu->exclusion_start & PAGE_MASK;
|
||||
u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
|
||||
u64 entry;
|
||||
|
||||
if (!iommu->exclusion_start)
|
||||
return;
|
||||
|
||||
entry = start | MMIO_EXCL_ENABLE_MASK;
|
||||
memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
|
||||
&entry, sizeof(entry));
|
||||
|
||||
entry = limit;
|
||||
memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
|
||||
&entry, sizeof(entry));
|
||||
}
|
||||
|
||||
static void __init iommu_set_device_table(struct amd_iommu *iommu)
|
||||
{
|
||||
u32 entry;
|
||||
|
||||
BUG_ON(iommu->mmio_base == NULL);
|
||||
|
||||
entry = virt_to_phys(amd_iommu_dev_table);
|
||||
entry |= (dev_table_size >> 12) - 1;
|
||||
memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
|
||||
&entry, sizeof(entry));
|
||||
}
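iommu_set_device_table() packs the table size into the low bits of the base register as the number of 4 KB pages minus one. A minimal sketch of that encoding, with a made-up physical address and table size (both are assumptions for illustration, not values from the patch):

-------------------- 8< ----------------------------------------
/*
 * Illustrative sketch of the device table base register encoding; not part
 * of the patch. The address and size are invented for the example.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t table_phys = 0x12340000ULL;	/* assumed 4 KB aligned physical address */
	uint32_t table_size = 256 * 1024;	/* assumed table size in bytes */
	uint64_t entry = table_phys | ((table_size >> 12) - 1);

	/* 256 KB is 64 pages, so the size field is 63 (0x3f) */
	printf("register value: 0x%llx\n", (unsigned long long)entry);
	return 0;
}
-------------------- 8< ----------------------------------------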
|
||||
|
||||
static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
|
||||
{
|
||||
u32 ctrl;
|
||||
|
||||
ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
|
||||
ctrl |= (1 << bit);
|
||||
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
|
||||
}
|
||||
|
||||
static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
|
||||
{
|
||||
u32 ctrl;
|
||||
|
||||
ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
|
||||
ctrl &= ~(1 << bit);
|
||||
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
|
||||
}
|
||||
|
||||
void __init iommu_enable(struct amd_iommu *iommu)
|
||||
{
|
||||
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
|
||||
print_devid(iommu->devid, 0);
|
||||
printk(" cap 0x%hx\n", iommu->cap_ptr);
|
||||
|
||||
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
|
||||
}
|
||||
|
||||
static u8 * __init iommu_map_mmio_space(u64 address)
|
||||
{
|
||||
u8 *ret;
|
||||
|
||||
if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
|
||||
return NULL;
|
||||
|
||||
ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
|
||||
if (ret != NULL)
|
||||
return ret;
|
||||
|
||||
release_mem_region(address, MMIO_REGION_LENGTH);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
|
||||
{
|
||||
if (iommu->mmio_base)
|
||||
iounmap(iommu->mmio_base);
|
||||
release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
|
||||
}
|
||||
|
||||
static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
|
||||
{
|
||||
u32 cap;
|
||||
|
||||
cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
|
||||
UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
|
||||
{
|
||||
u8 *p = (void *)h, *end = (void *)h;
|
||||
struct ivhd_entry *dev;
|
||||
|
||||
p += sizeof(*h);
|
||||
end += h->length;
|
||||
|
||||
find_last_devid_on_pci(PCI_BUS(h->devid),
|
||||
PCI_SLOT(h->devid),
|
||||
PCI_FUNC(h->devid),
|
||||
h->cap_ptr);
|
||||
|
||||
while (p < end) {
|
||||
dev = (struct ivhd_entry *)p;
|
||||
switch (dev->type) {
|
||||
case IVHD_DEV_SELECT:
|
||||
case IVHD_DEV_RANGE_END:
|
||||
case IVHD_DEV_ALIAS:
|
||||
case IVHD_DEV_EXT_SELECT:
|
||||
UPDATE_LAST_BDF(dev->devid);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
p += 0x04 << (*p >> 6);
|
||||
}
|
||||
|
||||
WARN_ON(p != end);
|
||||
|
||||
return 0;
|
||||
}
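The stride p += 0x04 << (*p >> 6) used while walking the IVHD works because the top two bits of an entry's type select a 4, 8, 16 or 32 byte entry. A small sketch of that decoding, using type values from the IVHD_DEV_* defines above (the loop itself is only illustrative):

-------------------- 8< ----------------------------------------
/*
 * Illustrative sketch of how an IVHD entry length is derived from its type
 * byte; not part of the patch.
 */
#include <stdio.h>

int main(void)
{
	/* type values from the IVHD_DEV_* defines: SELECT, ALIAS, EXT_SELECT */
	unsigned char types[] = { 0x02, 0x42, 0x46 };
	unsigned int i;

	for (i = 0; i < sizeof(types); i++) {
		int len = 0x04 << (types[i] >> 6);

		/* 0x02 -> 4 bytes, 0x42 -> 8 bytes, 0x46 -> 8 bytes */
		printf("type 0x%02x: %d byte entry\n", types[i], len);
	}
	return 0;
}
-------------------- 8< ----------------------------------------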
|
||||
|
||||
static int __init find_last_devid_acpi(struct acpi_table_header *table)
|
||||
{
|
||||
int i;
|
||||
u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
|
||||
struct ivhd_header *h;
|
||||
|
||||
/*
|
||||
* Validate checksum here so we don't need to do it when
|
||||
* we actually parse the table
|
||||
*/
|
||||
for (i = 0; i < table->length; ++i)
|
||||
checksum += p[i];
|
||||
if (checksum != 0)
|
||||
/* ACPI table corrupt */
|
||||
return -ENODEV;
|
||||
|
||||
p += IVRS_HEADER_LENGTH;
|
||||
|
||||
end += table->length;
|
||||
while (p < end) {
|
||||
h = (struct ivhd_header *)p;
|
||||
switch (h->type) {
|
||||
case ACPI_IVHD_TYPE:
|
||||
find_last_devid_from_ivhd(h);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
p += h->length;
|
||||
}
|
||||
WARN_ON(p != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
|
||||
{
|
||||
u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
|
||||
get_order(CMD_BUFFER_SIZE));
|
||||
u64 entry = 0;
|
||||
|
||||
if (cmd_buf == NULL)
|
||||
return NULL;
|
||||
|
||||
iommu->cmd_buf_size = CMD_BUFFER_SIZE;
|
||||
|
||||
memset(cmd_buf, 0, CMD_BUFFER_SIZE);
|
||||
|
||||
entry = (u64)virt_to_phys(cmd_buf);
|
||||
entry |= MMIO_CMD_SIZE_512;
|
||||
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
|
||||
&entry, sizeof(entry));
|
||||
|
||||
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
|
||||
|
||||
return cmd_buf;
|
||||
}
|
||||
|
||||
static void __init free_command_buffer(struct amd_iommu *iommu)
|
||||
{
|
||||
if (iommu->cmd_buf)
|
||||
free_pages((unsigned long)iommu->cmd_buf,
|
||||
get_order(CMD_BUFFER_SIZE));
|
||||
}
|
||||
|
||||
static void set_dev_entry_bit(u16 devid, u8 bit)
|
||||
{
|
||||
int i = (bit >> 5) & 0x07;
|
||||
int _bit = bit & 0x1f;
|
||||
|
||||
amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
|
||||
}
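set_dev_entry_bit() splits a flat bit number into a 32-bit word index (bits 7:5) and a bit offset inside that word (bits 4:0). A quick sketch of the same split, using an arbitrary example bit number:

-------------------- 8< ----------------------------------------
/* Illustrative sketch of the word/bit split used by set_dev_entry_bit(); not part of the patch. */
#include <stdio.h>

int main(void)
{
	int bit = 98;			/* arbitrary example bit number */
	int word = (bit >> 5) & 0x07;	/* which 32-bit word of the entry */
	int pos = bit & 0x1f;		/* bit position inside that word */

	/* 98 = 3 * 32 + 2, so word 3, bit 2 */
	printf("bit %d -> word %d, bit %d\n", bit, word, pos);
	return 0;
}
-------------------- 8< ----------------------------------------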
|
||||
|
||||
static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
|
||||
{
|
||||
if (flags & ACPI_DEVFLAG_INITPASS)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
|
||||
if (flags & ACPI_DEVFLAG_EXTINT)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
|
||||
if (flags & ACPI_DEVFLAG_NMI)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
|
||||
if (flags & ACPI_DEVFLAG_SYSMGT1)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
|
||||
if (flags & ACPI_DEVFLAG_SYSMGT2)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
|
||||
if (flags & ACPI_DEVFLAG_LINT0)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
|
||||
if (flags & ACPI_DEVFLAG_LINT1)
|
||||
set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
|
||||
}
|
||||
|
||||
static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
|
||||
{
|
||||
amd_iommu_rlookup_table[devid] = iommu;
|
||||
}
|
||||
|
||||
static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
|
||||
{
|
||||
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
|
||||
|
||||
if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
|
||||
return;
|
||||
|
||||
if (iommu) {
|
||||
set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
|
||||
iommu->exclusion_start = m->range_start;
|
||||
iommu->exclusion_length = m->range_length;
|
||||
}
|
||||
}
|
||||
|
||||
static void __init init_iommu_from_pci(struct amd_iommu *iommu)
|
||||
{
|
||||
int bus = PCI_BUS(iommu->devid);
|
||||
int dev = PCI_SLOT(iommu->devid);
|
||||
int fn = PCI_FUNC(iommu->devid);
|
||||
int cap_ptr = iommu->cap_ptr;
|
||||
u32 range;
|
||||
|
||||
iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
|
||||
|
||||
range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
|
||||
iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
|
||||
iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
|
||||
}
|
||||
|
||||
static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
|
||||
struct ivhd_header *h)
|
||||
{
|
||||
u8 *p = (u8 *)h;
|
||||
u8 *end = p, flags = 0;
|
||||
u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
|
||||
u32 ext_flags = 0;
|
||||
bool alias = 0;
|
||||
struct ivhd_entry *e;
|
||||
|
||||
/*
|
||||
* First set the recommended feature enable bits from ACPI
|
||||
* into the IOMMU control registers
|
||||
*/
|
||||
h->flags & IVHD_FLAG_HT_TUN_EN ?
|
||||
iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
|
||||
iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
|
||||
|
||||
h->flags & IVHD_FLAG_PASSPW_EN ?
|
||||
iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
|
||||
iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
|
||||
|
||||
h->flags & IVHD_FLAG_RESPASSPW_EN ?
|
||||
iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
|
||||
iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
|
||||
|
||||
h->flags & IVHD_FLAG_ISOC_EN ?
|
||||
iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
|
||||
iommu_feature_disable(iommu, CONTROL_ISOC_EN);
|
||||
|
||||
/*
|
||||
* make IOMMU memory accesses cache coherent
|
||||
*/
|
||||
iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
|
||||
|
||||
/*
|
||||
* Done. Now parse the device entries
|
||||
*/
|
||||
p += sizeof(struct ivhd_header);
|
||||
end += h->length;
|
||||
|
||||
while (p < end) {
|
||||
e = (struct ivhd_entry *)p;
|
||||
switch (e->type) {
|
||||
case IVHD_DEV_ALL:
|
||||
for (dev_i = iommu->first_device;
|
||||
dev_i <= iommu->last_device; ++dev_i)
|
||||
set_dev_entry_from_acpi(dev_i, e->flags, 0);
|
||||
break;
|
||||
case IVHD_DEV_SELECT:
|
||||
devid = e->devid;
|
||||
set_dev_entry_from_acpi(devid, e->flags, 0);
|
||||
break;
|
||||
case IVHD_DEV_SELECT_RANGE_START:
|
||||
devid_start = e->devid;
|
||||
flags = e->flags;
|
||||
ext_flags = 0;
|
||||
alias = 0;
|
||||
break;
|
||||
case IVHD_DEV_ALIAS:
|
||||
devid = e->devid;
|
||||
devid_to = e->ext >> 8;
|
||||
set_dev_entry_from_acpi(devid, e->flags, 0);
|
||||
amd_iommu_alias_table[devid] = devid_to;
|
||||
break;
|
||||
case IVHD_DEV_ALIAS_RANGE:
|
||||
devid_start = e->devid;
|
||||
flags = e->flags;
|
||||
devid_to = e->ext >> 8;
|
||||
ext_flags = 0;
|
||||
alias = 1;
|
||||
break;
|
||||
case IVHD_DEV_EXT_SELECT:
|
||||
devid = e->devid;
|
||||
set_dev_entry_from_acpi(devid, e->flags, e->ext);
|
||||
break;
|
||||
case IVHD_DEV_EXT_SELECT_RANGE:
|
||||
devid_start = e->devid;
|
||||
flags = e->flags;
|
||||
ext_flags = e->ext;
|
||||
alias = 0;
|
||||
break;
|
||||
case IVHD_DEV_RANGE_END:
|
||||
devid = e->devid;
|
||||
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
|
||||
if (alias)
|
||||
amd_iommu_alias_table[dev_i] = devid_to;
|
||||
set_dev_entry_from_acpi(
|
||||
amd_iommu_alias_table[dev_i],
|
||||
flags, ext_flags);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
p += 0x04 << (e->type >> 6);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init init_iommu_devices(struct amd_iommu *iommu)
|
||||
{
|
||||
u16 i;
|
||||
|
||||
for (i = iommu->first_device; i <= iommu->last_device; ++i)
|
||||
set_iommu_for_device(iommu, i);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init free_iommu_one(struct amd_iommu *iommu)
|
||||
{
|
||||
free_command_buffer(iommu);
|
||||
iommu_unmap_mmio_space(iommu);
|
||||
}
|
||||
|
||||
static void __init free_iommu_all(void)
|
||||
{
|
||||
struct amd_iommu *iommu, *next;
|
||||
|
||||
list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
|
||||
list_del(&iommu->list);
|
||||
free_iommu_one(iommu);
|
||||
kfree(iommu);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
|
||||
{
|
||||
spin_lock_init(&iommu->lock);
|
||||
list_add_tail(&iommu->list, &amd_iommu_list);
|
||||
|
||||
/*
|
||||
* Copy data from ACPI table entry to the iommu struct
|
||||
*/
|
||||
iommu->devid = h->devid;
|
||||
iommu->cap_ptr = h->cap_ptr;
|
||||
iommu->mmio_phys = h->mmio_phys;
|
||||
iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
|
||||
if (!iommu->mmio_base)
|
||||
return -ENOMEM;
|
||||
|
||||
iommu_set_device_table(iommu);
|
||||
iommu->cmd_buf = alloc_command_buffer(iommu);
|
||||
if (!iommu->cmd_buf)
|
||||
return -ENOMEM;
|
||||
|
||||
init_iommu_from_pci(iommu);
|
||||
init_iommu_from_acpi(iommu, h);
|
||||
init_iommu_devices(iommu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init init_iommu_all(struct acpi_table_header *table)
|
||||
{
|
||||
u8 *p = (u8 *)table, *end = (u8 *)table;
|
||||
struct ivhd_header *h;
|
||||
struct amd_iommu *iommu;
|
||||
int ret;
|
||||
|
||||
INIT_LIST_HEAD(&amd_iommu_list);
|
||||
|
||||
end += table->length;
|
||||
p += IVRS_HEADER_LENGTH;
|
||||
|
||||
while (p < end) {
|
||||
h = (struct ivhd_header *)p;
|
||||
switch (*p) {
|
||||
case ACPI_IVHD_TYPE:
|
||||
iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
|
||||
if (iommu == NULL)
|
||||
return -ENOMEM;
|
||||
ret = init_iommu_one(iommu, h);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
p += h->length;
|
||||
|
||||
}
|
||||
WARN_ON(p != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init free_unity_maps(void)
|
||||
{
|
||||
struct unity_map_entry *entry, *next;
|
||||
|
||||
list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
|
||||
list_del(&entry->list);
|
||||
kfree(entry);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init init_exclusion_range(struct ivmd_header *m)
|
||||
{
|
||||
int i;
|
||||
|
||||
switch (m->type) {
|
||||
case ACPI_IVMD_TYPE:
|
||||
set_device_exclusion_range(m->devid, m);
|
||||
break;
|
||||
case ACPI_IVMD_TYPE_ALL:
|
||||
for (i = 0; i < amd_iommu_last_bdf; ++i)
|
||||
set_device_exclusion_range(i, m);
|
||||
break;
|
||||
case ACPI_IVMD_TYPE_RANGE:
|
||||
for (i = m->devid; i <= m->aux; ++i)
|
||||
set_device_exclusion_range(i, m);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init init_unity_map_range(struct ivmd_header *m)
|
||||
{
|
||||
struct unity_map_entry *e = 0;
|
||||
|
||||
e = kzalloc(sizeof(*e), GFP_KERNEL);
|
||||
if (e == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
switch (m->type) {
|
||||
default:
|
||||
case ACPI_IVMD_TYPE:
|
||||
e->devid_start = e->devid_end = m->devid;
|
||||
break;
|
||||
case ACPI_IVMD_TYPE_ALL:
|
||||
e->devid_start = 0;
|
||||
e->devid_end = amd_iommu_last_bdf;
|
||||
break;
|
||||
case ACPI_IVMD_TYPE_RANGE:
|
||||
e->devid_start = m->devid;
|
||||
e->devid_end = m->aux;
|
||||
break;
|
||||
}
|
||||
e->address_start = PAGE_ALIGN(m->range_start);
|
||||
e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
|
||||
e->prot = m->flags >> 1;
|
||||
|
||||
list_add_tail(&e->list, &amd_iommu_unity_map);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init init_memory_definitions(struct acpi_table_header *table)
|
||||
{
|
||||
u8 *p = (u8 *)table, *end = (u8 *)table;
|
||||
struct ivmd_header *m;
|
||||
|
||||
INIT_LIST_HEAD(&amd_iommu_unity_map);
|
||||
|
||||
end += table->length;
|
||||
p += IVRS_HEADER_LENGTH;
|
||||
|
||||
while (p < end) {
|
||||
m = (struct ivmd_header *)p;
|
||||
if (m->flags & IVMD_FLAG_EXCL_RANGE)
|
||||
init_exclusion_range(m);
|
||||
else if (m->flags & IVMD_FLAG_UNITY_MAP)
|
||||
init_unity_map_range(m);
|
||||
|
||||
p += m->length;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init enable_iommus(void)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
|
||||
list_for_each_entry(iommu, &amd_iommu_list, list) {
|
||||
iommu_set_exclusion_range(iommu);
|
||||
iommu_enable(iommu);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Suspend/Resume support
|
||||
* disable suspend until real resume implemented
|
||||
*/
|
||||
|
||||
static int amd_iommu_resume(struct sys_device *dev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct sysdev_class amd_iommu_sysdev_class = {
|
||||
.name = "amd_iommu",
|
||||
.suspend = amd_iommu_suspend,
|
||||
.resume = amd_iommu_resume,
|
||||
};
|
||||
|
||||
static struct sys_device device_amd_iommu = {
|
||||
.id = 0,
|
||||
.cls = &amd_iommu_sysdev_class,
|
||||
};
|
||||
|
||||
int __init amd_iommu_init(void)
|
||||
{
|
||||
int i, ret = 0;
|
||||
|
||||
|
||||
if (no_iommu) {
|
||||
printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!amd_iommu_detected)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* First parse ACPI tables to find the largest Bus/Dev/Func
|
||||
 * we need to handle. Based on this information the shared data
|
||||
* structures for the IOMMUs in the system will be allocated
|
||||
*/
|
||||
if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
|
||||
return -ENODEV;
|
||||
|
||||
dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
|
||||
alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
|
||||
rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
|
||||
|
||||
ret = -ENOMEM;
|
||||
|
||||
/* Device table - directly used by all IOMMUs */
|
||||
amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
|
||||
get_order(dev_table_size));
|
||||
if (amd_iommu_dev_table == NULL)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
|
||||
 * IOMMU sees for that device
|
||||
*/
|
||||
amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
|
||||
get_order(alias_table_size));
|
||||
if (amd_iommu_alias_table == NULL)
|
||||
goto free;
|
||||
|
||||
/* IOMMU rlookup table - find the IOMMU for a specific device */
|
||||
amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
|
||||
get_order(rlookup_table_size));
|
||||
if (amd_iommu_rlookup_table == NULL)
|
||||
goto free;
|
||||
|
||||
/*
|
||||
* Protection Domain table - maps devices to protection domains
|
||||
* This table has the same size as the rlookup_table
|
||||
*/
|
||||
amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
|
||||
get_order(rlookup_table_size));
|
||||
if (amd_iommu_pd_table == NULL)
|
||||
goto free;
|
||||
|
||||
amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
|
||||
get_order(MAX_DOMAIN_ID/8));
|
||||
if (amd_iommu_pd_alloc_bitmap == NULL)
|
||||
goto free;
|
||||
|
||||
/*
|
||||
* memory is allocated now; initialize the device table with all zeroes
|
||||
 * and let all alias entries point to themselves
|
||||
*/
|
||||
memset(amd_iommu_dev_table, 0, dev_table_size);
|
||||
for (i = 0; i < amd_iommu_last_bdf; ++i)
|
||||
amd_iommu_alias_table[i] = i;
|
||||
|
||||
memset(amd_iommu_pd_table, 0, rlookup_table_size);
|
||||
memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
|
||||
|
||||
/*
|
||||
 * never allocate domain 0 because it's used as the non-allocated and
|
||||
* error value placeholder
|
||||
*/
|
||||
amd_iommu_pd_alloc_bitmap[0] = 1;
|
||||
|
||||
/*
|
||||
* now the data structures are allocated and basically initialized
|
||||
* start the real acpi table scan
|
||||
*/
|
||||
ret = -ENODEV;
|
||||
if (acpi_table_parse("IVRS", init_iommu_all) != 0)
|
||||
goto free;
|
||||
|
||||
if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
|
||||
goto free;
|
||||
|
||||
ret = amd_iommu_init_dma_ops();
|
||||
if (ret)
|
||||
goto free;
|
||||
|
||||
ret = sysdev_class_register(&amd_iommu_sysdev_class);
|
||||
if (ret)
|
||||
goto free;
|
||||
|
||||
ret = sysdev_register(&device_amd_iommu);
|
||||
if (ret)
|
||||
goto free;
|
||||
|
||||
enable_iommus();
|
||||
|
||||
printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
|
||||
(1 << (amd_iommu_aperture_order-20)));
|
||||
|
||||
printk(KERN_INFO "AMD IOMMU: device isolation ");
|
||||
if (amd_iommu_isolate)
|
||||
printk("enabled\n");
|
||||
else
|
||||
printk("disabled\n");
|
||||
|
||||
out:
|
||||
return ret;
|
||||
|
||||
free:
|
||||
if (amd_iommu_pd_alloc_bitmap)
|
||||
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
|
||||
|
||||
if (amd_iommu_pd_table)
|
||||
free_pages((unsigned long)amd_iommu_pd_table,
|
||||
get_order(rlookup_table_size));
|
||||
|
||||
if (amd_iommu_rlookup_table)
|
||||
free_pages((unsigned long)amd_iommu_rlookup_table,
|
||||
get_order(rlookup_table_size));
|
||||
|
||||
if (amd_iommu_alias_table)
|
||||
free_pages((unsigned long)amd_iommu_alias_table,
|
||||
get_order(alias_table_size));
|
||||
|
||||
if (amd_iommu_dev_table)
|
||||
free_pages((unsigned long)amd_iommu_dev_table,
|
||||
get_order(dev_table_size));
|
||||
|
||||
free_iommu_all();
|
||||
|
||||
free_unity_maps();
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int __init early_amd_iommu_detect(struct acpi_table_header *table)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init amd_iommu_detect(void)
|
||||
{
|
||||
if (swiotlb || no_iommu || iommu_detected)
|
||||
return;
|
||||
|
||||
if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
|
||||
iommu_detected = 1;
|
||||
amd_iommu_detected = 1;
|
||||
#ifdef CONFIG_GART_IOMMU
|
||||
gart_iommu_aperture_disabled = 1;
|
||||
gart_iommu_aperture = 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static int __init parse_amd_iommu_options(char *str)
|
||||
{
|
||||
for (; *str; ++str) {
|
||||
if (strcmp(str, "isolate") == 0)
|
||||
amd_iommu_isolate = 1;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __init parse_amd_iommu_size_options(char *str)
|
||||
{
|
||||
for (; *str; ++str) {
|
||||
if (strcmp(str, "32M") == 0)
|
||||
amd_iommu_aperture_order = 25;
|
||||
if (strcmp(str, "64M") == 0)
|
||||
amd_iommu_aperture_order = 26;
|
||||
if (strcmp(str, "128M") == 0)
|
||||
amd_iommu_aperture_order = 27;
|
||||
if (strcmp(str, "256M") == 0)
|
||||
amd_iommu_aperture_order = 28;
|
||||
if (strcmp(str, "512M") == 0)
|
||||
amd_iommu_aperture_order = 29;
|
||||
if (strcmp(str, "1G") == 0)
|
||||
amd_iommu_aperture_order = 30;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("amd_iommu=", parse_amd_iommu_options);
|
||||
__setup("amd_iommu_size=", parse_amd_iommu_size_options);
|
|
@ -35,6 +35,18 @@ int fallback_aper_force __initdata;
|
|||
|
||||
int fix_aperture __initdata = 1;
|
||||
|
||||
struct bus_dev_range {
|
||||
int bus;
|
||||
int dev_base;
|
||||
int dev_limit;
|
||||
};
|
||||
|
||||
static struct bus_dev_range bus_dev_ranges[] __initdata = {
|
||||
{ 0x00, 0x18, 0x20},
|
||||
{ 0xff, 0x00, 0x20},
|
||||
{ 0xfe, 0x00, 0x20}
|
||||
};
|
||||
|
||||
static struct resource gart_resource = {
|
||||
.name = "GART",
|
||||
.flags = IORESOURCE_MEM,
|
||||
|
@ -55,8 +67,9 @@ static u32 __init allocate_aperture(void)
|
|||
u32 aper_size;
|
||||
void *p;
|
||||
|
||||
if (fallback_aper_order > 7)
|
||||
fallback_aper_order = 7;
|
||||
/* aper_size should be <= 1G */
|
||||
if (fallback_aper_order > 5)
|
||||
fallback_aper_order = 5;
|
||||
aper_size = (32 * 1024 * 1024) << fallback_aper_order;
|
||||
|
||||
/*
|
||||
|
@ -65,7 +78,20 @@ static u32 __init allocate_aperture(void)
|
|||
* memory. Unfortunately we cannot move it up because that would
|
||||
* make the IOMMU useless.
|
||||
*/
|
||||
p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
|
||||
/*
|
||||
 * using 512M as goal, in case kexec will load kernel_big
 * that will do the in-place decompression, and could overlap with
 * the gart area that is in use.
 * sequence:
 * kernel_small
 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
 * ==> kernel_small (gart area becomes e820 reserved)
 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
 * ==> kernel_big (uncompressed size will be bigger than 64M or 128M)
 * so don't use the space below 512M for the gart iommu; leave it for
 * kernel code, to be safe
|
||||
*/
|
||||
p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
|
||||
if (!p || __pa(p)+aper_size > 0xffffffff) {
|
||||
printk(KERN_ERR
|
||||
"Cannot allocate aperture memory hole (%p,%uK)\n",
|
||||
|
@ -83,69 +109,53 @@ static u32 __init allocate_aperture(void)
|
|||
return (u32)__pa(p);
|
||||
}
|
||||
|
||||
static int __init aperture_valid(u64 aper_base, u32 aper_size)
|
||||
{
|
||||
if (!aper_base)
|
||||
return 0;
|
||||
|
||||
if (aper_base + aper_size > 0x100000000UL) {
|
||||
printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
|
||||
return 0;
|
||||
}
|
||||
if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
|
||||
printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
|
||||
return 0;
|
||||
}
|
||||
if (aper_size < 64*1024*1024) {
|
||||
printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Find a PCI capability */
|
||||
static __u32 __init find_cap(int num, int slot, int func, int cap)
|
||||
static u32 __init find_cap(int bus, int slot, int func, int cap)
|
||||
{
|
||||
int bytes;
|
||||
u8 pos;
|
||||
|
||||
if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
|
||||
if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) &
|
||||
PCI_STATUS_CAP_LIST))
|
||||
return 0;
|
||||
|
||||
pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
|
||||
pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST);
|
||||
for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
|
||||
u8 id;
|
||||
|
||||
pos &= ~3;
|
||||
id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
|
||||
id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID);
|
||||
if (id == 0xff)
|
||||
break;
|
||||
if (id == cap)
|
||||
return pos;
|
||||
pos = read_pci_config_byte(num, slot, func,
|
||||
pos = read_pci_config_byte(bus, slot, func,
|
||||
pos+PCI_CAP_LIST_NEXT);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Read a standard AGPv3 bridge header */
|
||||
static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
|
||||
static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
|
||||
{
|
||||
u32 apsize;
|
||||
u32 apsizereg;
|
||||
int nbits;
|
||||
u32 aper_low, aper_hi;
|
||||
u64 aper;
|
||||
u32 old_order;
|
||||
|
||||
printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func);
|
||||
apsizereg = read_pci_config_16(num, slot, func, cap + 0x14);
|
||||
printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func);
|
||||
apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14);
|
||||
if (apsizereg == 0xffffffff) {
|
||||
printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* old_order could be the value from NB gart setting */
|
||||
old_order = *order;
|
||||
|
||||
apsize = apsizereg & 0xfff;
|
||||
/* Some BIOS use weird encodings not in the AGPv3 table. */
|
||||
if (apsize & 0xff)
|
||||
|
@ -155,14 +165,26 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
|
|||
if ((int)*order < 0) /* < 32MB */
|
||||
*order = 0;
|
||||
|
||||
aper_low = read_pci_config(num, slot, func, 0x10);
|
||||
aper_hi = read_pci_config(num, slot, func, 0x14);
|
||||
aper_low = read_pci_config(bus, slot, func, 0x10);
|
||||
aper_hi = read_pci_config(bus, slot, func, 0x14);
|
||||
aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
|
||||
|
||||
/*
|
||||
* On some sick chips, APSIZE is 0. It means it wants 4G
|
||||
 * so let's double check that order, and let's trust the AMD NB settings:
|
||||
*/
|
||||
printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n",
|
||||
aper, 32 << old_order);
|
||||
if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) {
|
||||
printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n",
|
||||
32 << *order, apsizereg);
|
||||
*order = old_order;
|
||||
}
|
||||
|
||||
printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
|
||||
aper, 32 << *order, apsizereg);
|
||||
|
||||
if (!aperture_valid(aper, (32*1024*1024) << *order))
|
||||
if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20))
|
||||
return 0;
|
||||
return (u32)aper;
|
||||
}
|
||||
|
@ -180,17 +202,17 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
|
|||
 * the AGP bridges should always be on their own bus in the HT hierarchy,
|
||||
* but do it here for future safety.
|
||||
*/
|
||||
static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
|
||||
static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
|
||||
{
|
||||
int num, slot, func;
|
||||
int bus, slot, func;
|
||||
|
||||
/* Poor man's PCI discovery */
|
||||
for (num = 0; num < 256; num++) {
|
||||
for (bus = 0; bus < 256; bus++) {
|
||||
for (slot = 0; slot < 32; slot++) {
|
||||
for (func = 0; func < 8; func++) {
|
||||
u32 class, cap;
|
||||
u8 type;
|
||||
class = read_pci_config(num, slot, func,
|
||||
class = read_pci_config(bus, slot, func,
|
||||
PCI_CLASS_REVISION);
|
||||
if (class == 0xffffffff)
|
||||
break;
|
||||
|
@ -199,17 +221,17 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
|
|||
case PCI_CLASS_BRIDGE_HOST:
|
||||
case PCI_CLASS_BRIDGE_OTHER: /* needed? */
|
||||
/* AGP bridge? */
|
||||
cap = find_cap(num, slot, func,
|
||||
cap = find_cap(bus, slot, func,
|
||||
PCI_CAP_ID_AGP);
|
||||
if (!cap)
|
||||
break;
|
||||
*valid_agp = 1;
|
||||
return read_agp(num, slot, func, cap,
|
||||
return read_agp(bus, slot, func, cap,
|
||||
order);
|
||||
}
|
||||
|
||||
/* No multi-function device? */
|
||||
type = read_pci_config_byte(num, slot, func,
|
||||
type = read_pci_config_byte(bus, slot, func,
|
||||
PCI_HEADER_TYPE);
|
||||
if (!(type & 0x80))
|
||||
break;
|
||||
|
@ -249,36 +271,50 @@ void __init early_gart_iommu_check(void)
|
|||
 * or the BIOS forgot to put that region in reserved.
|
||||
 * try to update e820 to mark that region as reserved.
|
||||
*/
|
||||
int fix, num;
|
||||
int i, fix, slot;
|
||||
u32 ctl;
|
||||
u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
|
||||
u64 aper_base = 0, last_aper_base = 0;
|
||||
int aper_enabled = 0, last_aper_enabled = 0;
|
||||
int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0;
|
||||
|
||||
if (!early_pci_allowed())
|
||||
return;
|
||||
|
||||
/* This is mostly duplicate of iommu_hole_init */
|
||||
fix = 0;
|
||||
for (num = 24; num < 32; num++) {
|
||||
if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
|
||||
continue;
|
||||
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
|
||||
int bus;
|
||||
int dev_base, dev_limit;
|
||||
|
||||
ctl = read_pci_config(0, num, 3, 0x90);
|
||||
aper_enabled = ctl & 1;
|
||||
aper_order = (ctl >> 1) & 7;
|
||||
aper_size = (32 * 1024 * 1024) << aper_order;
|
||||
aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
|
||||
aper_base <<= 25;
|
||||
bus = bus_dev_ranges[i].bus;
|
||||
dev_base = bus_dev_ranges[i].dev_base;
|
||||
dev_limit = bus_dev_ranges[i].dev_limit;
|
||||
|
||||
if ((last_aper_order && aper_order != last_aper_order) ||
|
||||
(last_aper_base && aper_base != last_aper_base) ||
|
||||
(last_aper_enabled && aper_enabled != last_aper_enabled)) {
|
||||
fix = 1;
|
||||
break;
|
||||
for (slot = dev_base; slot < dev_limit; slot++) {
|
||||
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
|
||||
continue;
|
||||
|
||||
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
|
||||
aper_enabled = ctl & AMD64_GARTEN;
|
||||
aper_order = (ctl >> 1) & 7;
|
||||
aper_size = (32 * 1024 * 1024) << aper_order;
|
||||
aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
|
||||
aper_base <<= 25;
|
||||
|
||||
if (last_valid) {
|
||||
if ((aper_order != last_aper_order) ||
|
||||
(aper_base != last_aper_base) ||
|
||||
(aper_enabled != last_aper_enabled)) {
|
||||
fix = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
last_aper_order = aper_order;
|
||||
last_aper_base = aper_base;
|
||||
last_aper_enabled = aper_enabled;
|
||||
last_valid = 1;
|
||||
}
|
||||
last_aper_order = aper_order;
|
||||
last_aper_base = aper_base;
|
||||
last_aper_enabled = aper_enabled;
|
||||
}
|
||||
|
||||
if (!fix && !aper_enabled)
|
||||
|
@ -290,32 +326,46 @@ void __init early_gart_iommu_check(void)
|
|||
if (gart_fix_e820 && !fix && aper_enabled) {
|
||||
if (e820_any_mapped(aper_base, aper_base + aper_size,
|
||||
E820_RAM)) {
|
||||
/* reserved it, so we can resuse it in second kernel */
|
||||
/* reserve it, so we can reuse it in second kernel */
|
||||
printk(KERN_INFO "update e820 for GART\n");
|
||||
add_memory_region(aper_base, aper_size, E820_RESERVED);
|
||||
e820_add_region(aper_base, aper_size, E820_RESERVED);
|
||||
update_e820();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
	/* different nodes have different settings, disable them all at first */
|
||||
for (num = 24; num < 32; num++) {
|
||||
if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
|
||||
continue;
|
||||
if (!fix)
|
||||
return;
|
||||
|
||||
ctl = read_pci_config(0, num, 3, 0x90);
|
||||
ctl &= ~1;
|
||||
write_pci_config(0, num, 3, 0x90, ctl);
|
||||
	/* different nodes have different settings, disable them all at first */
|
||||
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
|
||||
int bus;
|
||||
int dev_base, dev_limit;
|
||||
|
||||
bus = bus_dev_ranges[i].bus;
|
||||
dev_base = bus_dev_ranges[i].dev_base;
|
||||
dev_limit = bus_dev_ranges[i].dev_limit;
|
||||
|
||||
for (slot = dev_base; slot < dev_limit; slot++) {
|
||||
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
|
||||
continue;
|
||||
|
||||
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
|
||||
ctl &= ~AMD64_GARTEN;
|
||||
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
|
||||
}
|
||||
}
|
||||
|
||||
}
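The aperture decode above (and the same code repeated in gart_iommu_hole_init() below) reads the size as an order relative to 32 MB from bits 3:1 of the control register, and the base in 32 MB units from the low 15 bits of the base register. A standalone sketch of that decoding, with invented register values rather than a real chipset dump:

-------------------- 8< ----------------------------------------
/*
 * Illustrative sketch of decoding the GART aperture control/base registers;
 * not part of the patch. The register values are invented for the example.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t ctl = 0x05;		/* assumed: GARTEN set, order field = 2 */
	uint32_t base_reg = 0x20;	/* assumed: base field = 0x20, in 32 MB units */

	int enabled = ctl & 1;
	int order = (ctl >> 1) & 7;
	uint32_t size = (32U * 1024 * 1024) << order;
	uint64_t base = (uint64_t)(base_reg & 0x7fff) << 25;

	/* order 2 gives a 128 MB aperture; base 0x20 << 25 is 1 GB (0x40000000) */
	printf("enabled=%d size=%u MB base=0x%llx\n",
	       enabled, size >> 20, (unsigned long long)base);
	return 0;
}
-------------------- 8< ----------------------------------------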
|
||||
|
||||
static int __initdata printed_gart_size_msg;
|
||||
|
||||
void __init gart_iommu_hole_init(void)
|
||||
{
|
||||
u32 agp_aper_base = 0, agp_aper_order = 0;
|
||||
u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
|
||||
u64 aper_base, last_aper_base = 0;
|
||||
int fix, num, valid_agp = 0;
|
||||
int node;
|
||||
int fix, slot, valid_agp = 0;
|
||||
int i, node;
|
||||
|
||||
if (gart_iommu_aperture_disabled || !fix_aperture ||
|
||||
!early_pci_allowed())
|
||||
|
@ -323,38 +373,65 @@ void __init gart_iommu_hole_init(void)
|
|||
|
||||
printk(KERN_INFO "Checking aperture...\n");
|
||||
|
||||
if (!fallback_aper_force)
|
||||
agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
|
||||
|
||||
fix = 0;
|
||||
node = 0;
|
||||
for (num = 24; num < 32; num++) {
|
||||
if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
|
||||
continue;
|
||||
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
|
||||
int bus;
|
||||
int dev_base, dev_limit;
|
||||
|
||||
iommu_detected = 1;
|
||||
gart_iommu_aperture = 1;
|
||||
bus = bus_dev_ranges[i].bus;
|
||||
dev_base = bus_dev_ranges[i].dev_base;
|
||||
dev_limit = bus_dev_ranges[i].dev_limit;
|
||||
|
||||
aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
|
||||
aper_size = (32 * 1024 * 1024) << aper_order;
|
||||
aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
|
||||
aper_base <<= 25;
|
||||
for (slot = dev_base; slot < dev_limit; slot++) {
|
||||
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
|
||||
continue;
|
||||
|
||||
printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
|
||||
node, aper_base, aper_size >> 20);
|
||||
node++;
|
||||
iommu_detected = 1;
|
||||
gart_iommu_aperture = 1;
|
||||
|
||||
if (!aperture_valid(aper_base, aper_size)) {
|
||||
fix = 1;
|
||||
break;
|
||||
aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
|
||||
aper_size = (32 * 1024 * 1024) << aper_order;
|
||||
aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
|
||||
aper_base <<= 25;
|
||||
|
||||
printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
|
||||
node, aper_base, aper_size >> 20);
|
||||
node++;
|
||||
|
||||
if (!aperture_valid(aper_base, aper_size, 64<<20)) {
|
||||
if (valid_agp && agp_aper_base &&
|
||||
agp_aper_base == aper_base &&
|
||||
agp_aper_order == aper_order) {
|
||||
/* the same between two setting from NB and agp */
|
||||
if (!no_iommu &&
|
||||
max_pfn > MAX_DMA32_PFN &&
|
||||
!printed_gart_size_msg) {
|
||||
printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
|
||||
printk(KERN_ERR "please increase GART size in your BIOS setup\n");
|
||||
printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
|
||||
printed_gart_size_msg = 1;
|
||||
}
|
||||
} else {
|
||||
fix = 1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if ((last_aper_order && aper_order != last_aper_order) ||
|
||||
(last_aper_base && aper_base != last_aper_base)) {
|
||||
fix = 1;
|
||||
goto out;
|
||||
}
|
||||
last_aper_order = aper_order;
|
||||
last_aper_base = aper_base;
|
||||
}
|
||||
|
||||
if ((last_aper_order && aper_order != last_aper_order) ||
|
||||
(last_aper_base && aper_base != last_aper_base)) {
|
||||
fix = 1;
|
||||
break;
|
||||
}
|
||||
last_aper_order = aper_order;
|
||||
last_aper_base = aper_base;
|
||||
}
|
||||
|
||||
out:
|
||||
if (!fix && !fallback_aper_force) {
|
||||
if (last_aper_base) {
|
||||
unsigned long n = (32 * 1024 * 1024) << last_aper_order;
|
||||
|
@ -364,14 +441,16 @@ void __init gart_iommu_hole_init(void)
|
|||
return;
|
||||
}
|
||||
|
||||
if (!fallback_aper_force)
|
||||
aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
|
||||
if (!fallback_aper_force) {
|
||||
aper_alloc = agp_aper_base;
|
||||
aper_order = agp_aper_order;
|
||||
}
|
||||
|
||||
if (aper_alloc) {
|
||||
/* Got the aperture from the AGP bridge */
|
||||
} else if (swiotlb && !valid_agp) {
|
||||
/* Do nothing */
|
||||
} else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) ||
|
||||
} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
|
||||
force_iommu ||
|
||||
valid_agp ||
|
||||
fallback_aper_force) {
|
||||
|
@ -401,16 +480,24 @@ void __init gart_iommu_hole_init(void)
|
|||
}
|
||||
|
||||
/* Fix up the north bridges */
|
||||
for (num = 24; num < 32; num++) {
|
||||
if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
|
||||
continue;
|
||||
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
|
||||
int bus;
|
||||
int dev_base, dev_limit;
|
||||
|
||||
/*
|
||||
* Don't enable translation yet. That is done later.
|
||||
* Assume this BIOS didn't initialise the GART so
|
||||
* just overwrite all previous bits
|
||||
*/
|
||||
write_pci_config(0, num, 3, 0x90, aper_order<<1);
|
||||
write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
|
||||
bus = bus_dev_ranges[i].bus;
|
||||
dev_base = bus_dev_ranges[i].dev_base;
|
||||
dev_limit = bus_dev_ranges[i].dev_limit;
|
||||
for (slot = dev_base; slot < dev_limit; slot++) {
|
||||
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
|
||||
continue;
|
||||
|
||||
/* Don't enable translation yet. That is done later.
|
||||
Assume this BIOS didn't initialise the GART so
|
||||
just overwrite all previous bits */
|
||||
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
|
||||
write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
|
||||
}
|
||||
}
|
||||
|
||||
set_up_gart_resume(aper_order, aper_alloc);
|
||||
}
|
||||
|
|
|
@ -52,30 +52,41 @@
|
|||
|
||||
unsigned long mp_lapic_addr;
|
||||
|
||||
DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
|
||||
EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
|
||||
|
||||
/*
|
||||
* Knob to control our willingness to enable the local APIC.
|
||||
*
|
||||
* -1=force-disable, +1=force-enable
|
||||
* +1=force-enable
|
||||
*/
|
||||
static int enable_local_apic __initdata;
|
||||
static int force_enable_local_apic;
|
||||
int disable_apic;
|
||||
|
||||
/* Local APIC timer verification ok */
|
||||
static int local_apic_timer_verify_ok;
|
||||
/* Disable local APIC timer from the kernel commandline or via dmi quirk
|
||||
or using CPU MSR check */
|
||||
int local_apic_timer_disabled;
|
||||
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
|
||||
static int local_apic_timer_disabled;
|
||||
/* Local APIC timer works in C2 */
|
||||
int local_apic_timer_c2_ok;
|
||||
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
|
||||
|
||||
int first_system_vector = 0xfe;
|
||||
|
||||
char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
|
||||
|
||||
/*
|
||||
* Debug level, exported for io_apic.c
|
||||
*/
|
||||
int apic_verbosity;
|
||||
|
||||
int pic_mode;
|
||||
|
||||
/* Have we found an MP table */
|
||||
int smp_found_config;
|
||||
|
||||
static struct resource lapic_resource = {
|
||||
.name = "Local APIC",
|
||||
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
|
||||
};
|
||||
|
||||
static unsigned int calibration_result;
|
||||
|
||||
static int lapic_next_event(unsigned long delta,
|
||||
|
@ -545,7 +556,7 @@ void __init setup_boot_APIC_clock(void)
|
|||
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
|
||||
else
|
||||
printk(KERN_WARNING "APIC timer registered as dummy,"
|
||||
" due to nmi_watchdog=1!\n");
|
||||
" due to nmi_watchdog=%d!\n", nmi_watchdog);
|
||||
}
|
||||
|
||||
/* Setup the lapic or request the broadcast */
|
||||
|
@ -963,7 +974,7 @@ void __cpuinit setup_local_APIC(void)
|
|||
* Double-check whether this APIC is really registered.
|
||||
*/
|
||||
if (!apic_id_registered())
|
||||
BUG();
|
||||
WARN_ON_ONCE(1);
|
||||
|
||||
/*
|
||||
* Intel recommends to set DFR, LDR and TPR before enabling
|
||||
|
@ -1094,7 +1105,7 @@ static int __init detect_init_APIC(void)
|
|||
u32 h, l, features;
|
||||
|
||||
/* Disabled by kernel option? */
|
||||
if (enable_local_apic < 0)
|
||||
if (disable_apic)
|
||||
return -1;
|
||||
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
|
@ -1117,7 +1128,7 @@ static int __init detect_init_APIC(void)
|
|||
* Over-ride BIOS and try to enable the local APIC only if
|
||||
* "lapic" specified.
|
||||
*/
|
||||
if (enable_local_apic <= 0) {
|
||||
if (!force_enable_local_apic) {
|
||||
printk(KERN_INFO "Local APIC disabled by BIOS -- "
|
||||
"you can enable it with \"lapic\"\n");
|
||||
return -1;
|
||||
|
@ -1154,9 +1165,6 @@ static int __init detect_init_APIC(void)
|
|||
if (l & MSR_IA32_APICBASE_ENABLE)
|
||||
mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
|
||||
|
||||
if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED)
|
||||
nmi_watchdog = NMI_LOCAL_APIC;
|
||||
|
||||
printk(KERN_INFO "Found and enabled local APIC!\n");
|
||||
|
||||
apic_pm_activate();
|
||||
|
@ -1195,36 +1203,6 @@ void __init init_apic_mappings(void)
|
|||
if (boot_cpu_physical_apicid == -1U)
|
||||
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
{
|
||||
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_ioapics; i++) {
|
||||
if (smp_found_config) {
|
||||
ioapic_phys = mp_ioapics[i].mpc_apicaddr;
|
||||
if (!ioapic_phys) {
|
||||
printk(KERN_ERR
|
||||
"WARNING: bogus zero IO-APIC "
|
||||
"address found in MPTABLE, "
|
||||
"disabling IO/APIC support!\n");
|
||||
smp_found_config = 0;
|
||||
skip_ioapic_setup = 1;
|
||||
goto fake_ioapic_page;
|
||||
}
|
||||
} else {
|
||||
fake_ioapic_page:
|
||||
ioapic_phys = (unsigned long)
|
||||
alloc_bootmem_pages(PAGE_SIZE);
|
||||
ioapic_phys = __pa(ioapic_phys);
|
||||
}
|
||||
set_fixmap_nocache(idx, ioapic_phys);
|
||||
printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
|
||||
__fix_to_virt(idx), ioapic_phys);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1236,7 +1214,7 @@ int apic_version[MAX_APICS];
|
|||
|
||||
int __init APIC_init_uniprocessor(void)
|
||||
{
|
||||
if (enable_local_apic < 0)
|
||||
if (disable_apic)
|
||||
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
|
||||
|
||||
if (!smp_found_config && !cpu_has_apic)
|
||||
|
@ -1265,10 +1243,14 @@ int __init APIC_init_uniprocessor(void)
|
|||
#ifdef CONFIG_CRASH_DUMP
|
||||
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
|
||||
#endif
|
||||
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
|
||||
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
|
||||
|
||||
setup_local_APIC();
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
|
||||
#endif
|
||||
localise_nmi_watchdog();
|
||||
end_local_APIC_setup();
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
if (smp_found_config)
|
||||
|
@ -1351,13 +1333,13 @@ void __init smp_intr_init(void)
|
|||
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
|
||||
* IPI, driven by wakeup.
|
||||
*/
|
||||
set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
|
||||
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
|
||||
|
||||
/* IPI for invalidation */
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
|
||||
|
||||
/* IPI for generic function call */
|
||||
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
|
||||
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1370,15 +1352,15 @@ void __init apic_intr_init(void)
|
|||
smp_intr_init();
|
||||
#endif
|
||||
/* self generated IPI for local APIC timer */
|
||||
set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
|
||||
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
|
||||
|
||||
/* IPI vectors for APIC spurious and error interrupts */
|
||||
set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
|
||||
set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
|
||||
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
|
||||
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
|
||||
|
||||
/* thermal monitor LVT interrupt */
|
||||
#ifdef CONFIG_X86_MCE_P4THERMAL
|
||||
set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
|
||||
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1513,6 +1495,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
|
|||
*/
|
||||
cpu = 0;
|
||||
|
||||
if (apicid > max_physical_apicid)
|
||||
max_physical_apicid = apicid;
|
||||
|
||||
/*
|
||||
* Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
|
||||
* but we need to work other dependencies like SMP_SUSPEND etc
|
||||
|
@ -1520,7 +1505,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
|
|||
* if (CPU_HOTPLUG_ENABLED || num_processors > 8)
|
||||
* - Ashok Raj <ashok.raj@intel.com>
|
||||
*/
|
||||
if (num_processors > 8) {
|
||||
if (max_physical_apicid >= 8) {
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_INTEL:
|
||||
if (!APIC_XAPIC(version)) {
|
||||
|
@ -1534,9 +1519,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
|
|||
}
|
||||
#ifdef CONFIG_SMP
|
||||
/* are we being called early in kernel startup? */
|
||||
if (x86_cpu_to_apicid_early_ptr) {
|
||||
u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
|
||||
u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
|
||||
if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
|
||||
u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
|
||||
u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
|
||||
|
||||
cpu_to_apicid[cpu] = apicid;
|
||||
bios_cpu_apicid[cpu] = apicid;
|
||||
|
@ -1703,14 +1688,14 @@ static void apic_pm_activate(void) { }
|
|||
*/
|
||||
static int __init parse_lapic(char *arg)
|
||||
{
|
||||
enable_local_apic = 1;
|
||||
force_enable_local_apic = 1;
|
||||
return 0;
|
||||
}
|
||||
early_param("lapic", parse_lapic);
|
||||
|
||||
static int __init parse_nolapic(char *arg)
|
||||
{
|
||||
enable_local_apic = -1;
|
||||
disable_apic = 1;
|
||||
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1740,3 +1725,21 @@ static int __init apic_set_verbosity(char *str)
|
|||
}
|
||||
__setup("apic=", apic_set_verbosity);
|
||||
|
||||
static int __init lapic_insert_resource(void)
|
||||
{
|
||||
if (!apic_phys)
|
||||
return -1;
|
||||
|
||||
/* Put local APIC into the resource map. */
|
||||
lapic_resource.start = apic_phys;
|
||||
lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
|
||||
insert_resource(&iomem_resource, &lapic_resource);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* need call insert after e820_reserve_resources()
|
||||
* that is using request_resource
|
||||
*/
|
||||
late_initcall(lapic_insert_resource);
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
#include <mach_ipi.h>
|
||||
#include <mach_apic.h>
|
||||
|
||||
int disable_apic_timer __cpuinitdata;
|
||||
static int disable_apic_timer __cpuinitdata;
|
||||
static int apic_calibrate_pmtmr __initdata;
|
||||
int disable_apic;
|
||||
|
||||
|
@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
|
|||
*/
|
||||
int apic_verbosity;
|
||||
|
||||
/* Have we found an MP table */
|
||||
int smp_found_config;
|
||||
|
||||
static struct resource lapic_resource = {
|
||||
.name = "Local APIC",
|
||||
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
|
||||
|
@ -87,9 +90,6 @@ static unsigned long apic_phys;
|
|||
|
||||
unsigned long mp_lapic_addr;
|
||||
|
||||
DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
|
||||
EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
|
||||
|
||||
unsigned int __cpuinitdata maxcpus = NR_CPUS;
|
||||
/*
|
||||
* Get the LAPIC version
|
||||
|
@ -417,37 +417,13 @@ void __init setup_boot_APIC_clock(void)
|
|||
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
|
||||
else
|
||||
printk(KERN_WARNING "APIC timer registered as dummy,"
|
||||
" due to nmi_watchdog=1!\n");
|
||||
" due to nmi_watchdog=%d!\n", nmi_watchdog);
|
||||
|
||||
setup_APIC_timer();
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
|
||||
* C1E flag only in the secondary CPU, so when we detect the wreckage
|
||||
* we already have enabled the boot CPU local apic timer. Check, if
|
||||
* disable_apic_timer is set and the DUMMY flag is cleared. If yes,
|
||||
* set the DUMMY flag again and force the broadcast mode in the
|
||||
* clockevents layer.
|
||||
*/
|
||||
static void __cpuinit check_boot_apic_timer_broadcast(void)
|
||||
{
|
||||
if (!disable_apic_timer ||
|
||||
(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
|
||||
return;
|
||||
|
||||
printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
|
||||
lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
|
||||
|
||||
local_irq_enable();
|
||||
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
|
||||
&boot_cpu_physical_apicid);
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
void __cpuinit setup_secondary_APIC_clock(void)
|
||||
{
|
||||
check_boot_apic_timer_broadcast();
|
||||
setup_APIC_timer();
|
||||
}
|
||||
|
||||
@@ -850,7 +826,6 @@ static void __cpuinit lapic_setup_esr(void)
void __cpuinit end_local_APIC_setup(void)
|
||||
{
|
||||
lapic_setup_esr();
|
||||
nmi_watchdog_default();
|
||||
setup_apic_nmi_watchdog(NULL);
|
||||
apic_pm_activate();
|
||||
}
|
||||
@@ -875,7 +850,7 @@ static int __init detect_init_APIC(void)
|
||||
void __init early_init_lapic_mapping(void)
|
||||
{
|
||||
unsigned long apic_phys;
|
||||
unsigned long phys_addr;
|
||||
|
||||
/*
|
||||
* If no local APIC can be found then go out
|
||||
@@ -884,11 +859,11 @@ void __init early_init_lapic_mapping(void)
if (!smp_found_config)
|
||||
return;
|
||||
|
||||
apic_phys = mp_lapic_addr;
|
||||
phys_addr = mp_lapic_addr;
|
||||
|
||||
set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
|
||||
set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
|
||||
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
|
||||
APIC_BASE, apic_phys);
|
||||
APIC_BASE, phys_addr);
|
||||
|
||||
/*
|
||||
* Fetch the APIC ID of the BSP in case we have a
|
||||
@@ -942,7 +917,9 @@ int __init APIC_init_uniprocessor(void)
|
||||
verify_local_APIC();
|
||||
|
||||
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
|
||||
connect_bsp_APIC();
|
||||
|
||||
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
|
||||
apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
|
||||
|
||||
setup_local_APIC();
|
||||
@@ -954,6 +931,8 @@ int __init APIC_init_uniprocessor(void)
if (!skip_ioapic_setup && nr_ioapics)
|
||||
enable_IO_APIC();
|
||||
|
||||
if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
|
||||
localise_nmi_watchdog();
|
||||
end_local_APIC_setup();
|
||||
|
||||
if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
|
||||
@@ -1021,6 +1000,14 @@ asmlinkage void smp_error_interrupt(void)
irq_exit();
|
||||
}
|
||||
|
||||
/**
|
||||
* connect_bsp_APIC - attach the APIC to the interrupt system
*/
|
||||
void __init connect_bsp_APIC(void)
|
||||
{
|
||||
enable_apic_mode();
|
||||
}
|
||||
|
||||
void disconnect_bsp_APIC(int virt_wire_setup)
|
||||
{
|
||||
/* Go back to Virtual Wire compatibility mode */
|
||||
@@ -1090,10 +1077,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
*/
|
||||
cpu = 0;
|
||||
}
|
||||
if (apicid > max_physical_apicid)
|
||||
max_physical_apicid = apicid;
|
||||
|
||||
/* are we being called early in kernel startup? */
|
||||
if (x86_cpu_to_apicid_early_ptr) {
|
||||
u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
|
||||
u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
|
||||
if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
|
||||
u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
|
||||
u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
|
||||
|
||||
cpu_to_apicid[cpu] = apicid;
|
||||
bios_cpu_apicid[cpu] = apicid;
|
||||
@@ -1269,7 +1259,7 @@ __cpuinit int apic_is_clustered_box(void)
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
|
||||
return 0;
|
||||
|
||||
bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
|
||||
bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
|
||||
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
|
||||
|
||||
for (i = 0; i < NR_CPUS; i++) {
|
||||
|
|
@@ -228,6 +228,7 @@
#include <linux/suspend.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/smp_lock.h>
|
||||
|
||||
#include <asm/system.h>
|
||||
#include <asm/uaccess.h>
|
||||
@@ -1149,7 +1150,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
as->event_tail = 0;
|
||||
}
|
||||
as->events[as->event_head] = event;
|
||||
if ((!as->suser) || (!as->writer))
|
||||
if (!as->suser || !as->writer)
|
||||
continue;
|
||||
switch (event) {
|
||||
case APM_SYS_SUSPEND:
|
||||
@@ -1396,7 +1397,7 @@ static void apm_mainloop(void)
|
||||
static int check_apm_user(struct apm_user *as, const char *func)
|
||||
{
|
||||
if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) {
|
||||
if (as == NULL || as->magic != APM_BIOS_MAGIC) {
|
||||
printk(KERN_ERR "apm: %s passed bad filp\n", func);
|
||||
return 1;
|
||||
}
|
||||
@@ -1459,18 +1460,19 @@ static unsigned int do_poll(struct file *fp, poll_table *wait)
return 0;
|
||||
}
|
||||
|
||||
static int do_ioctl(struct inode *inode, struct file *filp,
|
||||
u_int cmd, u_long arg)
|
||||
static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
|
||||
{
|
||||
struct apm_user *as;
|
||||
int ret;
|
||||
|
||||
as = filp->private_data;
|
||||
if (check_apm_user(as, "ioctl"))
|
||||
return -EIO;
|
||||
if ((!as->suser) || (!as->writer))
|
||||
if (!as->suser || !as->writer)
|
||||
return -EPERM;
|
||||
switch (cmd) {
|
||||
case APM_IOC_STANDBY:
|
||||
lock_kernel();
|
||||
if (as->standbys_read > 0) {
|
||||
as->standbys_read--;
|
||||
as->standbys_pending--;
|
||||
@@ -1479,8 +1481,10 @@ static int do_ioctl(struct inode *inode, struct file *filp,
queue_event(APM_USER_STANDBY, as);
|
||||
if (standbys_pending <= 0)
|
||||
standby();
|
||||
unlock_kernel();
|
||||
break;
|
||||
case APM_IOC_SUSPEND:
|
||||
lock_kernel();
|
||||
if (as->suspends_read > 0) {
|
||||
as->suspends_read--;
|
||||
as->suspends_pending--;
|
||||
@@ -1488,16 +1492,17 @@ static int do_ioctl(struct inode *inode, struct file *filp,
} else
|
||||
queue_event(APM_USER_SUSPEND, as);
|
||||
if (suspends_pending <= 0) {
|
||||
return suspend(1);
|
||||
ret = suspend(1);
|
||||
} else {
|
||||
as->suspend_wait = 1;
|
||||
wait_event_interruptible(apm_suspend_waitqueue,
|
||||
as->suspend_wait == 0);
|
||||
return as->suspend_result;
|
||||
ret = as->suspend_result;
|
||||
}
|
||||
break;
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
default:
|
||||
return -EINVAL;
|
||||
return -ENOTTY;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -1860,7 +1865,7 @@ static const struct file_operations apm_bios_fops = {
.owner = THIS_MODULE,
|
||||
.read = do_read,
|
||||
.poll = do_poll,
|
||||
.ioctl = do_ioctl,
|
||||
.unlocked_ioctl = do_ioctl,
|
||||
.open = do_open,
|
||||
.release = do_release,
|
||||
};
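/*
 * With .unlocked_ioctl the handler is entered without the BKL and without
 * an inode argument, so do_ioctl() now takes lock_kernel()/unlock_kernel()
 * itself around the standby/suspend paths above.
 */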
|
||||
|
|
@@ -111,7 +111,7 @@ void foo(void)
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
|
||||
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
|
||||
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
|
||||
OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
|
||||
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
|
||||
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
|
||||
#endif
|
||||
|
||||
|
|
@@ -34,7 +34,7 @@ int main(void)
ENTRY(pid);
|
||||
BLANK();
|
||||
#undef ENTRY
|
||||
#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
|
||||
#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
|
||||
ENTRY(flags);
|
||||
ENTRY(addr_limit);
|
||||
ENTRY(preempt_count);
|
||||
@@ -61,8 +61,11 @@ int main(void)
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
|
||||
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
|
||||
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
|
||||
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
|
||||
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
|
||||
OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
|
||||
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
|
||||
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
|
||||
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
|
||||
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
|
||||
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
|
||||
#endif
|
||||
|
|
@@ -6,11 +6,15 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o feature_names.o
|
||||
|
||||
obj-$(CONFIG_X86_32) += common.o bugs.o
|
||||
obj-$(CONFIG_X86_64) += common_64.o bugs_64.o
|
||||
obj-$(CONFIG_X86_32) += amd.o
|
||||
obj-$(CONFIG_X86_64) += amd_64.o
|
||||
obj-$(CONFIG_X86_32) += cyrix.o
|
||||
obj-$(CONFIG_X86_32) += centaur.o
|
||||
obj-$(CONFIG_X86_64) += centaur_64.o
|
||||
obj-$(CONFIG_X86_32) += transmeta.o
|
||||
obj-$(CONFIG_X86_32) += intel.o
|
||||
obj-$(CONFIG_X86_64) += intel_64.o
|
||||
obj-$(CONFIG_X86_32) += umc.o
|
||||
|
||||
obj-$(CONFIG_X86_MCE) += mcheck/
|
||||
|
|
@@ -1,9 +1,7 @@
|
||||
/*
|
||||
* Routines to identify additional cpu features that are scattered in
|
||||
* cpuid space.
|
||||
*/
|
||||
|
||||
#include <linux/cpu.h>
|
||||
|
||||
#include <asm/pat.h>
|
||||
@@ -53,19 +51,20 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_PAT
|
||||
void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (!cpu_has_pat)
|
||||
pat_disable("PAT not supported by CPU.");
|
||||
|
||||
switch (c->x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
if (c->x86 >= 0xf && c->x86 <= 0x11)
|
||||
return;
|
||||
break;
|
||||
case X86_VENDOR_INTEL:
|
||||
if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
|
||||
return;
|
||||
break;
|
||||
case X86_VENDOR_AMD:
|
||||
case X86_VENDOR_CENTAUR:
|
||||
case X86_VENDOR_TRANSMETA:
|
||||
return;
|
||||
}
|
||||
|
||||
pat_disable(cpu_has_pat ?
|
||||
"PAT disabled. Not yet verified on this CPU type." :
|
||||
"PAT not supported by CPU.");
|
||||
pat_disable("PAT disabled. Not yet verified on this CPU type.");
|
||||
}
|
||||
#endif
|
||||
|
|
@@ -24,43 +24,6 @@
extern void vide(void);
|
||||
__asm__(".align 4\nvide: ret");
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
#define ENABLE_C1E_MASK 0x18000000
|
||||
#define CPUID_PROCESSOR_SIGNATURE 1
|
||||
#define CPUID_XFAM 0x0ff00000
|
||||
#define CPUID_XFAM_K8 0x00000000
|
||||
#define CPUID_XFAM_10H 0x00100000
|
||||
#define CPUID_XFAM_11H 0x00200000
|
||||
#define CPUID_XMOD 0x000f0000
|
||||
#define CPUID_XMOD_REV_F 0x00040000
|
||||
|
||||
/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
|
||||
static __cpuinit int amd_apic_timer_broken(void)
|
||||
{
|
||||
u32 lo, hi;
|
||||
u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
|
||||
switch (eax & CPUID_XFAM) {
|
||||
case CPUID_XFAM_K8:
|
||||
if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
|
||||
break;
|
||||
case CPUID_XFAM_10H:
|
||||
case CPUID_XFAM_11H:
|
||||
rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
|
||||
if (lo & ENABLE_C1E_MASK) {
|
||||
if (smp_processor_id() != boot_cpu_physical_apicid)
|
||||
printk(KERN_INFO "AMD C1E detected late. "
|
||||
" Force timer broadcast.\n");
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/* err on the side of caution */
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int force_mwait __cpuinitdata;
|
||||
|
||||
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
|
||||
@@ -297,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
num_cache_leaves = 3;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
if (amd_apic_timer_broken())
|
||||
local_apic_timer_disabled = 1;
|
||||
#endif
|
||||
|
||||
/* K6s reports MCEs but don't actually have all the MSRs */
|
||||
if (c->x86 < 6)
|
||||
clear_cpu_cap(c, X86_FEATURE_MCE);
|
||||
|
|
@@ -0,0 +1,222 @@
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm/numa_64.h>
|
||||
#include <asm/mmconfig.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
||||
#include <mach_apic.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
int force_mwait __cpuinitdata;
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
static int __cpuinit nearby_node(int apicid)
|
||||
{
|
||||
int i, node;
|
||||
|
||||
for (i = apicid - 1; i >= 0; i--) {
|
||||
node = apicid_to_node[i];
|
||||
if (node != NUMA_NO_NODE && node_online(node))
|
||||
return node;
|
||||
}
|
||||
for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
|
||||
node = apicid_to_node[i];
|
||||
if (node != NUMA_NO_NODE && node_online(node))
|
||||
return node;
|
||||
}
|
||||
return first_node(node_online_map); /* Shouldn't happen */
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
|
||||
* Assumes number of cores is a power of two.
|
||||
*/
|
||||
static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned bits;
|
||||
#ifdef CONFIG_NUMA
|
||||
int cpu = smp_processor_id();
|
||||
int node = 0;
|
||||
unsigned apicid = hard_smp_processor_id();
|
||||
#endif
|
||||
bits = c->x86_coreid_bits;
|
||||
|
||||
/* Low order bits define the core id (index of core in socket) */
|
||||
c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
|
||||
/* Convert the initial APIC ID into the socket ID */
|
||||
c->phys_proc_id = c->initial_apicid >> bits;
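/*
 * For example, with bits == 1 (two cores per socket) an initial APIC ID
 * of 5 (binary 101) yields cpu_core_id = 1 and phys_proc_id = 2.
 */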
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
node = c->phys_proc_id;
|
||||
if (apicid_to_node[apicid] != NUMA_NO_NODE)
|
||||
node = apicid_to_node[apicid];
|
||||
if (!node_online(node)) {
|
||||
/* Two possibilities here:
|
||||
- The CPU is missing memory and no node was created.
|
||||
In that case try picking one from a nearby CPU
|
||||
- The APIC IDs differ from the HyperTransport node IDs
|
||||
which the K8 northbridge parsing fills in.
|
||||
Assume they are all increased by a constant offset,
|
||||
but in the same order as the HT nodeids.
|
||||
If that doesn't result in a usable node fall back to the
|
||||
path for the previous case. */
|
||||
|
||||
int ht_nodeid = c->initial_apicid;
|
||||
|
||||
if (ht_nodeid >= 0 &&
|
||||
apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
|
||||
node = apicid_to_node[ht_nodeid];
|
||||
/* Pick a nearby node */
|
||||
if (!node_online(node))
|
||||
node = nearby_node(apicid);
|
||||
}
|
||||
numa_set_node(cpu, node);
|
||||
|
||||
printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned bits, ecx;
|
||||
|
||||
/* Multi core CPU? */
|
||||
if (c->extended_cpuid_level < 0x80000008)
|
||||
return;
|
||||
|
||||
ecx = cpuid_ecx(0x80000008);
|
||||
|
||||
c->x86_max_cores = (ecx & 0xff) + 1;
|
||||
|
||||
/* CPU telling us the core id bits shift? */
|
||||
bits = (ecx >> 12) & 0xF;
|
||||
|
||||
/* Otherwise recompute */
|
||||
if (bits == 0) {
|
||||
while ((1 << bits) < c->x86_max_cores)
|
||||
bits++;
|
||||
}
|
||||
|
||||
c->x86_coreid_bits = bits;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
early_init_amd_mc(c);
|
||||
|
||||
/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
|
||||
if (c->x86_power & (1<<8))
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
}
|
||||
|
||||
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned level;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned long value;
|
||||
|
||||
/*
|
||||
* Disable TLB flush filter by setting HWCR.FFDIS on K8
|
||||
* bit 6 of msr C001_0015
|
||||
*
|
||||
* Errata 63 for SH-B3 steppings
|
||||
* Errata 122 for all steppings (F+ have it disabled by default)
|
||||
*/
|
||||
if (c->x86 == 0xf) {
|
||||
rdmsrl(MSR_K8_HWCR, value);
|
||||
value |= 1 << 6;
|
||||
wrmsrl(MSR_K8_HWCR, value);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
|
||||
3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
|
||||
clear_cpu_cap(c, 0*32+31);
|
||||
|
||||
/* On C+ stepping K8 rep microcode works well for copy/memset */
|
||||
if (c->x86 == 0xf) {
|
||||
level = cpuid_eax(1);
|
||||
if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
|
||||
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
||||
}
|
||||
if (c->x86 == 0x10 || c->x86 == 0x11)
|
||||
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
||||
|
||||
/* Enable workaround for FXSAVE leak */
|
||||
if (c->x86 >= 6)
|
||||
set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
|
||||
|
||||
level = get_model_name(c);
|
||||
if (!level) {
|
||||
switch (c->x86) {
|
||||
case 0xf:
|
||||
/* Should distinguish Models here, but this is only
|
||||
a fallback anyways. */
|
||||
strcpy(c->x86_model_id, "Hammer");
|
||||
break;
|
||||
}
|
||||
}
|
||||
display_cacheinfo(c);
|
||||
|
||||
/* Multi core CPU? */
|
||||
if (c->extended_cpuid_level >= 0x80000008)
|
||||
amd_detect_cmp(c);
|
||||
|
||||
if (c->extended_cpuid_level >= 0x80000006 &&
|
||||
(cpuid_edx(0x80000006) & 0xf000))
|
||||
num_cache_leaves = 4;
|
||||
else
|
||||
num_cache_leaves = 3;
|
||||
|
||||
if (c->x86 >= 0xf && c->x86 <= 0x11)
|
||||
set_cpu_cap(c, X86_FEATURE_K8);
|
||||
|
||||
/* MFENCE stops RDTSC speculation */
|
||||
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
|
||||
|
||||
if (c->x86 == 0x10) {
|
||||
/* do this for boot cpu */
|
||||
if (c == &boot_cpu_data)
|
||||
check_enable_amd_mmconf_dmi();
|
||||
|
||||
fam10h_check_enable_mmcfg();
|
||||
}
|
||||
|
||||
if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
|
||||
unsigned long long tseg;
|
||||
|
||||
/*
|
||||
* Split up direct mapping around the TSEG SMM area.
|
||||
* Don't do it for gbpages because there seems very little
|
||||
* benefit in doing so.
|
||||
*/
|
||||
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
|
||||
printk(KERN_DEBUG "tseg: %010llx\n", tseg);
|
||||
if ((tseg>>PMD_SHIFT) <
|
||||
(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
|
||||
((tseg>>PMD_SHIFT) <
|
||||
(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
|
||||
(tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
|
||||
set_memory_4k((unsigned long)__va(tseg), 1);
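/*
 * The condition above only demotes the mapping when TSEG falls inside an
 * already direct-mapped range: below max_low_pfn_mapped, or at/above 4GB
 * and below max_pfn_mapped.
 */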
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static struct cpu_dev amd_cpu_dev __cpuinitdata = {
|
||||
.c_vendor = "AMD",
|
||||
.c_ident = { "AuthenticAMD" },
|
||||
.c_early_init = early_init_amd,
|
||||
.c_init = init_amd,
|
||||
};
|
||||
|
||||
cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
|
||||
|
@@ -59,8 +59,12 @@ static void __init check_fpu(void)
return;
|
||||
}
|
||||
|
||||
/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
|
||||
/* Test for the divl bug.. */
|
||||
/*
|
||||
* trap_init() enabled FXSR and company _before_ testing for FP
|
||||
* problems here.
|
||||
*
|
||||
* Test for the divl bug..
|
||||
*/
|
||||
__asm__("fninit\n\t"
|
||||
"fldl %1\n\t"
|
||||
"fdivl %2\n\t"
|
||||
@@ -108,10 +112,15 @@ static void __init check_popad(void)
"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
|
||||
: "=&a" (res)
|
||||
: "d" (inp)
|
||||
: "ecx", "edi" );
|
||||
/* If this fails, it means that any user program may lock the CPU hard. Too bad. */
|
||||
if (res != 12345678) printk( "Buggy.\n" );
|
||||
else printk( "OK.\n" );
|
||||
: "ecx", "edi");
|
||||
/*
|
||||
* If this fails, it means that any user program may lock the
|
||||
* CPU hard. Too bad.
|
||||
*/
|
||||
if (res != 12345678)
|
||||
printk("Buggy.\n");
|
||||
else
|
||||
printk("OK.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -137,7 +146,8 @@ static void __init check_config(void)
* i486+ only features! (WP works in supervisor mode and the
|
||||
* new "invlpg" and "bswap" instructions)
|
||||
*/
|
||||
#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
|
||||
#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
|
||||
defined(CONFIG_X86_BSWAP)
|
||||
if (boot_cpu_data.x86 == 3)
|
||||
panic("Kernel requires i486+ for 'invlpg' and other features");
|
||||
#endif
|
||||
@@ -170,6 +180,7 @@ void __init check_bugs(void)
check_fpu();
|
||||
check_hlt();
|
||||
check_popad();
|
||||
init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
|
||||
init_utsname()->machine[1] =
|
||||
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
|
||||
alternative_instructions();
|
||||
}
|
||||
|
|
@@ -0,0 +1,35 @@
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (c->x86 == 0x6 && c->x86_model >= 0xf)
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
|
||||
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
|
||||
}
|
||||
|
||||
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (c->x86 == 0x6 && c->x86_model >= 0xf) {
|
||||
c->x86_cache_alignment = c->x86_clflush_size * 2;
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
||||
}
|
||||
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
|
||||
}
|
||||
|
||||
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
|
||||
.c_vendor = "Centaur",
|
||||
.c_ident = { "CentaurHauls" },
|
||||
.c_early_init = early_init_centaur,
|
||||
.c_init = init_centaur,
|
||||
};
|
||||
|
||||
cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev);
|
||||
|
@@ -427,7 +427,7 @@ __setup("serialnumber", x86_serial_nr_setup);
/*
|
||||
* This does the hard work of actually picking apart the CPU stuff...
|
||||
*/
|
||||
void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
||||
static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
|
@@ -0,0 +1,681 @@
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kgdb.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/pat.h>
|
||||
#include <asm/numa.h>
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
#include <asm/mpspec.h>
|
||||
#include <asm/apic.h>
|
||||
#include <mach_apic.h>
|
||||
#endif
|
||||
#include <asm/pda.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/genapic.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
/* We need valid kernel segments for data and code in long mode too
|
||||
* IRET will check the segment types kkeil 2000/10/28
|
||||
* Also sysret mandates a special GDT layout
|
||||
*/
|
||||
/* The TLS descriptors are currently at a different place compared to i386.
|
||||
Hopefully nobody expects them at a fixed place (Wine?) */
|
||||
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
|
||||
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
|
||||
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
|
||||
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
|
||||
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
|
||||
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
|
||||
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
|
||||
} };
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
|
||||
|
||||
__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
||||
|
||||
/* Current gdt points %fs at the "master" per-cpu area: after this,
|
||||
* it's on the real one. */
|
||||
void switch_to_new_gdt(void)
|
||||
{
|
||||
struct desc_ptr gdt_descr;
|
||||
|
||||
gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
}
|
||||
|
||||
struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
|
||||
|
||||
static void __cpuinit default_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
display_cacheinfo(c);
|
||||
}
|
||||
|
||||
static struct cpu_dev __cpuinitdata default_cpu = {
|
||||
.c_init = default_init,
|
||||
.c_vendor = "Unknown",
|
||||
};
|
||||
static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
|
||||
|
||||
int __cpuinit get_model_name(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int *v;
|
||||
|
||||
if (c->extended_cpuid_level < 0x80000004)
|
||||
return 0;
|
||||
|
||||
v = (unsigned int *) c->x86_model_id;
|
||||
cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
|
||||
cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
|
||||
cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
|
||||
c->x86_model_id[48] = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int n, dummy, ebx, ecx, edx;
|
||||
|
||||
n = c->extended_cpuid_level;
|
||||
|
||||
if (n >= 0x80000005) {
|
||||
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
|
||||
printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
|
||||
"D cache %dK (%d bytes/line)\n",
|
||||
edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
|
||||
c->x86_cache_size = (ecx>>24) + (edx>>24);
|
||||
/* On K8 L1 TLB is inclusive, so don't count it */
|
||||
c->x86_tlbsize = 0;
|
||||
}
|
||||
|
||||
if (n >= 0x80000006) {
|
||||
cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
|
||||
ecx = cpuid_ecx(0x80000006);
|
||||
c->x86_cache_size = ecx >> 16;
|
||||
c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
|
||||
|
||||
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
|
||||
c->x86_cache_size, ecx & 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
u32 eax, ebx, ecx, edx;
|
||||
int index_msb, core_bits;
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_HT))
|
||||
return;
|
||||
if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
|
||||
goto out;
|
||||
|
||||
smp_num_siblings = (ebx & 0xff0000) >> 16;
|
||||
|
||||
if (smp_num_siblings == 1) {
|
||||
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
||||
} else if (smp_num_siblings > 1) {
|
||||
|
||||
if (smp_num_siblings > NR_CPUS) {
|
||||
printk(KERN_WARNING "CPU: Unsupported number of "
|
||||
"siblings %d", smp_num_siblings);
|
||||
smp_num_siblings = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
index_msb = get_count_order(smp_num_siblings);
|
||||
c->phys_proc_id = phys_pkg_id(index_msb);
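/*
 * phys_pkg_id(index_msb) is the APIC ID shifted right by
 * log2(smp_num_siblings), i.e. the physical package number.
 */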
|
||||
|
||||
smp_num_siblings = smp_num_siblings / c->x86_max_cores;
|
||||
|
||||
index_msb = get_count_order(smp_num_siblings);
|
||||
|
||||
core_bits = get_count_order(c->x86_max_cores);
|
||||
|
||||
c->cpu_core_id = phys_pkg_id(index_msb) &
|
||||
((1 << core_bits) - 1);
|
||||
}
|
||||
out:
|
||||
if ((c->x86_max_cores * smp_num_siblings) > 1) {
|
||||
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
|
||||
c->phys_proc_id);
|
||||
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
|
||||
c->cpu_core_id);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
|
||||
{
|
||||
char *v = c->x86_vendor_id;
|
||||
int i;
|
||||
static int printed;
|
||||
|
||||
for (i = 0; i < X86_VENDOR_NUM; i++) {
|
||||
if (cpu_devs[i]) {
|
||||
if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
|
||||
(cpu_devs[i]->c_ident[1] &&
|
||||
!strcmp(v, cpu_devs[i]->c_ident[1]))) {
|
||||
c->x86_vendor = i;
|
||||
this_cpu = cpu_devs[i];
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!printed) {
|
||||
printed++;
|
||||
printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
|
||||
printk(KERN_ERR "CPU: Your system may be unstable.\n");
|
||||
}
|
||||
c->x86_vendor = X86_VENDOR_UNKNOWN;
|
||||
}
|
||||
|
||||
static void __init early_cpu_support_print(void)
|
||||
{
|
||||
int i,j;
|
||||
struct cpu_dev *cpu_devx;
|
||||
|
||||
printk("KERNEL supported cpus:\n");
|
||||
for (i = 0; i < X86_VENDOR_NUM; i++) {
|
||||
cpu_devx = cpu_devs[i];
|
||||
if (!cpu_devx)
|
||||
continue;
|
||||
for (j = 0; j < 2; j++) {
|
||||
if (!cpu_devx->c_ident[j])
|
||||
continue;
|
||||
printk(" %s %s\n", cpu_devx->c_vendor,
|
||||
cpu_devx->c_ident[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
|
||||
|
||||
void __init early_cpu_init(void)
|
||||
{
|
||||
struct cpu_vendor_dev *cvdev;
|
||||
|
||||
for (cvdev = __x86cpuvendor_start ;
|
||||
cvdev < __x86cpuvendor_end ;
|
||||
cvdev++)
|
||||
cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
|
||||
early_cpu_support_print();
|
||||
early_identify_cpu(&boot_cpu_data);
|
||||
}
|
||||
|
||||
/* Do some early cpuid on the boot CPU to get some parameters that are
|
||||
needed before check_bugs. Everything advanced is in identify_cpu
|
||||
below. */
|
||||
static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 tfms, xlvl;
|
||||
|
||||
c->loops_per_jiffy = loops_per_jiffy;
|
||||
c->x86_cache_size = -1;
|
||||
c->x86_vendor = X86_VENDOR_UNKNOWN;
|
||||
c->x86_model = c->x86_mask = 0; /* So far unknown... */
|
||||
c->x86_vendor_id[0] = '\0'; /* Unset */
|
||||
c->x86_model_id[0] = '\0'; /* Unset */
|
||||
c->x86_clflush_size = 64;
|
||||
c->x86_cache_alignment = c->x86_clflush_size;
|
||||
c->x86_max_cores = 1;
|
||||
c->x86_coreid_bits = 0;
|
||||
c->extended_cpuid_level = 0;
|
||||
memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
||||
|
||||
/* Get vendor name */
|
||||
cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
|
||||
(unsigned int *)&c->x86_vendor_id[0],
|
||||
(unsigned int *)&c->x86_vendor_id[8],
|
||||
(unsigned int *)&c->x86_vendor_id[4]);
|
||||
|
||||
get_cpu_vendor(c);
|
||||
|
||||
/* Initialize the standard set of capabilities */
|
||||
/* Note that the vendor-specific code below might override */
|
||||
|
||||
/* Intel-defined flags: level 0x00000001 */
|
||||
if (c->cpuid_level >= 0x00000001) {
|
||||
__u32 misc;
|
||||
cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
|
||||
&c->x86_capability[0]);
|
||||
c->x86 = (tfms >> 8) & 0xf;
|
||||
c->x86_model = (tfms >> 4) & 0xf;
|
||||
c->x86_mask = tfms & 0xf;
|
||||
if (c->x86 == 0xf)
|
||||
c->x86 += (tfms >> 20) & 0xff;
|
||||
if (c->x86 >= 0x6)
|
||||
c->x86_model += ((tfms >> 16) & 0xF) << 4;
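/*
 * CPUID leaf 1 EAX: family in bits 11:8, model in bits 7:4, stepping in
 * bits 3:0; the extended family field (bits 27:20) is added when the base
 * family is 0xf, and the extended model (bits 19:16) supplies the high
 * nibble of the model for family 6 and above.
 */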
|
||||
if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
|
||||
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
|
||||
} else {
|
||||
/* Have CPUID level 0 only - unheard of */
|
||||
c->x86 = 4;
|
||||
}
|
||||
|
||||
c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
|
||||
#ifdef CONFIG_SMP
|
||||
c->phys_proc_id = c->initial_apicid;
|
||||
#endif
|
||||
/* AMD-defined flags: level 0x80000001 */
|
||||
xlvl = cpuid_eax(0x80000000);
|
||||
c->extended_cpuid_level = xlvl;
|
||||
if ((xlvl & 0xffff0000) == 0x80000000) {
|
||||
if (xlvl >= 0x80000001) {
|
||||
c->x86_capability[1] = cpuid_edx(0x80000001);
|
||||
c->x86_capability[6] = cpuid_ecx(0x80000001);
|
||||
}
|
||||
if (xlvl >= 0x80000004)
|
||||
get_model_name(c); /* Default name */
|
||||
}
|
||||
|
||||
/* Transmeta-defined flags: level 0x80860001 */
|
||||
xlvl = cpuid_eax(0x80860000);
|
||||
if ((xlvl & 0xffff0000) == 0x80860000) {
|
||||
/* Don't set x86_cpuid_level here for now to not confuse. */
|
||||
if (xlvl >= 0x80860001)
|
||||
c->x86_capability[2] = cpuid_edx(0x80860001);
|
||||
}
|
||||
|
||||
c->extended_cpuid_level = cpuid_eax(0x80000000);
|
||||
if (c->extended_cpuid_level >= 0x80000007)
|
||||
c->x86_power = cpuid_edx(0x80000007);
|
||||
|
||||
if (c->extended_cpuid_level >= 0x80000008) {
|
||||
u32 eax = cpuid_eax(0x80000008);
|
||||
|
||||
c->x86_virt_bits = (eax >> 8) & 0xff;
|
||||
c->x86_phys_bits = eax & 0xff;
|
||||
}
|
||||
|
||||
/* Assume all 64-bit CPUs support 32-bit syscall */
|
||||
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
|
||||
|
||||
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
|
||||
cpu_devs[c->x86_vendor]->c_early_init)
|
||||
cpu_devs[c->x86_vendor]->c_early_init(c);
|
||||
|
||||
validate_pat_support(c);
|
||||
|
||||
/* The early_param may have cleared this, but CPUID detection above sets it again */
|
||||
if (disable_apic)
|
||||
clear_cpu_cap(c, X86_FEATURE_APIC);
|
||||
}
|
||||
|
||||
/*
|
||||
* This does the hard work of actually picking apart the CPU stuff...
|
||||
*/
|
||||
static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
||||
{
|
||||
int i;
|
||||
|
||||
early_identify_cpu(c);
|
||||
|
||||
init_scattered_cpuid_features(c);
|
||||
|
||||
c->apicid = phys_pkg_id(0);
|
||||
|
||||
/*
|
||||
* Vendor-specific initialization. In this section we
|
||||
* canonicalize the feature flags, meaning if there are
|
||||
* features a certain CPU supports which CPUID doesn't
|
||||
* tell us, CPUID claiming incorrect flags, or other bugs,
|
||||
* we handle them here.
|
||||
*
|
||||
* At the end of this section, c->x86_capability better
|
||||
* indicate the features this CPU genuinely supports!
|
||||
*/
|
||||
if (this_cpu->c_init)
|
||||
this_cpu->c_init(c);
|
||||
|
||||
detect_ht(c);
|
||||
|
||||
/*
|
||||
* On SMP, boot_cpu_data holds the common feature set between
|
||||
* all CPUs; so make sure that we indicate which features are
|
||||
* common between the CPUs. The first time this routine gets
|
||||
* executed, c == &boot_cpu_data.
|
||||
*/
|
||||
if (c != &boot_cpu_data) {
|
||||
/* AND the already accumulated flags with these */
|
||||
for (i = 0; i < NCAPINTS; i++)
|
||||
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
|
||||
}
|
||||
|
||||
/* Clear all flags overridden by options */
|
||||
for (i = 0; i < NCAPINTS; i++)
|
||||
c->x86_capability[i] &= ~cleared_cpu_caps[i];
|
||||
|
||||
#ifdef CONFIG_X86_MCE
|
||||
mcheck_init(c);
|
||||
#endif
|
||||
select_idle_routine(c);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
numa_add_cpu(smp_processor_id());
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void __cpuinit identify_boot_cpu(void)
|
||||
{
|
||||
identify_cpu(&boot_cpu_data);
|
||||
}
|
||||
|
||||
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
|
||||
{
|
||||
BUG_ON(c == &boot_cpu_data);
|
||||
identify_cpu(c);
|
||||
mtrr_ap_init();
|
||||
}
|
||||
|
||||
static __init int setup_noclflush(char *arg)
|
||||
{
|
||||
setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
|
||||
return 1;
|
||||
}
|
||||
__setup("noclflush", setup_noclflush);
|
||||
|
||||
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (c->x86_model_id[0])
|
||||
printk(KERN_CONT "%s", c->x86_model_id);
|
||||
|
||||
if (c->x86_mask || c->cpuid_level >= 0)
|
||||
printk(KERN_CONT " stepping %02x\n", c->x86_mask);
|
||||
else
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
|
||||
static __init int setup_disablecpuid(char *arg)
|
||||
{
|
||||
int bit;
|
||||
if (get_option(&arg, &bit) && bit < NCAPINTS*32)
|
||||
setup_clear_cpu_cap(bit);
|
||||
else
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
__setup("clearcpuid=", setup_disablecpuid);
|
||||
|
||||
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
|
||||
|
||||
struct x8664_pda **_cpu_pda __read_mostly;
|
||||
EXPORT_SYMBOL(_cpu_pda);
|
||||
|
||||
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
||||
|
||||
char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
|
||||
|
||||
unsigned long __supported_pte_mask __read_mostly = ~0UL;
|
||||
EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
||||
|
||||
static int do_not_nx __cpuinitdata;
|
||||
|
||||
/* noexec=on|off
|
||||
Control non executable mappings for 64bit processes.
|
||||
|
||||
on Enable(default)
|
||||
off Disable
|
||||
*/
|
||||
static int __init nonx_setup(char *str)
|
||||
{
|
||||
if (!str)
|
||||
return -EINVAL;
|
||||
if (!strncmp(str, "on", 2)) {
|
||||
__supported_pte_mask |= _PAGE_NX;
|
||||
do_not_nx = 0;
|
||||
} else if (!strncmp(str, "off", 3)) {
|
||||
do_not_nx = 1;
|
||||
__supported_pte_mask &= ~_PAGE_NX;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
early_param("noexec", nonx_setup);
|
||||
|
||||
int force_personality32;
|
||||
|
||||
/* noexec32=on|off
|
||||
Control non executable heap for 32bit processes.
|
||||
To control the stack too use noexec=off
|
||||
|
||||
on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
|
||||
off PROT_READ implies PROT_EXEC
|
||||
*/
|
||||
static int __init nonx32_setup(char *str)
|
||||
{
|
||||
if (!strcmp(str, "on"))
|
||||
force_personality32 &= ~READ_IMPLIES_EXEC;
|
||||
else if (!strcmp(str, "off"))
|
||||
force_personality32 |= READ_IMPLIES_EXEC;
|
||||
return 1;
|
||||
}
|
||||
__setup("noexec32=", nonx32_setup);
|
||||
|
||||
void pda_init(int cpu)
|
||||
{
|
||||
struct x8664_pda *pda = cpu_pda(cpu);
|
||||
|
||||
/* Set up data that may be needed in __get_free_pages early */
|
||||
loadsegment(fs, 0);
|
||||
loadsegment(gs, 0);
|
||||
/* Memory clobbers used to order PDA accessed */
|
||||
mb();
|
||||
wrmsrl(MSR_GS_BASE, pda);
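/* The GS base now points at this CPU's PDA, so %gs-relative accesses reach it */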
|
||||
mb();
|
||||
|
||||
pda->cpunumber = cpu;
|
||||
pda->irqcount = -1;
|
||||
pda->kernelstack = (unsigned long)stack_thread_info() -
|
||||
PDA_STACKOFFSET + THREAD_SIZE;
|
||||
pda->active_mm = &init_mm;
|
||||
pda->mmu_state = 0;
|
||||
|
||||
if (cpu == 0) {
|
||||
/* others are initialized in smpboot.c */
|
||||
pda->pcurrent = &init_task;
|
||||
pda->irqstackptr = boot_cpu_stack;
|
||||
} else {
|
||||
pda->irqstackptr = (char *)
|
||||
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
|
||||
if (!pda->irqstackptr)
|
||||
panic("cannot allocate irqstack for cpu %d", cpu);
|
||||
|
||||
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
|
||||
pda->nodenumber = cpu_to_node(cpu);
|
||||
}
|
||||
|
||||
pda->irqstackptr += IRQSTACKSIZE-64;
|
||||
}
|
||||
|
||||
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
|
||||
DEBUG_STKSZ]
|
||||
__attribute__((section(".bss.page_aligned")));
|
||||
|
||||
extern asmlinkage void ignore_sysret(void);
|
||||
|
||||
/* May not be marked __init: used by software suspend */
|
||||
void syscall_init(void)
|
||||
{
|
||||
/*
|
||||
* LSTAR and STAR live in a bit strange symbiosis.
|
||||
* They both write to the same internal register. STAR allows to
|
||||
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
|
||||
*/
|
||||
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
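/*
 * STAR bits 47:32 select the kernel CS/SS loaded on SYSCALL; bits 63:48
 * give the selector base used to derive the user CS/SS on SYSRET.
 */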
|
||||
wrmsrl(MSR_LSTAR, system_call);
|
||||
wrmsrl(MSR_CSTAR, ignore_sysret);
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
syscall32_cpu_init();
|
||||
#endif
|
||||
|
||||
/* Flags to clear on syscall */
|
||||
wrmsrl(MSR_SYSCALL_MASK,
|
||||
X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
|
||||
}
|
||||
|
||||
void __cpuinit check_efer(void)
|
||||
{
|
||||
unsigned long efer;
|
||||
|
||||
rdmsrl(MSR_EFER, efer);
|
||||
if (!(efer & EFER_NX) || do_not_nx)
|
||||
__supported_pte_mask &= ~_PAGE_NX;
|
||||
}
|
||||
|
||||
unsigned long kernel_eflags;
|
||||
|
||||
/*
|
||||
* Copies of the original ist values from the tss are only accessed during
|
||||
* debugging, no special alignment required.
|
||||
*/
|
||||
DEFINE_PER_CPU(struct orig_ist, orig_ist);
|
||||
|
||||
/*
|
||||
* cpu_init() initializes state that is per-CPU. Some data is already
|
||||
* initialized (naturally) in the bootstrap process, such as the GDT
|
||||
* and IDT. We reload them nevertheless, this function acts as a
|
||||
* 'CPU state barrier', nothing should get across.
|
||||
* A lot of state is already set up in PDA init.
|
||||
*/
|
||||
void __cpuinit cpu_init(void)
|
||||
{
|
||||
int cpu = stack_smp_processor_id();
|
||||
struct tss_struct *t = &per_cpu(init_tss, cpu);
|
||||
struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
|
||||
unsigned long v;
|
||||
char *estacks = NULL;
|
||||
struct task_struct *me;
|
||||
int i;
|
||||
|
||||
/* CPU 0 is initialised in head64.c */
|
||||
if (cpu != 0)
|
||||
pda_init(cpu);
|
||||
else
|
||||
estacks = boot_exception_stacks;
|
||||
|
||||
me = current;
|
||||
|
||||
if (cpu_test_and_set(cpu, cpu_initialized))
|
||||
panic("CPU#%d already initialized!\n", cpu);
|
||||
|
||||
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
||||
|
||||
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
||||
|
||||
/*
|
||||
* Initialize the per-CPU GDT with the boot GDT,
|
||||
* and set up the GDT descriptor:
|
||||
*/
|
||||
|
||||
switch_to_new_gdt();
|
||||
load_idt((const struct desc_ptr *)&idt_descr);
|
||||
|
||||
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
|
||||
syscall_init();
|
||||
|
||||
wrmsrl(MSR_FS_BASE, 0);
|
||||
wrmsrl(MSR_KERNEL_GS_BASE, 0);
|
||||
barrier();
|
||||
|
||||
check_efer();
|
||||
|
||||
/*
|
||||
* set up and load the per-CPU TSS
|
||||
*/
|
||||
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
||||
static const unsigned int order[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
|
||||
[DEBUG_STACK - 1] = DEBUG_STACK_ORDER
|
||||
};
|
||||
if (cpu) {
|
||||
estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
|
||||
if (!estacks)
|
||||
panic("Cannot allocate exception stack %ld %d\n",
|
||||
v, cpu);
|
||||
}
|
||||
estacks += PAGE_SIZE << order[v];
|
||||
orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
|
||||
}
|
||||
|
||||
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
||||
/*
|
||||
* <= is required because the CPU will access up to
|
||||
* 8 bits beyond the end of the IO permission bitmap.
|
||||
*/
|
||||
for (i = 0; i <= IO_BITMAP_LONGS; i++)
|
||||
t->io_bitmap[i] = ~0UL;
|
||||
|
||||
atomic_inc(&init_mm.mm_count);
|
||||
me->active_mm = &init_mm;
|
||||
if (me->mm)
|
||||
BUG();
|
||||
enter_lazy_tlb(&init_mm, me);
|
||||
|
||||
load_sp0(t, &current->thread);
|
||||
set_tss_desc(cpu, t);
|
||||
load_TR_desc();
|
||||
load_LDT(&init_mm.context);
|
||||
|
||||
#ifdef CONFIG_KGDB
|
||||
/*
|
||||
* If the kgdb is connected no debug regs should be altered. This
|
||||
* is only applicable when KGDB and a KGDB I/O module are built
|
||||
* into the kernel and you are using early debugging with
|
||||
* kgdbwait. KGDB will control the kernel HW breakpoint registers.
|
||||
*/
|
||||
if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
|
||||
arch_kgdb_ops.correct_hw_break();
|
||||
else {
|
||||
#endif
|
||||
/*
|
||||
* Clear all 6 debug registers:
|
||||
*/
|
||||
|
||||
set_debugreg(0UL, 0);
|
||||
set_debugreg(0UL, 1);
|
||||
set_debugreg(0UL, 2);
|
||||
set_debugreg(0UL, 3);
|
||||
set_debugreg(0UL, 6);
|
||||
set_debugreg(0UL, 7);
|
||||
#ifdef CONFIG_KGDB
|
||||
/* If the kgdb is connected no debug regs should be altered. */
|
||||
}
|
||||
#endif
|
||||
|
||||
fpu_init();
|
||||
|
||||
raw_local_save_flags(kernel_eflags);
|
||||
|
||||
if (is_uv_system())
|
||||
uv_cpu_init();
|
||||
}
|
@@ -1,3 +1,6 @@
#ifndef ARCH_X86_CPU_H
|
||||
|
||||
#define ARCH_X86_CPU_H
|
||||
|
||||
struct cpu_model_info {
|
||||
int vendor;
|
||||
@@ -36,3 +39,5 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
|
||||
extern int get_model_name(struct cpuinfo_x86 *c);
|
||||
extern void display_cacheinfo(struct cpuinfo_x86 *c);
|
||||
|
||||
#endif
|
||||
|
|
@@ -26,9 +26,10 @@
#define NFORCE2_SAFE_DISTANCE 50
|
||||
|
||||
/* Delay in ms between FSB changes */
|
||||
//#define NFORCE2_DELAY 10
|
||||
/* #define NFORCE2_DELAY 10 */
|
||||
|
||||
/* nforce2_chipset:
|
||||
/*
|
||||
* nforce2_chipset:
|
||||
* FSB is changed using the chipset
|
||||
*/
|
||||
static struct pci_dev *nforce2_chipset_dev;
|
||||
@@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev;
/* fid:
|
||||
* multiplier * 10
|
||||
*/
|
||||
static int fid = 0;
|
||||
static int fid;
|
||||
|
||||
/* min_fsb, max_fsb:
|
||||
* minimum and maximum FSB (= FSB at boot time)
|
||||
*/
|
||||
static int min_fsb = 0;
|
||||
static int max_fsb = 0;
|
||||
static int min_fsb;
|
||||
static int max_fsb;
|
||||
|
||||
MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
|
||||
MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
|
||||
@@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444);
|
||||
MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
|
||||
MODULE_PARM_DESC(min_fsb,
|
||||
"Minimum FSB to use, if not defined: current FSB - 50");
|
||||
"Minimum FSB to use, if not defined: current FSB - 50");
|
||||
|
||||
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
|
||||
|
||||
@@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb)
|
||||
/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
|
||||
nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
|
||||
0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL);
|
||||
0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL);
|
||||
if (!nforce2_sub5)
|
||||
return 0;
|
||||
|
||||
@@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
fsb /= 1000000;
|
||||
|
||||
/* Check if PLL register is already set */
|
||||
pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp);
|
||||
pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
|
||||
|
||||
if(bootfsb || !temp)
|
||||
if (bootfsb || !temp)
|
||||
return fsb;
|
||||
|
||||
|
||||
/* Use PLL register FSB value */
|
||||
pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp);
|
||||
pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp);
|
||||
fsb = nforce2_calc_fsb(temp);
|
||||
|
||||
return fsb;
|
||||
@@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb)
}
|
||||
|
||||
/* First write? Then set actual value */
|
||||
pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp);
|
||||
pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
|
||||
if (!temp) {
|
||||
pll = nforce2_calc_pll(tfsb);
|
||||
|
||||
@@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb)
tfsb--;
|
||||
|
||||
/* Calculate the PLL reg. value */
|
||||
if ((pll = nforce2_calc_pll(tfsb)) == -1)
|
||||
pll = nforce2_calc_pll(tfsb);
|
||||
if (pll == -1)
|
||||
return -EINVAL;
|
||||
|
||||
nforce2_write_pll(pll);
|
||||
@@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu)
static int nforce2_target(struct cpufreq_policy *policy,
|
||||
unsigned int target_freq, unsigned int relation)
|
||||
{
|
||||
// unsigned long flags;
|
||||
/* unsigned long flags; */
|
||||
struct cpufreq_freqs freqs;
|
||||
unsigned int target_fsb;
|
||||
|
||||
@@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy,
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
|
||||
|
||||
/* Disable IRQs */
|
||||
//local_irq_save(flags);
|
||||
/* local_irq_save(flags); */
|
||||
|
||||
if (nforce2_set_fsb(target_fsb) < 0)
|
||||
printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
|
||||
target_fsb);
|
||||
target_fsb);
|
||||
else
|
||||
dprintk("Changed FSB successfully to %d\n",
|
||||
target_fsb);
|
||||
target_fsb);
|
||||
|
||||
/* Enable IRQs */
|
||||
//local_irq_restore(flags);
|
||||
/* local_irq_restore(flags); */
|
||||
|
||||
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
|
||||
|
||||
@@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy)
policy->max = (fsb_pol_max + 1) * fid * 100;
|
||||
|
||||
cpufreq_verify_within_limits(policy,
|
||||
policy->cpuinfo.min_freq,
|
||||
policy->cpuinfo.max_freq);
|
||||
policy->cpuinfo.min_freq,
|
||||
policy->cpuinfo.max_freq);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
/* Set maximum FSB to FSB at boot time */
|
||||
max_fsb = nforce2_fsb_read(1);
|
||||
|
||||
if(!max_fsb)
|
||||
if (!max_fsb)
|
||||
return -EIO;
|
||||
|
||||
if (!min_fsb)
|
||||
|
|
@@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
|
||||
if (cpu_has_bts)
|
||||
ds_init_intel(c);
|
||||
|
||||
#ifdef CONFIG_X86_NUMAQ
|
||||
numaq_tsc_disable();
|
||||
#endif
|
||||
}
|
||||
|
||||
static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
|
||||
|
|
@@ -0,0 +1,95 @@
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/topology.h>
|
||||
#include <asm/numa_64.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
|
||||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
|
||||
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
|
||||
}
|
||||
|
||||
/*
|
||||
* find out the number of processor cores on the die
|
||||
*/
|
||||
static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int eax, t;
|
||||
|
||||
if (c->cpuid_level < 4)
|
||||
return 1;
|
||||
|
||||
cpuid_count(4, 0, &eax, &t, &t, &t);
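/* Leaf 4 EAX bits 31:26 hold (maximum addressable core IDs per package) - 1 */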
|
||||
|
||||
if (eax & 0x1f)
|
||||
return ((eax >> 26) + 1);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void __cpuinit srat_detect_node(void)
|
||||
{
|
||||
#ifdef CONFIG_NUMA
|
||||
unsigned node;
|
||||
int cpu = smp_processor_id();
|
||||
int apicid = hard_smp_processor_id();
|
||||
|
||||
/* Don't do the funky fallback heuristics the AMD version employs
|
||||
for now. */
|
||||
node = apicid_to_node[apicid];
|
||||
if (node == NUMA_NO_NODE || !node_online(node))
|
||||
node = first_node(node_online_map);
|
||||
numa_set_node(cpu, node);
|
||||
|
||||
printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
init_intel_cacheinfo(c);
|
||||
if (c->cpuid_level > 9) {
|
||||
unsigned eax = cpuid_eax(10);
|
||||
/* Check for version and the number of counters */
|
||||
if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
|
||||
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
|
||||
}
|
||||
|
||||
if (cpu_has_ds) {
|
||||
unsigned int l1, l2;
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
|
||||
if (!(l1 & (1<<11)))
|
||||
set_cpu_cap(c, X86_FEATURE_BTS);
|
||||
if (!(l1 & (1<<12)))
|
||||
set_cpu_cap(c, X86_FEATURE_PEBS);
|
||||
}
|
||||
|
||||
|
||||
if (cpu_has_bts)
|
||||
ds_init_intel(c);
|
||||
|
||||
if (c->x86 == 15)
|
||||
c->x86_cache_alignment = c->x86_clflush_size * 2;
|
||||
if (c->x86 == 6)
|
||||
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
||||
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
|
||||
c->x86_max_cores = intel_num_cpu_cores(c);
|
||||
|
||||
srat_detect_node();
|
||||
}
|
||||
|
||||
static struct cpu_dev intel_cpu_dev __cpuinitdata = {
|
||||
.c_vendor = "Intel",
|
||||
.c_ident = { "GenuineIntel" },
|
||||
.c_early_init = early_init_intel,
|
||||
.c_init = init_intel,
|
||||
};
|
||||
cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
|
||||
|
@@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata =
{ 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
|
||||
{ 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
|
||||
{ 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
|
||||
{ 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */
|
||||
{ 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
|
||||
{ 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
|
||||
{ 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
|
||||
|
|
@@ -9,23 +9,23 @@
#include <linux/interrupt.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* Machine Check Handler For AMD Athlon/Duron */
|
||||
static void k7_machine_check(struct pt_regs * regs, long error_code)
|
||||
static void k7_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
int recover=1;
|
||||
int recover = 1;
|
||||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int i;
|
||||
|
||||
rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
if (mcgstl & (1<<0)) /* Recoverable ? */
|
||||
recover=0;
|
||||
recover = 0;
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
|
||||
smp_processor_id(), mcgsth, mcgstl);
|
||||
@@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
}
|
||||
|
||||
if (recover&2)
|
||||
panic ("CPU context corrupt");
|
||||
panic("CPU context corrupt");
|
||||
if (recover&1)
|
||||
panic ("Unable to continue");
|
||||
printk (KERN_EMERG "Attempting to continue.\n");
|
||||
panic("Unable to continue");
|
||||
printk(KERN_EMERG "Attempting to continue.\n");
|
||||
mcgstl &= ~(1<<2);
|
||||
wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
|
||||
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
}
|
||||
|
||||
|
||||
@@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
machine_check_vector = k7_machine_check;
|
||||
wmb();
|
||||
|
||||
printk (KERN_INFO "Intel machine check architecture supported.\n");
|
||||
rdmsr (MSR_IA32_MCG_CAP, l, h);
|
||||
printk(KERN_INFO "Intel machine check architecture supported.\n");
|
||||
rdmsr(MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<8)) /* Control register present ? */
|
||||
wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
nr_mce_banks = l & 0xff;
|
||||
|
||||
/* Clear status for MC index 0 separately, we don't touch CTL,
|
||||
* as some K7 Athlons cause spurious MCEs when it's enabled. */
|
||||
if (boot_cpu_data.x86 == 6) {
|
||||
wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
|
||||
wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
|
||||
i = 1;
|
||||
} else
|
||||
i = 0;
|
||||
for (; i<nr_mce_banks; i++) {
|
||||
wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
for (; i < nr_mce_banks; i++) {
|
||||
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
}
|
||||
|
||||
set_in_cr4 (X86_CR4_MCE);
|
||||
printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
|
||||
set_in_cr4(X86_CR4_MCE);
|
||||
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
|
@@ -31,7 +31,7 @@
#include <asm/idle.h>
|
||||
|
||||
#define MISC_MCELOG_MINOR 227
|
||||
#define NR_BANKS 6
|
||||
#define NR_SYSFS_BANKS 6
|
||||
|
||||
atomic_t mce_entry;
|
||||
|
||||
@@ -46,7 +46,7 @@ static int mce_dont_init;
*/
|
||||
static int tolerant = 1;
|
||||
static int banks;
|
||||
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
|
||||
static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
|
||||
static unsigned long notify_user;
|
||||
static int rip_msr;
|
||||
static int mce_bootlog = -1;
|
||||
@@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
barrier();
|
||||
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (!bank[i])
|
||||
if (i < NR_SYSFS_BANKS && !bank[i])
|
||||
continue;
|
||||
|
||||
m.misc = 0;
|
||||
@@ -444,9 +444,10 @@ static void mce_init(void *dummy)
|
||||
rdmsrl(MSR_IA32_MCG_CAP, cap);
|
||||
banks = cap & 0xff;
|
||||
if (banks > NR_BANKS) {
|
||||
printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
|
||||
banks = NR_BANKS;
|
||||
if (banks > MCE_EXTENDED_BANK) {
|
||||
banks = MCE_EXTENDED_BANK;
|
||||
printk(KERN_INFO "MCE: warning: using only %d banks\n",
|
||||
MCE_EXTENDED_BANK);
|
||||
}
|
||||
/* Use accurate RIP reporting if available. */
|
||||
if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
|
||||
@@ -462,7 +463,11 @@ static void mce_init(void *dummy)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
|
||||
for (i = 0; i < banks; i++) {
|
||||
wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
|
||||
if (i < NR_SYSFS_BANKS)
|
||||
wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
|
||||
else
|
||||
wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
|
||||
|
||||
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
|
||||
}
|
||||
}
|
||||
@@ -766,7 +771,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
} \
|
||||
static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
|
||||
|
||||
/* TBD should generate these dynamically based on number of available banks */
|
||||
/*
|
||||
* TBD should generate these dynamically based on number of available banks.
|
||||
* Have only 6 control banks in /sysfs until then.
|
||||
*/
|
||||
ACCESSOR(bank0ctl,bank[0],mce_restart())
|
||||
ACCESSOR(bank1ctl,bank[1],mce_restart())
|
||||
ACCESSOR(bank2ctl,bank[2],mce_restart())
|
||||
|
|
@@ -8,7 +8,7 @@
#include <linux/interrupt.h>
#include <linux/smp.h>

#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>

@@ -32,12 +32,12 @@ struct intel_mce_extended_msrs {
|
|||
/* u32 *reserved[]; */
|
||||
};
|
||||
|
||||
static int mce_num_extended_msrs = 0;
|
||||
static int mce_num_extended_msrs;
|
||||
|
||||
|
||||
#ifdef CONFIG_X86_MCE_P4THERMAL
|
||||
static void unexpected_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
{
|
||||
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
|
||||
smp_processor_id());
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
|
@@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
|
|||
* be some SMM goo which handles it, so we can't even put a handler
|
||||
* since it might be delivered via SMI already -zwanem.
|
||||
*/
|
||||
rdmsr (MSR_IA32_MISC_ENABLE, l, h);
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
h = apic_read(APIC_LVTTHMR);
|
||||
if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
|
||||
printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
|
||||
|
@@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
|
|||
return; /* -EBUSY */
|
||||
}
|
||||
|
||||
/* check whether a vector already exists, temporarily masked? */
|
||||
/* check whether a vector already exists, temporarily masked? */
|
||||
if (h & APIC_VECTOR_MASK) {
|
||||
printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
|
||||
"installed\n",
|
||||
|
@@ -104,18 +104,18 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
|
|||
h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
|
||||
apic_write_around(APIC_LVTTHMR, h);
|
||||
|
||||
rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
|
||||
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
|
||||
|
||||
/* ok we're good to go... */
|
||||
vendor_thermal_interrupt = intel_thermal_interrupt;
|
||||
|
||||
rdmsr (MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
|
||||
|
||||
l = apic_read (APIC_LVTTHMR);
|
||||
apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
|
||||
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
atomic_set(&therm_throt_en, 1);
|
||||
|
@@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{
u32 h;

rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
}
|
||||
|
||||
static void intel_machine_check(struct pt_regs * regs, long error_code)
|
||||
static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
int recover=1;
|
||||
int recover = 1;
|
||||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int i;
|
||||
|
||||
rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
if (mcgstl & (1<<0)) /* Recoverable ? */
|
||||
recover=0;
|
||||
recover = 0;
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
|
||||
smp_processor_id(), mcgsth, mcgstl);
|
||||
|
@@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
|
|||
}
|
||||
|
||||
if (recover & 2)
|
||||
panic ("CPU context corrupt");
|
||||
panic("CPU context corrupt");
|
||||
if (recover & 1)
|
||||
panic ("Unable to continue");
|
||||
panic("Unable to continue");
|
||||
|
||||
printk(KERN_EMERG "Attempting to continue.\n");
|
||||
/*
|
||||
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
|
||||
/*
|
||||
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
|
||||
* recoverable/continuable.This will allow BIOS to look at the MSRs
|
||||
* for errors if the OS could not log the error.
|
||||
*/
|
||||
for (i=0; i<nr_mce_banks; i++) {
|
||||
for (i = 0; i < nr_mce_banks; i++) {
|
||||
u32 msr;
|
||||
msr = MSR_IA32_MC0_STATUS+i*4;
|
||||
rdmsr (msr, low, high);
|
||||
rdmsr(msr, low, high);
|
||||
if (high&(1<<31)) {
|
||||
/* Clear it */
|
||||
wrmsr(msr, 0UL, 0UL);
|
||||
|
@@ -214,7 +214,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
|
|||
}
|
||||
}
|
||||
mcgstl &= ~(1<<2);
|
||||
wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
|
||||
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -222,30 +222,30 @@ void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
|
|||
{
|
||||
u32 l, h;
|
||||
int i;
|
||||
|
||||
|
||||
machine_check_vector = intel_machine_check;
|
||||
wmb();
|
||||
|
||||
printk (KERN_INFO "Intel machine check architecture supported.\n");
|
||||
rdmsr (MSR_IA32_MCG_CAP, l, h);
|
||||
printk(KERN_INFO "Intel machine check architecture supported.\n");
|
||||
rdmsr(MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<8)) /* Control register present ? */
|
||||
wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
nr_mce_banks = l & 0xff;
|
||||
|
||||
for (i=0; i<nr_mce_banks; i++) {
|
||||
wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
for (i = 0; i < nr_mce_banks; i++) {
|
||||
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
}
|
||||
|
||||
set_in_cr4 (X86_CR4_MCE);
|
||||
printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
|
||||
set_in_cr4(X86_CR4_MCE);
|
||||
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
|
||||
/* Check for P4/Xeon extended MCE MSRs */
|
||||
rdmsr (MSR_IA32_MCG_CAP, l, h);
|
||||
rdmsr(MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<9)) {/* MCG_EXT_P */
|
||||
mce_num_extended_msrs = (l >> 16) & 0xff;
|
||||
printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
|
||||
printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
|
||||
" available\n",
|
||||
smp_processor_id(), mce_num_extended_msrs);
|
||||
|
||||
|
|
|
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
|
|||
static unsigned long smp_changes_mask;
|
||||
static struct mtrr_state mtrr_state = {};
|
||||
static int mtrr_state_set;
|
||||
static u64 tom2;
|
||||
u64 mtrr_tom2;
|
||||
|
||||
#undef MODULE_PARAM_PREFIX
|
||||
#define MODULE_PARAM_PREFIX "mtrr."
|
||||
|
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
|
|||
}
|
||||
}
|
||||
|
||||
if (tom2) {
|
||||
if (start >= (1ULL<<32) && (end < tom2))
|
||||
if (mtrr_tom2) {
|
||||
if (start >= (1ULL<<32) && (end < mtrr_tom2))
|
||||
return MTRR_TYPE_WRBACK;
|
||||
}
|
||||
|
||||
|
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
|
|||
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
|
||||
}
|
||||
|
||||
/* fill the MSR pair relating to a var range */
|
||||
void fill_mtrr_var_range(unsigned int index,
|
||||
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
|
||||
{
|
||||
struct mtrr_var_range *vr;
|
||||
|
||||
vr = mtrr_state.var_ranges;
|
||||
|
||||
vr[index].base_lo = base_lo;
|
||||
vr[index].base_hi = base_hi;
|
||||
vr[index].mask_lo = mask_lo;
|
||||
vr[index].mask_hi = mask_hi;
|
||||
}
|
||||
|
||||
static void
|
||||
get_fixed_ranges(mtrr_type * frs)
|
||||
{
|
||||
|
@@ -213,13 +227,13 @@ void __init get_mtrr_state(void)
|
|||
mtrr_state.enabled = (lo & 0xc00) >> 10;
|
||||
|
||||
if (amd_special_default_mtrr()) {
|
||||
unsigned lo, hi;
|
||||
unsigned low, high;
|
||||
/* TOP_MEM2 */
|
||||
rdmsr(MSR_K8_TOP_MEM2, lo, hi);
|
||||
tom2 = hi;
|
||||
tom2 <<= 32;
|
||||
tom2 |= lo;
|
||||
tom2 &= 0xffffff8000000ULL;
|
||||
rdmsr(MSR_K8_TOP_MEM2, low, high);
|
||||
mtrr_tom2 = high;
|
||||
mtrr_tom2 <<= 32;
|
||||
mtrr_tom2 |= low;
|
||||
mtrr_tom2 &= 0xffffff800000ULL;
|
||||
}
|
||||
if (mtrr_show) {
|
||||
int high_width;
|
||||
|
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
|
|||
else
|
||||
printk(KERN_INFO "MTRR %u disabled\n", i);
|
||||
}
|
||||
if (tom2) {
|
||||
if (mtrr_tom2) {
|
||||
printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
|
||||
tom2, tom2>>20);
|
||||
mtrr_tom2, mtrr_tom2>>20);
|
||||
}
|
||||
}
|
||||
mtrr_state_set = 1;
|
||||
|
@@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
|
|||
|
||||
if (lo != msrwords[0] || hi != msrwords[1]) {
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
|
||||
boot_cpu_data.x86 == 15 &&
|
||||
(boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
|
||||
((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
|
||||
k8_enable_fixed_iorrs();
|
||||
mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
|
||||
|
|
|
@@ -37,6 +37,7 @@
|
|||
#include <linux/smp.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include <asm/e820.h>
|
||||
#include <asm/mtrr.h>
|
||||
|
@@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = {
|
|||
.resume = mtrr_restore,
|
||||
};
|
||||
|
||||
/* should be related to MTRR_VAR_RANGES nums */
|
||||
#define RANGE_NUM 256
|
||||
|
||||
struct res_range {
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
};
|
||||
|
||||
static int __init
|
||||
add_range(struct res_range *range, int nr_range, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
/* out of slots */
|
||||
if (nr_range >= RANGE_NUM)
|
||||
return nr_range;
|
||||
|
||||
range[nr_range].start = start;
|
||||
range[nr_range].end = end;
|
||||
|
||||
nr_range++;
|
||||
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
static int __init
|
||||
add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* try to merge it with old one */
|
||||
for (i = 0; i < nr_range; i++) {
|
||||
unsigned long final_start, final_end;
|
||||
unsigned long common_start, common_end;
|
||||
|
||||
if (!range[i].end)
|
||||
continue;
|
||||
|
||||
common_start = max(range[i].start, start);
|
||||
common_end = min(range[i].end, end);
|
||||
if (common_start > common_end + 1)
|
||||
continue;
|
||||
|
||||
final_start = min(range[i].start, start);
|
||||
final_end = max(range[i].end, end);
|
||||
|
||||
range[i].start = final_start;
|
||||
range[i].end = final_end;
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
/* need to add that */
|
||||
return add_range(range, nr_range, start, end);
|
||||
}
|
||||
|
||||
static void __init
|
||||
subtract_range(struct res_range *range, unsigned long start, unsigned long end)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (j = 0; j < RANGE_NUM; j++) {
|
||||
if (!range[j].end)
|
||||
continue;
|
||||
|
||||
if (start <= range[j].start && end >= range[j].end) {
|
||||
range[j].start = 0;
|
||||
range[j].end = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (start <= range[j].start && end < range[j].end &&
|
||||
range[j].start < end + 1) {
|
||||
range[j].start = end + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (start > range[j].start && end >= range[j].end &&
|
||||
range[j].end > start - 1) {
|
||||
range[j].end = start - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (start > range[j].start && end < range[j].end) {
|
||||
/* find the new spare */
|
||||
for (i = 0; i < RANGE_NUM; i++) {
|
||||
if (range[i].end == 0)
|
||||
break;
|
||||
}
|
||||
if (i < RANGE_NUM) {
|
||||
range[i].end = range[j].end;
|
||||
range[i].start = end + 1;
|
||||
} else {
|
||||
printk(KERN_ERR "run of slot in ranges\n");
|
||||
}
|
||||
range[j].end = start - 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int __init cmp_range(const void *x1, const void *x2)
|
||||
{
|
||||
const struct res_range *r1 = x1;
|
||||
const struct res_range *r2 = x2;
|
||||
long start1, start2;
|
||||
|
||||
start1 = r1->start;
|
||||
start2 = r2->start;
|
||||
|
||||
return start1 - start2;
|
||||
}
|
||||
|
||||
struct var_mtrr_range_state {
|
||||
unsigned long base_pfn;
|
||||
unsigned long size_pfn;
|
||||
mtrr_type type;
|
||||
};
|
||||
|
||||
struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
|
||||
static int __initdata debug_print;
|
||||
|
||||
static int __init
|
||||
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
|
||||
unsigned long extra_remove_base,
|
||||
unsigned long extra_remove_size)
|
||||
{
|
||||
unsigned long i, base, size;
|
||||
mtrr_type type;
|
||||
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
if (type != MTRR_TYPE_WRBACK)
|
||||
continue;
|
||||
base = range_state[i].base_pfn;
|
||||
size = range_state[i].size_pfn;
|
||||
nr_range = add_range_with_merge(range, nr_range, base,
|
||||
base + size - 1);
|
||||
}
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After WB checking\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
|
||||
range[i].start, range[i].end + 1);
|
||||
}
|
||||
|
||||
/* take out UC ranges */
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
if (type != MTRR_TYPE_UNCACHABLE)
|
||||
continue;
|
||||
size = range_state[i].size_pfn;
|
||||
if (!size)
|
||||
continue;
|
||||
base = range_state[i].base_pfn;
|
||||
subtract_range(range, base, base + size - 1);
|
||||
}
|
||||
if (extra_remove_size)
|
||||
subtract_range(range, extra_remove_base,
|
||||
extra_remove_base + extra_remove_size - 1);
|
||||
|
||||
/* get new range num */
|
||||
nr_range = 0;
|
||||
for (i = 0; i < RANGE_NUM; i++) {
|
||||
if (!range[i].end)
|
||||
continue;
|
||||
nr_range++;
|
||||
}
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After UC checking\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
|
||||
range[i].start, range[i].end + 1);
|
||||
}
|
||||
|
||||
/* sort the ranges */
|
||||
sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After sorting\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
|
||||
range[i].start, range[i].end + 1);
|
||||
}
|
||||
|
||||
/* clear those is not used */
|
||||
for (i = nr_range; i < RANGE_NUM; i++)
|
||||
memset(&range[i], 0, sizeof(range[i]));
|
||||
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
static struct res_range __initdata range[RANGE_NUM];
|
||||
|
||||
#ifdef CONFIG_MTRR_SANITIZER
|
||||
|
||||
static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
|
||||
{
|
||||
unsigned long sum;
|
||||
int i;
|
||||
|
||||
sum = 0;
|
||||
for (i = 0; i < nr_range; i++)
|
||||
sum += range[i].end + 1 - range[i].start;
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int enable_mtrr_cleanup __initdata =
|
||||
CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
|
||||
|
||||
static int __init disable_mtrr_cleanup_setup(char *str)
|
||||
{
|
||||
if (enable_mtrr_cleanup != -1)
|
||||
enable_mtrr_cleanup = 0;
|
||||
return 0;
|
||||
}
|
||||
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
|
||||
|
||||
static int __init enable_mtrr_cleanup_setup(char *str)
|
||||
{
|
||||
if (enable_mtrr_cleanup != -1)
|
||||
enable_mtrr_cleanup = 1;
|
||||
return 0;
|
||||
}
|
||||
early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
|
||||
|
||||
struct var_mtrr_state {
|
||||
unsigned long range_startk;
|
||||
unsigned long range_sizek;
|
||||
unsigned long chunk_sizek;
|
||||
unsigned long gran_sizek;
|
||||
unsigned int reg;
|
||||
};
|
||||
|
||||
static void __init
|
||||
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
|
||||
unsigned char type, unsigned int address_bits)
|
||||
{
|
||||
u32 base_lo, base_hi, mask_lo, mask_hi;
|
||||
u64 base, mask;
|
||||
|
||||
if (!sizek) {
|
||||
fill_mtrr_var_range(reg, 0, 0, 0, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
mask = (1ULL << address_bits) - 1;
|
||||
mask &= ~((((u64)sizek) << 10) - 1);
|
||||
|
||||
base = ((u64)basek) << 10;
|
||||
|
||||
base |= type;
|
||||
mask |= 0x800;
|
||||
|
||||
base_lo = base & ((1ULL<<32) - 1);
|
||||
base_hi = base >> 32;
|
||||
|
||||
mask_lo = mask & ((1ULL<<32) - 1);
|
||||
mask_hi = mask >> 32;
|
||||
|
||||
fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
|
||||
}
|
||||
|
||||
static void __init
|
||||
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
|
||||
unsigned char type)
|
||||
{
|
||||
range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
|
||||
range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
|
||||
range_state[reg].type = type;
|
||||
}
|
||||
|
||||
static void __init
|
||||
set_var_mtrr_all(unsigned int address_bits)
|
||||
{
|
||||
unsigned long basek, sizek;
|
||||
unsigned char type;
|
||||
unsigned int reg;
|
||||
|
||||
for (reg = 0; reg < num_var_ranges; reg++) {
|
||||
basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
|
||||
sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
|
||||
type = range_state[reg].type;
|
||||
|
||||
set_var_mtrr(reg, basek, sizek, type, address_bits);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int __init
|
||||
range_to_mtrr(unsigned int reg, unsigned long range_startk,
|
||||
unsigned long range_sizek, unsigned char type)
|
||||
{
|
||||
if (!range_sizek || (reg >= num_var_ranges))
|
||||
return reg;
|
||||
|
||||
while (range_sizek) {
|
||||
unsigned long max_align, align;
|
||||
unsigned long sizek;
|
||||
|
||||
/* Compute the maximum size I can make a range */
|
||||
if (range_startk)
|
||||
max_align = ffs(range_startk) - 1;
|
||||
else
|
||||
max_align = 32;
|
||||
align = fls(range_sizek) - 1;
|
||||
if (align > max_align)
|
||||
align = max_align;
|
||||
|
||||
sizek = 1 << align;
|
||||
if (debug_print)
|
||||
printk(KERN_DEBUG "Setting variable MTRR %d, "
|
||||
"base: %ldMB, range: %ldMB, type %s\n",
|
||||
reg, range_startk >> 10, sizek >> 10,
|
||||
(type == MTRR_TYPE_UNCACHABLE)?"UC":
|
||||
((type == MTRR_TYPE_WRBACK)?"WB":"Other")
|
||||
);
|
||||
save_var_mtrr(reg++, range_startk, sizek, type);
|
||||
range_startk += sizek;
|
||||
range_sizek -= sizek;
|
||||
if (reg >= num_var_ranges)
|
||||
break;
|
||||
}
|
||||
return reg;
|
||||
}
|
||||
|
||||
static unsigned __init
|
||||
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
|
||||
unsigned long sizek)
|
||||
{
|
||||
unsigned long hole_basek, hole_sizek;
|
||||
unsigned long second_basek, second_sizek;
|
||||
unsigned long range0_basek, range0_sizek;
|
||||
unsigned long range_basek, range_sizek;
|
||||
unsigned long chunk_sizek;
|
||||
unsigned long gran_sizek;
|
||||
|
||||
hole_basek = 0;
|
||||
hole_sizek = 0;
|
||||
second_basek = 0;
|
||||
second_sizek = 0;
|
||||
chunk_sizek = state->chunk_sizek;
|
||||
gran_sizek = state->gran_sizek;
|
||||
|
||||
/* align with gran size, prevent small block used up MTRRs */
|
||||
range_basek = ALIGN(state->range_startk, gran_sizek);
|
||||
if ((range_basek > basek) && basek)
|
||||
return second_sizek;
|
||||
state->range_sizek -= (range_basek - state->range_startk);
|
||||
range_sizek = ALIGN(state->range_sizek, gran_sizek);
|
||||
|
||||
while (range_sizek > state->range_sizek) {
|
||||
range_sizek -= gran_sizek;
|
||||
if (!range_sizek)
|
||||
return 0;
|
||||
}
|
||||
state->range_sizek = range_sizek;
|
||||
|
||||
/* try to append some small hole */
|
||||
range0_basek = state->range_startk;
|
||||
range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
|
||||
if (range0_sizek == state->range_sizek) {
|
||||
if (debug_print)
|
||||
printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
|
||||
range0_basek<<10,
|
||||
(range0_basek + state->range_sizek)<<10);
|
||||
state->reg = range_to_mtrr(state->reg, range0_basek,
|
||||
state->range_sizek, MTRR_TYPE_WRBACK);
|
||||
return 0;
|
||||
}
|
||||
|
||||
range0_sizek -= chunk_sizek;
|
||||
if (range0_sizek && sizek) {
|
||||
while (range0_basek + range0_sizek > (basek + sizek)) {
|
||||
range0_sizek -= chunk_sizek;
|
||||
if (!range0_sizek)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (range0_sizek) {
|
||||
if (debug_print)
|
||||
printk(KERN_DEBUG "range0: %016lx - %016lx\n",
|
||||
range0_basek<<10,
|
||||
(range0_basek + range0_sizek)<<10);
|
||||
state->reg = range_to_mtrr(state->reg, range0_basek,
|
||||
range0_sizek, MTRR_TYPE_WRBACK);
|
||||
|
||||
}
|
||||
|
||||
range_basek = range0_basek + range0_sizek;
|
||||
range_sizek = chunk_sizek;
|
||||
|
||||
if (range_basek + range_sizek > basek &&
|
||||
range_basek + range_sizek <= (basek + sizek)) {
|
||||
/* one hole */
|
||||
second_basek = basek;
|
||||
second_sizek = range_basek + range_sizek - basek;
|
||||
}
|
||||
|
||||
/* if last piece, only could one hole near end */
|
||||
if ((second_basek || !basek) &&
|
||||
range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
|
||||
(chunk_sizek >> 1)) {
|
||||
/*
|
||||
* one hole in middle (second_sizek is 0) or at end
|
||||
* (second_sizek is 0 )
|
||||
*/
|
||||
hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
|
||||
- second_sizek;
|
||||
hole_basek = range_basek + range_sizek - hole_sizek
|
||||
- second_sizek;
|
||||
} else {
|
||||
/* fallback for big hole, or several holes */
|
||||
range_sizek = state->range_sizek - range0_sizek;
|
||||
second_basek = 0;
|
||||
second_sizek = 0;
|
||||
}
|
||||
|
||||
if (debug_print)
|
||||
printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
|
||||
(range_basek + range_sizek)<<10);
|
||||
state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
|
||||
MTRR_TYPE_WRBACK);
|
||||
if (hole_sizek) {
|
||||
if (debug_print)
|
||||
printk(KERN_DEBUG "hole: %016lx - %016lx\n",
|
||||
hole_basek<<10, (hole_basek + hole_sizek)<<10);
|
||||
state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
|
||||
MTRR_TYPE_UNCACHABLE);
|
||||
|
||||
}
|
||||
|
||||
return second_sizek;
|
||||
}
|
||||
|
||||
static void __init
|
||||
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
|
||||
unsigned long size_pfn)
|
||||
{
|
||||
unsigned long basek, sizek;
|
||||
unsigned long second_sizek = 0;
|
||||
|
||||
if (state->reg >= num_var_ranges)
|
||||
return;
|
||||
|
||||
basek = base_pfn << (PAGE_SHIFT - 10);
|
||||
sizek = size_pfn << (PAGE_SHIFT - 10);
|
||||
|
||||
/* See if I can merge with the last range */
|
||||
if ((basek <= 1024) ||
|
||||
(state->range_startk + state->range_sizek == basek)) {
|
||||
unsigned long endk = basek + sizek;
|
||||
state->range_sizek = endk - state->range_startk;
|
||||
return;
|
||||
}
|
||||
/* Write the range mtrrs */
|
||||
if (state->range_sizek != 0)
|
||||
second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
|
||||
|
||||
/* Allocate an msr */
|
||||
state->range_startk = basek + second_sizek;
|
||||
state->range_sizek = sizek - second_sizek;
|
||||
}
|
||||
|
||||
/* mininum size of mtrr block that can take hole */
|
||||
static u64 mtrr_chunk_size __initdata = (256ULL<<20);
|
||||
|
||||
static int __init parse_mtrr_chunk_size_opt(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
mtrr_chunk_size = memparse(p, &p);
|
||||
return 0;
|
||||
}
|
||||
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
|
||||
|
||||
/* granity of mtrr of block */
|
||||
static u64 mtrr_gran_size __initdata;
|
||||
|
||||
static int __init parse_mtrr_gran_size_opt(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
mtrr_gran_size = memparse(p, &p);
|
||||
return 0;
|
||||
}
|
||||
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
|
||||
|
||||
static int nr_mtrr_spare_reg __initdata =
|
||||
CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
|
||||
|
||||
static int __init parse_mtrr_spare_reg(char *arg)
|
||||
{
|
||||
if (arg)
|
||||
nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
|
||||
|
||||
static int __init
|
||||
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
|
||||
u64 chunk_size, u64 gran_size)
|
||||
{
|
||||
struct var_mtrr_state var_state;
|
||||
int i;
|
||||
int num_reg;
|
||||
|
||||
var_state.range_startk = 0;
|
||||
var_state.range_sizek = 0;
|
||||
var_state.reg = 0;
|
||||
var_state.chunk_sizek = chunk_size >> 10;
|
||||
var_state.gran_sizek = gran_size >> 10;
|
||||
|
||||
memset(range_state, 0, sizeof(range_state));
|
||||
|
||||
/* Write the range etc */
|
||||
for (i = 0; i < nr_range; i++)
|
||||
set_var_mtrr_range(&var_state, range[i].start,
|
||||
range[i].end - range[i].start + 1);
|
||||
|
||||
/* Write the last range */
|
||||
if (var_state.range_sizek != 0)
|
||||
range_to_mtrr_with_hole(&var_state, 0, 0);
|
||||
|
||||
num_reg = var_state.reg;
|
||||
/* Clear out the extra MTRR's */
|
||||
while (var_state.reg < num_var_ranges) {
|
||||
save_var_mtrr(var_state.reg, 0, 0, 0);
|
||||
var_state.reg++;
|
||||
}
|
||||
|
||||
return num_reg;
|
||||
}
|
||||
|
||||
struct mtrr_cleanup_result {
|
||||
unsigned long gran_sizek;
|
||||
unsigned long chunk_sizek;
|
||||
unsigned long lose_cover_sizek;
|
||||
unsigned int num_reg;
|
||||
int bad;
|
||||
};
|
||||
|
||||
/*
|
||||
* gran_size: 1M, 2M, ..., 2G
|
||||
* chunk size: gran_size, ..., 4G
|
||||
* so we need (2+13)*6
|
||||
*/
|
||||
#define NUM_RESULT 90
|
||||
#define PSHIFT (PAGE_SHIFT - 10)
|
||||
|
||||
static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
|
||||
static struct res_range __initdata range_new[RANGE_NUM];
|
||||
static unsigned long __initdata min_loss_pfn[RANGE_NUM];
|
||||
|
||||
static int __init mtrr_cleanup(unsigned address_bits)
|
||||
{
|
||||
unsigned long extra_remove_base, extra_remove_size;
|
||||
unsigned long i, base, size, def, dummy;
|
||||
mtrr_type type;
|
||||
int nr_range, nr_range_new;
|
||||
u64 chunk_size, gran_size;
|
||||
unsigned long range_sums, range_sums_new;
|
||||
int index_good;
|
||||
int num_reg_good;
|
||||
|
||||
/* extra one for all 0 */
|
||||
int num[MTRR_NUM_TYPES + 1];
|
||||
|
||||
if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
|
||||
return 0;
|
||||
rdmsr(MTRRdefType_MSR, def, dummy);
|
||||
def &= 0xff;
|
||||
if (def != MTRR_TYPE_UNCACHABLE)
|
||||
return 0;
|
||||
|
||||
/* get it and store it aside */
|
||||
memset(range_state, 0, sizeof(range_state));
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
mtrr_if->get(i, &base, &size, &type);
|
||||
range_state[i].base_pfn = base;
|
||||
range_state[i].size_pfn = size;
|
||||
range_state[i].type = type;
|
||||
}
|
||||
|
||||
/* check entries number */
|
||||
memset(num, 0, sizeof(num));
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
size = range_state[i].size_pfn;
|
||||
if (type >= MTRR_NUM_TYPES)
|
||||
continue;
|
||||
if (!size)
|
||||
type = MTRR_NUM_TYPES;
|
||||
num[type]++;
|
||||
}
|
||||
|
||||
/* check if we got UC entries */
|
||||
if (!num[MTRR_TYPE_UNCACHABLE])
|
||||
return 0;
|
||||
|
||||
/* check if we only had WB and UC */
|
||||
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
|
||||
num_var_ranges - num[MTRR_NUM_TYPES])
|
||||
return 0;
|
||||
|
||||
memset(range, 0, sizeof(range));
|
||||
extra_remove_size = 0;
|
||||
if (mtrr_tom2) {
|
||||
extra_remove_base = 1 << (32 - PAGE_SHIFT);
|
||||
extra_remove_size =
|
||||
(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
|
||||
}
|
||||
nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
|
||||
extra_remove_size);
|
||||
range_sums = sum_ranges(range, nr_range);
|
||||
printk(KERN_INFO "total RAM coverred: %ldM\n",
|
||||
range_sums >> (20 - PAGE_SHIFT));
|
||||
|
||||
if (mtrr_chunk_size && mtrr_gran_size) {
|
||||
int num_reg;
|
||||
|
||||
debug_print = 1;
|
||||
/* convert ranges to var ranges state */
|
||||
num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
|
||||
mtrr_gran_size);
|
||||
|
||||
/* we got new setting in range_state, check it */
|
||||
memset(range_new, 0, sizeof(range_new));
|
||||
nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
|
||||
extra_remove_base,
|
||||
extra_remove_size);
|
||||
range_sums_new = sum_ranges(range_new, nr_range_new);
|
||||
|
||||
i = 0;
|
||||
result[i].chunk_sizek = mtrr_chunk_size >> 10;
|
||||
result[i].gran_sizek = mtrr_gran_size >> 10;
|
||||
result[i].num_reg = num_reg;
|
||||
if (range_sums < range_sums_new) {
|
||||
result[i].lose_cover_sizek =
|
||||
(range_sums_new - range_sums) << PSHIFT;
|
||||
result[i].bad = 1;
|
||||
} else
|
||||
result[i].lose_cover_sizek =
|
||||
(range_sums - range_sums_new) << PSHIFT;
|
||||
|
||||
printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
|
||||
result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
|
||||
result[i].chunk_sizek >> 10);
|
||||
printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n",
|
||||
result[i].num_reg, result[i].bad?"-":"",
|
||||
result[i].lose_cover_sizek >> 10);
|
||||
if (!result[i].bad) {
|
||||
set_var_mtrr_all(address_bits);
|
||||
return 1;
|
||||
}
|
||||
printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
|
||||
"will find optimal one\n");
|
||||
debug_print = 0;
|
||||
memset(result, 0, sizeof(result[0]));
|
||||
}
|
||||
|
||||
i = 0;
|
||||
memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
|
||||
memset(result, 0, sizeof(result));
|
||||
for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
|
||||
for (chunk_size = gran_size; chunk_size < (1ULL<<33);
|
||||
chunk_size <<= 1) {
|
||||
int num_reg;
|
||||
|
||||
if (debug_print)
|
||||
printk(KERN_INFO
|
||||
"\ngran_size: %lldM chunk_size_size: %lldM\n",
|
||||
gran_size >> 20, chunk_size >> 20);
|
||||
if (i >= NUM_RESULT)
|
||||
continue;
|
||||
|
||||
/* convert ranges to var ranges state */
|
||||
num_reg = x86_setup_var_mtrrs(range, nr_range,
|
||||
chunk_size, gran_size);
|
||||
|
||||
/* we got new setting in range_state, check it */
|
||||
memset(range_new, 0, sizeof(range_new));
|
||||
nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
|
||||
extra_remove_base, extra_remove_size);
|
||||
range_sums_new = sum_ranges(range_new, nr_range_new);
|
||||
|
||||
result[i].chunk_sizek = chunk_size >> 10;
|
||||
result[i].gran_sizek = gran_size >> 10;
|
||||
result[i].num_reg = num_reg;
|
||||
if (range_sums < range_sums_new) {
|
||||
result[i].lose_cover_sizek =
|
||||
(range_sums_new - range_sums) << PSHIFT;
|
||||
result[i].bad = 1;
|
||||
} else
|
||||
result[i].lose_cover_sizek =
|
||||
(range_sums - range_sums_new) << PSHIFT;
|
||||
|
||||
/* double check it */
|
||||
if (!result[i].bad && !result[i].lose_cover_sizek) {
|
||||
if (nr_range_new != nr_range ||
|
||||
memcmp(range, range_new, sizeof(range)))
|
||||
result[i].bad = 1;
|
||||
}
|
||||
|
||||
if (!result[i].bad && (range_sums - range_sums_new <
|
||||
min_loss_pfn[num_reg])) {
|
||||
min_loss_pfn[num_reg] =
|
||||
range_sums - range_sums_new;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/* print out all */
|
||||
for (i = 0; i < NUM_RESULT; i++) {
|
||||
printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
|
||||
result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
|
||||
result[i].chunk_sizek >> 10);
|
||||
printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
|
||||
result[i].num_reg, result[i].bad?"-":"",
|
||||
result[i].lose_cover_sizek >> 10);
|
||||
}
|
||||
|
||||
/* try to find the optimal index */
|
||||
if (nr_mtrr_spare_reg >= num_var_ranges)
|
||||
nr_mtrr_spare_reg = num_var_ranges - 1;
|
||||
num_reg_good = -1;
|
||||
for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
|
||||
if (!min_loss_pfn[i]) {
|
||||
num_reg_good = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
index_good = -1;
|
||||
if (num_reg_good != -1) {
|
||||
for (i = 0; i < NUM_RESULT; i++) {
|
||||
if (!result[i].bad &&
|
||||
result[i].num_reg == num_reg_good &&
|
||||
!result[i].lose_cover_sizek) {
|
||||
index_good = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (index_good != -1) {
|
||||
printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
|
||||
i = index_good;
|
||||
printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
|
||||
result[i].gran_sizek >> 10,
|
||||
result[i].chunk_sizek >> 10);
|
||||
printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
|
||||
result[i].num_reg,
|
||||
result[i].lose_cover_sizek >> 10);
|
||||
/* convert ranges to var ranges state */
|
||||
chunk_size = result[i].chunk_sizek;
|
||||
chunk_size <<= 10;
|
||||
gran_size = result[i].gran_sizek;
|
||||
gran_size <<= 10;
|
||||
debug_print = 1;
|
||||
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
|
||||
set_var_mtrr_all(address_bits);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
|
||||
printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static int __init mtrr_cleanup(unsigned address_bits)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int __initdata changed_by_mtrr_cleanup;
|
||||
|
||||
static int disable_mtrr_trim;
|
||||
|
||||
static int __init disable_mtrr_trim_setup(char *str)
|
||||
|
@@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static u64 __init real_trim_memory(unsigned long start_pfn,
|
||||
unsigned long limit_pfn)
|
||||
{
|
||||
u64 trim_start, trim_size;
|
||||
trim_start = start_pfn;
|
||||
trim_start <<= PAGE_SHIFT;
|
||||
trim_size = limit_pfn;
|
||||
trim_size <<= PAGE_SHIFT;
|
||||
trim_size -= trim_start;
|
||||
|
||||
return e820_update_range(trim_start, trim_size, E820_RAM,
|
||||
E820_RESERVED);
|
||||
}
|
||||
/**
|
||||
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
|
||||
* @end_pfn: ending page frame number
|
||||
|
@@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
|||
{
|
||||
unsigned long i, base, size, highest_pfn = 0, def, dummy;
|
||||
mtrr_type type;
|
||||
u64 trim_start, trim_size;
|
||||
int nr_range;
|
||||
u64 total_trim_size;
|
||||
|
||||
/* extra one for all 0 */
|
||||
int num[MTRR_NUM_TYPES + 1];
|
||||
/*
|
||||
* Make sure we only trim uncachable memory on machines that
|
||||
* support the Intel MTRR architecture:
|
||||
|
@@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
|||
if (def != MTRR_TYPE_UNCACHABLE)
|
||||
return 0;
|
||||
|
||||
if (amd_special_default_mtrr())
|
||||
return 0;
|
||||
/* get it and store it aside */
|
||||
memset(range_state, 0, sizeof(range_state));
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
mtrr_if->get(i, &base, &size, &type);
|
||||
range_state[i].base_pfn = base;
|
||||
range_state[i].size_pfn = size;
|
||||
range_state[i].type = type;
|
||||
}
|
||||
|
||||
/* Find highest cached pfn */
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
mtrr_if->get(i, &base, &size, &type);
|
||||
type = range_state[i].type;
|
||||
if (type != MTRR_TYPE_WRBACK)
|
||||
continue;
|
||||
base = range_state[i].base_pfn;
|
||||
size = range_state[i].size_pfn;
|
||||
if (highest_pfn < base + size)
|
||||
highest_pfn = base + size;
|
||||
}
|
||||
|
@@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (highest_pfn < end_pfn) {
|
||||
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
|
||||
" all of memory, losing %luMB of RAM.\n",
|
||||
(end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
|
||||
/* check entries number */
|
||||
memset(num, 0, sizeof(num));
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
if (type >= MTRR_NUM_TYPES)
|
||||
continue;
|
||||
size = range_state[i].size_pfn;
|
||||
if (!size)
|
||||
type = MTRR_NUM_TYPES;
|
||||
num[type]++;
|
||||
}
|
||||
|
||||
WARN_ON(1);
|
||||
/* no entry for WB? */
|
||||
if (!num[MTRR_TYPE_WRBACK])
|
||||
return 0;
|
||||
|
||||
/* check if we only had WB and UC */
|
||||
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
|
||||
num_var_ranges - num[MTRR_NUM_TYPES])
|
||||
return 0;
|
||||
|
||||
memset(range, 0, sizeof(range));
|
||||
nr_range = 0;
|
||||
if (mtrr_tom2) {
|
||||
range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
|
||||
range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
|
||||
if (highest_pfn < range[nr_range].end + 1)
|
||||
highest_pfn = range[nr_range].end + 1;
|
||||
nr_range++;
|
||||
}
|
||||
nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
|
||||
|
||||
total_trim_size = 0;
|
||||
/* check the head */
|
||||
if (range[0].start)
|
||||
total_trim_size += real_trim_memory(0, range[0].start);
|
||||
/* check the holes */
|
||||
for (i = 0; i < nr_range - 1; i++) {
|
||||
if (range[i].end + 1 < range[i+1].start)
|
||||
total_trim_size += real_trim_memory(range[i].end + 1,
|
||||
range[i+1].start);
|
||||
}
|
||||
/* check the top */
|
||||
i = nr_range - 1;
|
||||
if (range[i].end + 1 < end_pfn)
|
||||
total_trim_size += real_trim_memory(range[i].end + 1,
|
||||
end_pfn);
|
||||
|
||||
if (total_trim_size) {
|
||||
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
|
||||
" all of memory, losing %lluMB of RAM.\n",
|
||||
total_trim_size >> 20);
|
||||
|
||||
if (!changed_by_mtrr_cleanup)
|
||||
WARN_ON(1);
|
||||
|
||||
printk(KERN_INFO "update e820 for mtrr\n");
|
||||
trim_start = highest_pfn;
|
||||
trim_start <<= PAGE_SHIFT;
|
||||
trim_size = end_pfn;
|
||||
trim_size <<= PAGE_SHIFT;
|
||||
trim_size -= trim_start;
|
||||
update_memory_range(trim_start, trim_size, E820_RAM,
|
||||
E820_RESERVED);
|
||||
update_e820();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
|||
*/
|
||||
void __init mtrr_bp_init(void)
|
||||
{
|
||||
u32 phys_addr;
|
||||
init_ifs();
|
||||
|
||||
phys_addr = 32;
|
||||
|
||||
if (cpu_has_mtrr) {
|
||||
mtrr_if = &generic_mtrr_ops;
|
||||
size_or_mask = 0xff000000; /* 36 bits */
|
||||
size_and_mask = 0x00f00000;
|
||||
phys_addr = 36;
|
||||
|
||||
/* This is an AMD specific MSR, but we assume(hope?) that
|
||||
Intel will implement it to when they extend the address
|
||||
bus of the Xeon. */
|
||||
if (cpuid_eax(0x80000000) >= 0x80000008) {
|
||||
u32 phys_addr;
|
||||
phys_addr = cpuid_eax(0x80000008) & 0xff;
|
||||
/* CPUID workaround for Intel 0F33/0F34 CPU */
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
|
||||
|
@@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void)
|
|||
don't support PAE */
|
||||
size_or_mask = 0xfff00000; /* 32 bits */
|
||||
size_and_mask = 0;
|
||||
phys_addr = 32;
|
||||
}
|
||||
} else {
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
|
@@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void)
|
|||
if (mtrr_if) {
|
||||
set_num_var_ranges();
|
||||
init_table();
|
||||
if (use_intel())
|
||||
if (use_intel()) {
|
||||
get_mtrr_state();
|
||||
|
||||
if (mtrr_cleanup(phys_addr)) {
|
||||
changed_by_mtrr_cleanup = 1;
|
||||
mtrr_if->set_all();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void)
|
|||
{
|
||||
if (!mtrr_if)
|
||||
return 0;
|
||||
if (use_intel())
|
||||
mtrr_state_warn();
|
||||
else {
|
||||
if (use_intel()) {
|
||||
if (!changed_by_mtrr_cleanup)
|
||||
mtrr_state_warn();
|
||||
} else {
|
||||
/* The CPUs haven't MTRR and seem to not support SMP. They have
|
||||
* specific drivers, we use a tricky method to support
|
||||
* suspend/resume for them.
|
||||
|
|
|
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
|
|||
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
|
||||
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
|
||||
|
||||
void fill_mtrr_var_range(unsigned int index,
|
||||
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
|
||||
void get_mtrr_state(void);
|
||||
|
||||
extern void set_mtrr_ops(struct mtrr_ops * ops);
|
||||
|
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
|
|||
#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
|
||||
|
||||
extern unsigned int num_var_ranges;
|
||||
extern u64 mtrr_tom2;
|
||||
|
||||
void mtrr_state_warn(void);
|
||||
const char *mtrr_attrib_to_str(int x);
|
||||
|
|
|
@@ -1,11 +1,15 @@
|
|||
/* local apic based NMI watchdog for various CPUs.
|
||||
This file also handles reservation of performance counters for coordination
|
||||
with other users (like oprofile).
|
||||
|
||||
Note that these events normally don't tick when the CPU idles. This means
|
||||
the frequency varies with CPU load.
|
||||
|
||||
Original code for K7/P6 written by Keith Owens */
|
||||
/*
|
||||
* local apic based NMI watchdog for various CPUs.
|
||||
*
|
||||
* This file also handles reservation of performance counters for coordination
|
||||
* with other users (like oprofile).
|
||||
*
|
||||
* Note that these events normally don't tick when the CPU idles. This means
|
||||
* the frequency varies with CPU load.
|
||||
*
|
||||
* Original code for K7/P6 written by Keith Owens
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/module.h>
|
||||
|
@@ -36,12 +40,16 @@ struct wd_ops {
|
|||
|
||||
static const struct wd_ops *wd_ops;
|
||||
|
||||
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
|
||||
* offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
|
||||
/*
|
||||
* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
|
||||
* offset from MSR_P4_BSU_ESCR0.
|
||||
*
|
||||
* It will be the max for all platforms (for now)
|
||||
*/
|
||||
#define NMI_MAX_COUNTER_BITS 66
|
||||
|
||||
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
|
||||
/*
|
||||
* perfctr_nmi_owner tracks the ownership of the perfctr registers:
|
||||
* evtsel_nmi_owner tracks the ownership of the event selection
|
||||
* - different performance counters/ event selection may be reserved for
|
||||
* different subsystems this reservation system just tries to coordinate
|
||||
|
@@ -73,8 +81,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* converts an msr to an appropriate reservation bit */
|
||||
/* returns the bit offset of the event selection register */
|
||||
/*
|
||||
* converts an msr to an appropriate reservation bit
|
||||
* returns the bit offset of the event selection register
|
||||
*/
|
||||
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
|
||||
{
|
||||
/* returns the bit offset of the event selection register */
|
||||
|
@@ -114,6 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
|
|||
|
||||
return (!test_bit(counter, perfctr_nmi_owner));
|
||||
}
|
||||
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
|
||||
|
||||
int reserve_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
|
@@ -128,6 +139,7 @@ int reserve_perfctr_nmi(unsigned int msr)
|
|||
return 1;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reserve_perfctr_nmi);
|
||||
|
||||
void release_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
|
@@ -140,6 +152,7 @@ void release_perfctr_nmi(unsigned int msr)
|
|||
|
||||
clear_bit(counter, perfctr_nmi_owner);
|
||||
}
|
||||
EXPORT_SYMBOL(release_perfctr_nmi);
|
||||
|
||||
int reserve_evntsel_nmi(unsigned int msr)
|
||||
{
|
||||
|
@@ -154,6 +167,7 @@ int reserve_evntsel_nmi(unsigned int msr)
|
|||
return 1;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reserve_evntsel_nmi);
|
||||
|
||||
void release_evntsel_nmi(unsigned int msr)
|
||||
{
|
||||
|
@@ -166,11 +180,6 @@ void release_evntsel_nmi(unsigned int msr)
|
|||
|
||||
clear_bit(counter, evntsel_nmi_owner);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
|
||||
EXPORT_SYMBOL(reserve_perfctr_nmi);
|
||||
EXPORT_SYMBOL(release_perfctr_nmi);
|
||||
EXPORT_SYMBOL(reserve_evntsel_nmi);
|
||||
EXPORT_SYMBOL(release_evntsel_nmi);
|
||||
|
||||
void disable_lapic_nmi_watchdog(void)
|
||||
|
@@ -181,7 +190,9 @@ void disable_lapic_nmi_watchdog(void)
|
|||
return;
|
||||
|
||||
on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
|
||||
wd_ops->unreserve();
|
||||
|
||||
if (wd_ops)
|
||||
wd_ops->unreserve();
|
||||
|
||||
BUG_ON(atomic_read(&nmi_active) != 0);
|
||||
}
|
||||
|
@@ -232,8 +243,8 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
|
|||
return retval;
|
||||
}
|
||||
|
||||
static void
|
||||
write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
|
||||
static void write_watchdog_counter(unsigned int perfctr_msr,
|
||||
const char *descr, unsigned nmi_hz)
|
||||
{
|
||||
u64 count = (u64)cpu_khz * 1000;
|
||||
|
||||
|
@@ -244,7 +255,7 @@ write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi
|
|||
}
|
||||
|
||||
static void write_watchdog_counter32(unsigned int perfctr_msr,
|
||||
const char *descr, unsigned nmi_hz)
|
||||
const char *descr, unsigned nmi_hz)
|
||||
{
|
||||
u64 count = (u64)cpu_khz * 1000;
|
||||
|
||||
|
@@ -254,9 +265,10 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
|
|||
wrmsr(perfctr_msr, (u32)(-count), 0);
|
||||
}
|
||||
|
||||
/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
|
||||
nicely stable so there is not much variety */
|
||||
|
||||
/*
|
||||
* AMD K7/K8/Family10h/Family11h support.
|
||||
* AMD keeps this interface nicely stable so there is not much variety
|
||||
*/
|
||||
#define K7_EVNTSEL_ENABLE (1 << 22)
|
||||
#define K7_EVNTSEL_INT (1 << 20)
|
||||
#define K7_EVNTSEL_OS (1 << 17)
|
||||
|
@@ -289,7 +301,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
|
|||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
wd->cccr_msr = 0; /* unused */
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@@ -325,18 +337,19 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
|
|||
}
|
||||
|
||||
static const struct wd_ops k7_wd_ops = {
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_k7_watchdog,
|
||||
.rearm = single_msr_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_K7_PERFCTR0,
|
||||
.evntsel = MSR_K7_EVNTSEL0,
|
||||
.checkbit = 1ULL<<47,
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_k7_watchdog,
|
||||
.rearm = single_msr_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_K7_PERFCTR0,
|
||||
.evntsel = MSR_K7_EVNTSEL0,
|
||||
.checkbit = 1ULL << 47,
|
||||
};
|
||||
|
||||
/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
|
||||
|
||||
/*
|
||||
* Intel Model 6 (PPro+,P2,P3,P-M,Core1)
|
||||
*/
|
||||
#define P6_EVNTSEL0_ENABLE (1 << 22)
|
||||
#define P6_EVNTSEL_INT (1 << 20)
|
||||
#define P6_EVNTSEL_OS (1 << 17)
|
||||
|
@@ -372,52 +385,58 @@ static int setup_p6_watchdog(unsigned nmi_hz)
|
|||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
wd->cccr_msr = 0; /* unused */
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
|
||||
{
|
||||
/* P6 based Pentium M need to re-unmask
|
||||
/*
|
||||
* P6 based Pentium M need to re-unmask
|
||||
* the apic vector but it doesn't hurt
|
||||
* other P6 variant.
|
||||
* ArchPerfom/Core Duo also needs this */
|
||||
* ArchPerfom/Core Duo also needs this
|
||||
*/
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
|
||||
/* P6/ARCH_PERFMON has 32 bit counter write */
|
||||
write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
|
||||
}
|
||||
|
||||
static const struct wd_ops p6_wd_ops = {
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_p6_watchdog,
|
||||
.rearm = p6_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_P6_PERFCTR0,
|
||||
.evntsel = MSR_P6_EVNTSEL0,
|
||||
.checkbit = 1ULL<<39,
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_p6_watchdog,
|
||||
.rearm = p6_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_P6_PERFCTR0,
|
||||
.evntsel = MSR_P6_EVNTSEL0,
|
||||
.checkbit = 1ULL << 39,
|
||||
};
|
||||
|
||||
/* Intel P4 performance counters. By far the most complicated of all. */
|
||||
|
||||
#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
|
||||
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
|
||||
#define P4_ESCR_OS (1<<3)
|
||||
#define P4_ESCR_USR (1<<2)
|
||||
#define P4_CCCR_OVF_PMI0 (1<<26)
|
||||
#define P4_CCCR_OVF_PMI1 (1<<27)
|
||||
#define P4_CCCR_THRESHOLD(N) ((N)<<20)
|
||||
#define P4_CCCR_COMPLEMENT (1<<19)
|
||||
#define P4_CCCR_COMPARE (1<<18)
|
||||
#define P4_CCCR_REQUIRED (3<<16)
|
||||
#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
|
||||
#define P4_CCCR_ENABLE (1<<12)
|
||||
#define P4_CCCR_OVF (1<<31)
|
||||
|
||||
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
|
||||
CRU_ESCR0 (with any non-null event selector) through a complemented
|
||||
max threshold. [IA32-Vol3, Section 14.9.9] */
|
||||
/*
|
||||
* Intel P4 performance counters.
|
||||
* By far the most complicated of all.
|
||||
*/
|
||||
#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
|
||||
#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
|
||||
#define P4_ESCR_OS (1 << 3)
|
||||
#define P4_ESCR_USR (1 << 2)
|
||||
#define P4_CCCR_OVF_PMI0 (1 << 26)
|
||||
#define P4_CCCR_OVF_PMI1 (1 << 27)
|
||||
#define P4_CCCR_THRESHOLD(N) ((N) << 20)
|
||||
#define P4_CCCR_COMPLEMENT (1 << 19)
|
||||
#define P4_CCCR_COMPARE (1 << 18)
|
||||
#define P4_CCCR_REQUIRED (3 << 16)
|
||||
#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
|
||||
#define P4_CCCR_ENABLE (1 << 12)
|
||||
#define P4_CCCR_OVF (1 << 31)
|
||||
|
||||
/*
|
||||
* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
|
||||
* CRU_ESCR0 (with any non-null event selector) through a complemented
|
||||
* max threshold. [IA32-Vol3, Section 14.9.9]
|
||||
*/
|
||||
static int setup_p4_watchdog(unsigned nmi_hz)
|
||||
{
|
||||
unsigned int perfctr_msr, evntsel_msr, cccr_msr;
|
||||
|
@@ -442,7 +461,8 @@ static int setup_p4_watchdog(unsigned nmi_hz)
|
|||
#endif
|
||||
ht_num = 0;
|
||||
|
||||
/* performance counters are shared resources
|
||||
/*
|
||||
* performance counters are shared resources
|
||||
* assign each hyperthread its own set
|
||||
* (re-use the ESCR0 register, seems safe
|
||||
* and keeps the cccr_val the same)
|
||||
|
@@ -540,20 +560,21 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
|
|||
}
|
||||
|
||||
static const struct wd_ops p4_wd_ops = {
|
||||
.reserve = p4_reserve,
|
||||
.unreserve = p4_unreserve,
|
||||
.setup = setup_p4_watchdog,
|
||||
.rearm = p4_rearm,
|
||||
.stop = stop_p4_watchdog,
|
||||
.reserve = p4_reserve,
|
||||
.unreserve = p4_unreserve,
|
||||
.setup = setup_p4_watchdog,
|
||||
.rearm = p4_rearm,
|
||||
.stop = stop_p4_watchdog,
|
||||
/* RED-PEN this is wrong for the other sibling */
|
||||
.perfctr = MSR_P4_BPU_PERFCTR0,
|
||||
.evntsel = MSR_P4_BSU_ESCR0,
|
||||
.checkbit = 1ULL<<39,
|
||||
.perfctr = MSR_P4_BPU_PERFCTR0,
|
||||
.evntsel = MSR_P4_BSU_ESCR0,
|
||||
.checkbit = 1ULL << 39,
|
||||
};
|
||||
|
||||
/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
|
||||
all future Intel CPUs. */
|
||||
|
||||
/*
|
||||
* Watchdog using the Intel architected PerfMon.
|
||||
* Used for Core2 and hopefully all future Intel CPUs.
|
||||
*/
|
||||
#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
|
||||
#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
|
||||
|
||||
|
@@ -599,19 +620,19 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
|
|||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
wd->cccr_msr = 0; /* unused */
|
||||
intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct wd_ops intel_arch_wd_ops __read_mostly = {
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_intel_arch_watchdog,
|
||||
.rearm = p6_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_ARCH_PERFMON_PERFCTR1,
|
||||
.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_intel_arch_watchdog,
|
||||
.rearm = p6_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_ARCH_PERFMON_PERFCTR1,
|
||||
.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
|
||||
};
|
||||
|
||||
static void probe_nmi_watchdog(void)
|
||||
|
@@ -624,8 +645,10 @@ static void probe_nmi_watchdog(void)
|
|||
wd_ops = &k7_wd_ops;
|
||||
break;
|
||||
case X86_VENDOR_INTEL:
|
||||
/* Work around Core Duo (Yonah) errata AE49 where perfctr1
|
||||
doesn't have a working enable bit. */
|
||||
/*
|
||||
* Work around Core Duo (Yonah) errata AE49 where perfctr1
|
||||
* doesn't have a working enable bit.
|
||||
*/
|
||||
if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
|
||||
intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
|
||||
intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
|
||||
|
@@ -636,7 +659,7 @@ static void probe_nmi_watchdog(void)
|
|||
}
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
if (boot_cpu_data.x86_model > 0xd)
|
||||
if (boot_cpu_data.x86_model > 13)
|
||||
return;
|
||||
|
||||
wd_ops = &p6_wd_ops;
|
||||
|
@@ -697,10 +720,11 @@ int lapic_wd_event(unsigned nmi_hz)
|
|||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
u64 ctr;
|
||||
|
||||
rdmsrl(wd->perfctr_msr, ctr);
|
||||
if (ctr & wd_ops->checkbit) { /* perfctr still running? */
|
||||
if (ctr & wd_ops->checkbit) /* perfctr still running? */
|
||||
return 0;
|
||||
}
|
||||
|
||||
wd_ops->rearm(wd, nmi_hz);
|
||||
return 1;
|
||||
}
|
||||
|
|
The diff for one file is not shown because of its size.
|
@@ -1,775 +0,0 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pfn.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/suspend.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
struct e820map e820;
|
||||
struct change_member {
|
||||
struct e820entry *pbios; /* pointer to original bios entry */
|
||||
unsigned long long addr; /* address for this change point */
|
||||
};
|
||||
static struct change_member change_point_list[2*E820MAX] __initdata;
|
||||
static struct change_member *change_point[2*E820MAX] __initdata;
|
||||
static struct e820entry *overlap_list[E820MAX] __initdata;
|
||||
static struct e820entry new_bios[E820MAX] __initdata;
|
||||
/* For PCI or other memory-mapped resources */
|
||||
unsigned long pci_mem_start = 0x10000000;
|
||||
#ifdef CONFIG_PCI
|
||||
EXPORT_SYMBOL(pci_mem_start);
|
||||
#endif
|
||||
extern int user_defined_memmap;
|
||||
|
||||
static struct resource system_rom_resource = {
|
||||
.name = "System ROM",
|
||||
.start = 0xf0000,
|
||||
.end = 0xfffff,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
};
|
||||
|
||||
static struct resource extension_rom_resource = {
|
||||
.name = "Extension ROM",
|
||||
.start = 0xe0000,
|
||||
.end = 0xeffff,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
};
|
||||
|
||||
static struct resource adapter_rom_resources[] = { {
|
||||
.name = "Adapter ROM",
|
||||
.start = 0xc8000,
|
||||
.end = 0,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
}, {
|
||||
.name = "Adapter ROM",
|
||||
.start = 0,
|
||||
.end = 0,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
}, {
|
||||
.name = "Adapter ROM",
|
||||
.start = 0,
|
||||
.end = 0,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
}, {
|
||||
.name = "Adapter ROM",
|
||||
.start = 0,
|
||||
.end = 0,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
}, {
|
||||
.name = "Adapter ROM",
|
||||
.start = 0,
|
||||
.end = 0,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
}, {
|
||||
.name = "Adapter ROM",
|
||||
.start = 0,
|
||||
.end = 0,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
} };
|
||||
|
||||
static struct resource video_rom_resource = {
|
||||
.name = "Video ROM",
|
||||
.start = 0xc0000,
|
||||
.end = 0xc7fff,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
||||
};
|
||||
|
||||
#define ROMSIGNATURE 0xaa55
|
||||
|
||||
static int __init romsignature(const unsigned char *rom)
|
||||
{
|
||||
const unsigned short * const ptr = (const unsigned short *)rom;
|
||||
unsigned short sig;
|
||||
|
||||
return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
|
||||
}
|
||||
|
||||
static int __init romchecksum(const unsigned char *rom, unsigned long length)
|
||||
{
|
||||
unsigned char sum, c;
|
||||
|
||||
for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
|
||||
sum += c;
|
||||
return !length && !sum;
|
||||
}
|
||||
|
||||
static void __init probe_roms(void)
|
||||
{
|
||||
const unsigned char *rom;
|
||||
unsigned long start, length, upper;
|
||||
unsigned char c;
|
||||
int i;
|
||||
|
||||
/* video rom */
|
||||
upper = adapter_rom_resources[0].start;
|
||||
for (start = video_rom_resource.start; start < upper; start += 2048) {
|
||||
rom = isa_bus_to_virt(start);
|
||||
if (!romsignature(rom))
|
||||
continue;
|
||||
|
||||
video_rom_resource.start = start;
|
||||
|
||||
if (probe_kernel_address(rom + 2, c) != 0)
|
||||
continue;
|
||||
|
||||
/* 0 < length <= 0x7f * 512, historically */
|
||||
length = c * 512;
|
||||
|
||||
/* if checksum okay, trust length byte */
|
||||
if (length && romchecksum(rom, length))
|
||||
video_rom_resource.end = start + length - 1;
|
||||
|
||||
request_resource(&iomem_resource, &video_rom_resource);
|
||||
break;
|
||||
}
|
||||
|
||||
start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
|
||||
if (start < upper)
|
||||
start = upper;
|
||||
|
||||
/* system rom */
|
||||
request_resource(&iomem_resource, &system_rom_resource);
|
||||
upper = system_rom_resource.start;
|
||||
|
||||
/* check for extension rom (ignore length byte!) */
|
||||
rom = isa_bus_to_virt(extension_rom_resource.start);
|
||||
if (romsignature(rom)) {
|
||||
length = extension_rom_resource.end - extension_rom_resource.start + 1;
|
||||
if (romchecksum(rom, length)) {
|
||||
request_resource(&iomem_resource, &extension_rom_resource);
|
||||
upper = extension_rom_resource.start;
|
||||
}
|
||||
}
|
||||
|
||||
/* check for adapter roms on 2k boundaries */
|
||||
for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
|
||||
rom = isa_bus_to_virt(start);
|
||||
if (!romsignature(rom))
|
||||
continue;
|
||||
|
||||
if (probe_kernel_address(rom + 2, c) != 0)
|
||||
continue;
|
||||
|
||||
/* 0 < length <= 0x7f * 512, historically */
|
||||
length = c * 512;
|
||||
|
||||
/* but accept any length that fits if checksum okay */
|
||||
if (!length || start + length > upper || !romchecksum(rom, length))
|
||||
continue;
|
||||
|
||||
adapter_rom_resources[i].start = start;
|
||||
adapter_rom_resources[i].end = start + length - 1;
|
||||
request_resource(&iomem_resource, &adapter_rom_resources[i]);
|
||||
|
||||
start = adapter_rom_resources[i++].end & ~2047UL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Request address space for all standard RAM and ROM resources
|
||||
* and also for regions reported as reserved by the e820.
|
||||
*/
|
||||
void __init init_iomem_resources(struct resource *code_resource,
|
||||
struct resource *data_resource,
|
||||
struct resource *bss_resource)
|
||||
{
|
||||
int i;
|
||||
|
||||
probe_roms();
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct resource *res;
|
||||
#ifndef CONFIG_RESOURCES_64BIT
|
||||
if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
|
||||
continue;
|
||||
#endif
|
||||
res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
|
||||
switch (e820.map[i].type) {
|
||||
case E820_RAM: res->name = "System RAM"; break;
|
||||
case E820_ACPI: res->name = "ACPI Tables"; break;
|
||||
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
|
||||
default: res->name = "reserved";
|
||||
}
|
||||
res->start = e820.map[i].addr;
|
||||
res->end = res->start + e820.map[i].size - 1;
|
||||
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
||||
if (request_resource(&iomem_resource, res)) {
|
||||
kfree(res);
|
||||
continue;
|
||||
}
|
||||
if (e820.map[i].type == E820_RAM) {
|
||||
/*
|
||||
* We don't know which RAM region contains kernel data,
|
||||
* so we try it repeatedly and let the resource manager
|
||||
* test it.
|
||||
*/
|
||||
request_resource(res, code_resource);
|
||||
request_resource(res, data_resource);
|
||||
request_resource(res, bss_resource);
|
||||
#ifdef CONFIG_KEXEC
|
||||
if (crashk_res.start != crashk_res.end)
|
||||
request_resource(res, &crashk_res);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
|
||||
/**
|
||||
* e820_mark_nosave_regions - Find the ranges of physical addresses that do not
|
||||
* correspond to e820 RAM areas and mark the corresponding pages as nosave for
|
||||
* hibernation.
|
||||
*
|
||||
* This function requires the e820 map to be sorted and without any
|
||||
* overlapping entries and assumes the first e820 area to be RAM.
|
||||
*/
|
||||
void __init e820_mark_nosave_regions(void)
|
||||
{
|
||||
int i;
|
||||
unsigned long pfn;
|
||||
|
||||
pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
|
||||
for (i = 1; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
|
||||
if (pfn < PFN_UP(ei->addr))
|
||||
register_nosave_region(pfn, PFN_UP(ei->addr));
|
||||
|
||||
pfn = PFN_DOWN(ei->addr + ei->size);
|
||||
if (ei->type != E820_RAM)
|
||||
register_nosave_region(PFN_UP(ei->addr), pfn);
|
||||
|
||||
if (pfn >= max_low_pfn)
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void __init add_memory_region(unsigned long long start,
|
||||
unsigned long long size, int type)
|
||||
{
|
||||
int x;
|
||||
|
||||
x = e820.nr_map;
|
||||
|
||||
if (x == E820MAX) {
|
||||
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
e820.map[x].addr = start;
|
||||
e820.map[x].size = size;
|
||||
e820.map[x].type = type;
|
||||
e820.nr_map++;
|
||||
} /* add_memory_region */
|
||||
|
||||
/*
|
||||
* Sanitize the BIOS e820 map.
|
||||
*
|
||||
* Some e820 responses include overlapping entries. The following
|
||||
* replaces the original e820 map with a new one, removing overlaps.
|
||||
*
|
||||
*/
|
||||
int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
|
||||
{
|
||||
struct change_member *change_tmp;
|
||||
unsigned long current_type, last_type;
|
||||
unsigned long long last_addr;
|
||||
int chgidx, still_changing;
|
||||
int overlap_entries;
|
||||
int new_bios_entry;
|
||||
int old_nr, new_nr, chg_nr;
|
||||
int i;
|
||||
|
||||
/*
|
||||
Visually we're performing the following (1,2,3,4 = memory types)...
|
||||
|
||||
Sample memory map (w/overlaps):
|
||||
____22__________________
|
||||
______________________4_
|
||||
____1111________________
|
||||
_44_____________________
|
||||
11111111________________
|
||||
____________________33__
|
||||
___________44___________
|
||||
__________33333_________
|
||||
______________22________
|
||||
___________________2222_
|
||||
_________111111111______
|
||||
_____________________11_
|
||||
_________________4______
|
||||
|
||||
Sanitized equivalent (no overlap):
|
||||
1_______________________
|
||||
_44_____________________
|
||||
___1____________________
|
||||
____22__________________
|
||||
______11________________
|
||||
_________1______________
|
||||
__________3_____________
|
||||
___________44___________
|
||||
_____________33_________
|
||||
_______________2________
|
||||
________________1_______
|
||||
_________________4______
|
||||
___________________2____
|
||||
____________________33__
|
||||
______________________4_
|
||||
*/
|
||||
/* if there's only one memory region, don't bother */
|
||||
if (*pnr_map < 2) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
old_nr = *pnr_map;
|
||||
|
||||
/* bail out if we find any unreasonable addresses in bios map */
|
||||
for (i=0; i<old_nr; i++)
|
||||
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* create pointers for initial change-point information (for sorting) */
|
||||
for (i=0; i < 2*old_nr; i++)
|
||||
change_point[i] = &change_point_list[i];
|
||||
|
||||
/* record all known change-points (starting and ending addresses),
|
||||
omitting those that are for empty memory regions */
|
||||
chgidx = 0;
|
||||
for (i=0; i < old_nr; i++) {
|
||||
if (biosmap[i].size != 0) {
|
||||
change_point[chgidx]->addr = biosmap[i].addr;
|
||||
change_point[chgidx++]->pbios = &biosmap[i];
|
||||
change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
|
||||
change_point[chgidx++]->pbios = &biosmap[i];
|
||||
}
|
||||
}
|
||||
chg_nr = chgidx; /* true number of change-points */
|
||||
|
||||
/* sort change-point list by memory addresses (low -> high) */
|
||||
still_changing = 1;
|
||||
while (still_changing) {
|
||||
still_changing = 0;
|
||||
for (i=1; i < chg_nr; i++) {
|
||||
/* if <current_addr> > <last_addr>, swap */
|
||||
/* or, if current=<start_addr> & last=<end_addr>, swap */
|
||||
if ((change_point[i]->addr < change_point[i-1]->addr) ||
|
||||
((change_point[i]->addr == change_point[i-1]->addr) &&
|
||||
(change_point[i]->addr == change_point[i]->pbios->addr) &&
|
||||
(change_point[i-1]->addr != change_point[i-1]->pbios->addr))
|
||||
)
|
||||
{
|
||||
change_tmp = change_point[i];
|
||||
change_point[i] = change_point[i-1];
|
||||
change_point[i-1] = change_tmp;
|
||||
still_changing=1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* create a new bios memory map, removing overlaps */
|
||||
overlap_entries=0; /* number of entries in the overlap table */
|
||||
new_bios_entry=0; /* index for creating new bios map entries */
|
||||
last_type = 0; /* start with undefined memory type */
|
||||
last_addr = 0; /* start with 0 as last starting address */
|
||||
/* loop through change-points, determining affect on the new bios map */
|
||||
for (chgidx=0; chgidx < chg_nr; chgidx++)
|
||||
{
|
||||
/* keep track of all overlapping bios entries */
|
||||
if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
|
||||
{
|
||||
/* add map entry to overlap list (> 1 entry implies an overlap) */
|
||||
overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* remove entry from list (order independent, so swap with last) */
|
||||
for (i=0; i<overlap_entries; i++)
|
||||
{
|
||||
if (overlap_list[i] == change_point[chgidx]->pbios)
|
||||
overlap_list[i] = overlap_list[overlap_entries-1];
|
||||
}
|
||||
overlap_entries--;
|
||||
}
|
||||
/* if there are overlapping entries, decide which "type" to use */
|
||||
/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
|
||||
current_type = 0;
|
||||
for (i=0; i<overlap_entries; i++)
|
||||
if (overlap_list[i]->type > current_type)
|
||||
current_type = overlap_list[i]->type;
|
||||
/* continue building up new bios map based on this information */
|
||||
if (current_type != last_type) {
|
||||
if (last_type != 0) {
|
||||
new_bios[new_bios_entry].size =
|
||||
change_point[chgidx]->addr - last_addr;
|
||||
/* move forward only if the new size was non-zero */
|
||||
if (new_bios[new_bios_entry].size != 0)
|
||||
if (++new_bios_entry >= E820MAX)
|
||||
break; /* no more space left for new bios entries */
|
||||
}
|
||||
if (current_type != 0) {
|
||||
new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
|
||||
new_bios[new_bios_entry].type = current_type;
|
||||
last_addr=change_point[chgidx]->addr;
|
||||
}
|
||||
last_type = current_type;
|
||||
}
|
||||
}
|
||||
new_nr = new_bios_entry; /* retain count for new bios entries */
|
||||
|
||||
/* copy new bios mapping into original location */
|
||||
memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
|
||||
*pnr_map = new_nr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the BIOS e820 map into a safe place.
|
||||
*
|
||||
* Sanity-check it while we're at it..
|
||||
*
|
||||
* If we're lucky and live on a modern system, the setup code
|
||||
* will have given us a memory map that we can use to properly
|
||||
* set up memory. If we aren't, we'll fake a memory map.
|
||||
*
|
||||
* We check to see that the memory map contains at least 2 elements
|
||||
* before we'll use it, because the detection code in setup.S may
|
||||
* not be perfect and most every PC known to man has two memory
|
||||
* regions: one from 0 to 640k, and one from 1mb up. (The IBM
|
||||
* thinkpad 560x, for example, does not cooperate with the memory
|
||||
* detection code.)
|
||||
*/
|
||||
int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
|
||||
{
|
||||
/* Only one memory region (or negative)? Ignore it */
|
||||
if (nr_map < 2)
|
||||
return -1;
|
||||
|
||||
do {
|
||||
u64 start = biosmap->addr;
|
||||
u64 size = biosmap->size;
|
||||
u64 end = start + size;
|
||||
u32 type = biosmap->type;
|
||||
|
||||
/* Overflow in 64 bits? Ignore the memory map. */
|
||||
if (start > end)
|
||||
return -1;
|
||||
|
||||
add_memory_region(start, size, type);
|
||||
} while (biosmap++, --nr_map);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the highest page frame number we have available
|
||||
*/
|
||||
void __init propagate_e820_map(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
max_pfn = 0;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
unsigned long start, end;
|
||||
/* RAM? */
|
||||
if (e820.map[i].type != E820_RAM)
|
||||
continue;
|
||||
start = PFN_UP(e820.map[i].addr);
|
||||
end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
|
||||
if (start >= end)
|
||||
continue;
|
||||
if (end > max_pfn)
|
||||
max_pfn = end;
|
||||
memory_present(0, start, end);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Register fully available low RAM pages with the bootmem allocator.
|
||||
*/
|
||||
void __init register_bootmem_low_pages(unsigned long max_low_pfn)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
unsigned long curr_pfn, last_pfn, size;
|
||||
/*
|
||||
* Reserve usable low memory
|
||||
*/
|
||||
if (e820.map[i].type != E820_RAM)
|
||||
continue;
|
||||
/*
|
||||
* We are rounding up the start address of usable memory:
|
||||
*/
|
||||
curr_pfn = PFN_UP(e820.map[i].addr);
|
||||
if (curr_pfn >= max_low_pfn)
|
||||
continue;
|
||||
/*
|
||||
* ... and at the end of the usable range downwards:
|
||||
*/
|
||||
last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
|
||||
|
||||
if (last_pfn > max_low_pfn)
|
||||
last_pfn = max_low_pfn;
|
||||
|
||||
/*
|
||||
* .. finally, did all the rounding and playing
|
||||
* around just make the area go away?
|
||||
*/
|
||||
if (last_pfn <= curr_pfn)
|
||||
continue;
|
||||
|
||||
size = last_pfn - curr_pfn;
|
||||
free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
|
||||
}
|
||||
}
|
||||
|
||||
void __init e820_register_memory(void)
|
||||
{
|
||||
unsigned long gapstart, gapsize, round;
|
||||
unsigned long long last;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Search for the biggest gap in the low 32 bits of the e820
|
||||
* memory space.
|
||||
*/
|
||||
last = 0x100000000ull;
|
||||
gapstart = 0x10000000;
|
||||
gapsize = 0x400000;
|
||||
i = e820.nr_map;
|
||||
while (--i >= 0) {
|
||||
unsigned long long start = e820.map[i].addr;
|
||||
unsigned long long end = start + e820.map[i].size;
|
||||
|
||||
/*
|
||||
* Since "last" is at most 4GB, we know we'll
|
||||
* fit in 32 bits if this condition is true
|
||||
*/
|
||||
if (last > end) {
|
||||
unsigned long gap = last - end;
|
||||
|
||||
if (gap > gapsize) {
|
||||
gapsize = gap;
|
||||
gapstart = end;
|
||||
}
|
||||
}
|
||||
if (start < last)
|
||||
last = start;
|
||||
}
|
||||
|
||||
/*
|
||||
* See how much we want to round up: start off with
|
||||
* rounding to the next 1MB area.
|
||||
*/
|
||||
round = 0x100000;
|
||||
while ((gapsize >> 4) > round)
|
||||
round += round;
|
||||
/* Fun with two's complement */
|
||||
pci_mem_start = (gapstart + round) & -round;
|
||||
|
||||
printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
|
||||
pci_mem_start, gapstart, gapsize);
|
||||
}
|
||||
|
||||
void __init print_memory_map(char *who)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
printk(" %s: %016Lx - %016Lx ", who,
|
||||
e820.map[i].addr,
|
||||
e820.map[i].addr + e820.map[i].size);
|
||||
switch (e820.map[i].type) {
|
||||
case E820_RAM: printk("(usable)\n");
|
||||
break;
|
||||
case E820_RESERVED:
|
||||
printk("(reserved)\n");
|
||||
break;
|
||||
case E820_ACPI:
|
||||
printk("(ACPI data)\n");
|
||||
break;
|
||||
case E820_NVS:
|
||||
printk("(ACPI NVS)\n");
|
||||
break;
|
||||
default: printk("type %u\n", e820.map[i].type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void __init limit_regions(unsigned long long size)
|
||||
{
|
||||
unsigned long long current_addr;
|
||||
int i;
|
||||
|
||||
print_memory_map("limit_regions start");
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
current_addr = e820.map[i].addr + e820.map[i].size;
|
||||
if (current_addr < size)
|
||||
continue;
|
||||
|
||||
if (e820.map[i].type != E820_RAM)
|
||||
continue;
|
||||
|
||||
if (e820.map[i].addr >= size) {
|
||||
/*
|
||||
* This region starts past the end of the
|
||||
* requested size, skip it completely.
|
||||
*/
|
||||
e820.nr_map = i;
|
||||
} else {
|
||||
e820.nr_map = i + 1;
|
||||
e820.map[i].size -= current_addr - size;
|
||||
}
|
||||
print_memory_map("limit_regions endfor");
|
||||
return;
|
||||
}
|
||||
print_memory_map("limit_regions endfunc");
|
||||
}
|
||||
|
||||
/*
|
||||
* This function checks if any part of the range <start,end> is mapped
|
||||
* with type.
|
||||
*/
|
||||
int
|
||||
e820_any_mapped(u64 start, u64 end, unsigned type)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
const struct e820entry *ei = &e820.map[i];
|
||||
if (type && ei->type != type)
|
||||
continue;
|
||||
if (ei->addr >= end || ei->addr + ei->size <= start)
|
||||
continue;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(e820_any_mapped);
|
||||
|
||||
/*
|
||||
* This function checks if the entire range <start,end> is mapped with type.
|
||||
*
|
||||
* Note: this function only works correctly if the e820 table is sorted and
|
||||
* not-overlapping, which is the case
|
||||
*/
|
||||
int __init
|
||||
e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
|
||||
{
|
||||
u64 start = s;
|
||||
u64 end = e;
|
||||
int i;
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
if (type && ei->type != type)
|
||||
continue;
|
||||
/* is the region (part) in overlap with the current region ?*/
|
||||
if (ei->addr >= end || ei->addr + ei->size <= start)
|
||||
continue;
|
||||
/* if the region is at the beginning of <start,end> we move
|
||||
* start to the end of the region since it's ok until there
|
||||
*/
|
||||
if (ei->addr <= start)
|
||||
start = ei->addr + ei->size;
|
||||
/* if start is now at or beyond end, we're done, full
|
||||
* coverage */
|
||||
if (start >= end)
|
||||
return 1; /* we're done */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init parse_memmap(char *arg)
|
||||
{
|
||||
if (!arg)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(arg, "exactmap") == 0) {
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
/* If we are doing a crash dump, we
|
||||
* still need to know the real mem
|
||||
* size before original memory map is
|
||||
* reset.
|
||||
*/
|
||||
propagate_e820_map();
|
||||
saved_max_pfn = max_pfn;
|
||||
#endif
|
||||
e820.nr_map = 0;
|
||||
user_defined_memmap = 1;
|
||||
} else {
|
||||
/* If the user specifies memory size, we
|
||||
* limit the BIOS-provided memory map to
|
||||
* that size. exactmap can be used to specify
|
||||
* the exact map. mem=number can be used to
|
||||
* trim the existing memory map.
|
||||
*/
|
||||
unsigned long long start_at, mem_size;
|
||||
|
||||
mem_size = memparse(arg, &arg);
|
||||
if (*arg == '@') {
|
||||
start_at = memparse(arg+1, &arg);
|
||||
add_memory_region(start_at, mem_size, E820_RAM);
|
||||
} else if (*arg == '#') {
|
||||
start_at = memparse(arg+1, &arg);
|
||||
add_memory_region(start_at, mem_size, E820_ACPI);
|
||||
} else if (*arg == '$') {
|
||||
start_at = memparse(arg+1, &arg);
|
||||
add_memory_region(start_at, mem_size, E820_RESERVED);
|
||||
} else {
|
||||
limit_regions(mem_size);
|
||||
user_defined_memmap = 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
early_param("memmap", parse_memmap);
|
||||
void __init update_memory_range(u64 start, u64 size, unsigned old_type,
|
||||
unsigned new_type)
|
||||
{
|
||||
int i;
|
||||
|
||||
BUG_ON(old_type == new_type);
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
u64 final_start, final_end;
|
||||
if (ei->type != old_type)
|
||||
continue;
|
||||
/* totally covered? */
|
||||
if (ei->addr >= start && ei->size <= size) {
|
||||
ei->type = new_type;
|
||||
continue;
|
||||
}
|
||||
/* partially covered */
|
||||
final_start = max(start, ei->addr);
|
||||
final_end = min(start + size, ei->addr + ei->size);
|
||||
if (final_start >= final_end)
|
||||
continue;
|
||||
add_memory_region(final_start, final_end - final_start,
|
||||
new_type);
|
||||
}
|
||||
}
|
||||
void __init update_e820(void)
|
||||
{
|
||||
u8 nr_map;
|
||||
|
||||
nr_map = e820.nr_map;
|
||||
if (sanitize_e820_map(e820.map, &nr_map))
|
||||
return;
|
||||
e820.nr_map = nr_map;
|
||||
printk(KERN_INFO "modified physical RAM map:\n");
|
||||
print_memory_map("modified");
|
||||
}
|
|
@@ -1,952 +0,0 @@
|
|||
/*
|
||||
* Handle the memory map.
|
||||
* The functions here do the job until bootmem takes over.
|
||||
*
|
||||
* Getting sanitize_e820_map() in sync with i386 version by applying change:
|
||||
* - Provisions for empty E820 memory regions (reported by certain BIOSes).
|
||||
* Alex Achenbach <xela@slit.de>, December 2002.
|
||||
* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
|
||||
*
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/pfn.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/kdebug.h>
|
||||
#include <asm/trampoline.h>
|
||||
|
||||
struct e820map e820;
|
||||
|
||||
/*
|
||||
* PFN of last memory page.
|
||||
*/
|
||||
unsigned long end_pfn;
|
||||
|
||||
/*
|
||||
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
|
||||
* The direct mapping extends to max_pfn_mapped, so that we can directly access
|
||||
* apertures, ACPI and other tables without having to play with fixmaps.
|
||||
*/
|
||||
unsigned long max_pfn_mapped;
|
||||
|
||||
/*
|
||||
* Last pfn which the user wants to use.
|
||||
*/
|
||||
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* Early reserved memory areas.
|
||||
*/
|
||||
#define MAX_EARLY_RES 20
|
||||
|
||||
struct early_res {
|
||||
unsigned long start, end;
|
||||
char name[16];
|
||||
};
|
||||
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
|
||||
{ 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
|
||||
#ifdef CONFIG_X86_TRAMPOLINE
|
||||
{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
|
||||
#endif
|
||||
{}
|
||||
};
|
||||
|
||||
void __init reserve_early(unsigned long start, unsigned long end, char *name)
|
||||
{
|
||||
int i;
|
||||
struct early_res *r;
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||
r = &early_res[i];
|
||||
if (end > r->start && start < r->end)
|
||||
panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
|
||||
start, end - 1, name?name:"", r->start, r->end - 1, r->name);
|
||||
}
|
||||
if (i >= MAX_EARLY_RES)
|
||||
panic("Too many early reservations");
|
||||
r = &early_res[i];
|
||||
r->start = start;
|
||||
r->end = end;
|
||||
if (name)
|
||||
strncpy(r->name, name, sizeof(r->name) - 1);
|
||||
}
|
||||
|
||||
void __init free_early(unsigned long start, unsigned long end)
|
||||
{
|
||||
struct early_res *r;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||
r = &early_res[i];
|
||||
if (start == r->start && end == r->end)
|
||||
break;
|
||||
}
|
||||
if (i >= MAX_EARLY_RES || !early_res[i].end)
|
||||
panic("free_early on not reserved area: %lx-%lx!", start, end);
|
||||
|
||||
for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
|
||||
;
|
||||
|
||||
memmove(&early_res[i], &early_res[i + 1],
|
||||
(j - 1 - i) * sizeof(struct early_res));
|
||||
|
||||
early_res[j - 1].end = 0;
|
||||
}
|
||||
|
||||
void __init early_res_to_bootmem(unsigned long start, unsigned long end)
|
||||
{
|
||||
int i;
|
||||
unsigned long final_start, final_end;
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||
struct early_res *r = &early_res[i];
|
||||
final_start = max(start, r->start);
|
||||
final_end = min(end, r->end);
|
||||
if (final_start >= final_end)
|
||||
continue;
|
||||
printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
|
||||
final_start, final_end - 1, r->name);
|
||||
reserve_bootmem_generic(final_start, final_end - final_start);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for already reserved areas */
|
||||
static inline int __init
|
||||
bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
|
||||
{
|
||||
int i;
|
||||
unsigned long addr = *addrp, last;
|
||||
int changed = 0;
|
||||
again:
|
||||
last = addr + size;
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||
struct early_res *r = &early_res[i];
|
||||
if (last >= r->start && addr < r->end) {
|
||||
*addrp = addr = round_up(r->end, align);
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/* Check for already reserved areas */
|
||||
static inline int __init
|
||||
bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
|
||||
{
|
||||
int i;
|
||||
unsigned long addr = *addrp, last;
|
||||
unsigned long size = *sizep;
|
||||
int changed = 0;
|
||||
again:
|
||||
last = addr + size;
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||
struct early_res *r = &early_res[i];
|
||||
if (last > r->start && addr < r->start) {
|
||||
size = r->start - addr;
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
if (last > r->end && addr < r->end) {
|
||||
addr = round_up(r->end, align);
|
||||
size = last - addr;
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
if (last <= r->end && addr >= r->start) {
|
||||
(*sizep)++;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (changed) {
|
||||
*addrp = addr;
|
||||
*sizep = size;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
/*
|
||||
* This function checks if any part of the range <start,end> is mapped
|
||||
* with type.
|
||||
*/
|
||||
int
|
||||
e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
|
||||
if (type && ei->type != type)
|
||||
continue;
|
||||
if (ei->addr >= end || ei->addr + ei->size <= start)
|
||||
continue;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(e820_any_mapped);
|
||||
|
||||
/*
|
||||
* This function checks if the entire range <start,end> is mapped with type.
|
||||
*
|
||||
* Note: this function only works correctly if the e820 table is sorted and
|
||||
* not-overlapping, which is the case
|
||||
*/
|
||||
int __init e820_all_mapped(unsigned long start, unsigned long end,
|
||||
unsigned type)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
|
||||
if (type && ei->type != type)
|
||||
continue;
|
||||
/* is the region (part) in overlap with the current region ?*/
|
||||
if (ei->addr >= end || ei->addr + ei->size <= start)
|
||||
continue;
|
||||
|
||||
/* if the region is at the beginning of <start,end> we move
|
||||
* start to the end of the region since it's ok until there
|
||||
*/
|
||||
if (ei->addr <= start)
|
||||
start = ei->addr + ei->size;
|
||||
/*
|
||||
* if start is now at or beyond end, we're done, full
|
||||
* coverage
|
||||
*/
|
||||
if (start >= end)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a free area with specified alignment in a specific range.
|
||||
*/
|
||||
unsigned long __init find_e820_area(unsigned long start, unsigned long end,
|
||||
unsigned long size, unsigned long align)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
unsigned long addr, last;
|
||||
unsigned long ei_last;
|
||||
|
||||
if (ei->type != E820_RAM)
|
||||
continue;
|
||||
addr = round_up(ei->addr, align);
|
||||
ei_last = ei->addr + ei->size;
|
||||
if (addr < start)
|
||||
addr = round_up(start, align);
|
||||
if (addr >= ei_last)
|
||||
continue;
|
||||
while (bad_addr(&addr, size, align) && addr+size <= ei_last)
|
||||
;
|
||||
last = addr + size;
|
||||
if (last > ei_last)
|
||||
continue;
|
||||
if (last > end)
|
||||
continue;
|
||||
return addr;
|
||||
}
|
||||
return -1UL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find next free range after *start
|
||||
*/
|
||||
unsigned long __init find_e820_area_size(unsigned long start,
|
||||
unsigned long *sizep,
|
||||
unsigned long align)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
unsigned long addr, last;
|
||||
unsigned long ei_last;
|
||||
|
||||
if (ei->type != E820_RAM)
|
||||
continue;
|
||||
addr = round_up(ei->addr, align);
|
||||
ei_last = ei->addr + ei->size;
|
||||
if (addr < start)
|
||||
addr = round_up(start, align);
|
||||
if (addr >= ei_last)
|
||||
continue;
|
||||
*sizep = ei_last - addr;
|
||||
while (bad_addr_size(&addr, sizep, align) &&
|
||||
addr + *sizep <= ei_last)
|
||||
;
|
||||
last = addr + *sizep;
|
||||
if (last > ei_last)
|
||||
continue;
|
||||
return addr;
|
||||
}
|
||||
return -1UL;
|
||||
|
||||
}
|
||||
/*
|
||||
* Find the highest page frame number we have available
|
||||
*/
|
||||
unsigned long __init e820_end_of_ram(void)
|
||||
{
|
||||
unsigned long end_pfn;
|
||||
|
||||
end_pfn = find_max_pfn_with_active_regions();
|
||||
|
||||
if (end_pfn > max_pfn_mapped)
|
||||
max_pfn_mapped = end_pfn;
|
||||
if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
|
||||
max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
|
||||
if (end_pfn > end_user_pfn)
|
||||
end_pfn = end_user_pfn;
|
||||
if (end_pfn > max_pfn_mapped)
|
||||
end_pfn = max_pfn_mapped;
|
||||
|
||||
printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
|
||||
return end_pfn;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark e820 reserved areas as busy for the resource manager.
|
||||
*/
|
||||
void __init e820_reserve_resources(void)
|
||||
{
|
||||
int i;
|
||||
struct resource *res;
|
||||
|
||||
res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
switch (e820.map[i].type) {
|
||||
case E820_RAM: res->name = "System RAM"; break;
|
||||
case E820_ACPI: res->name = "ACPI Tables"; break;
|
||||
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
|
||||
default: res->name = "reserved";
|
||||
}
|
||||
res->start = e820.map[i].addr;
|
||||
res->end = res->start + e820.map[i].size - 1;
|
||||
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
||||
insert_resource(&iomem_resource, res);
|
||||
res++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the ranges of physical addresses that do not correspond to
|
||||
* e820 RAM areas and mark the corresponding pages as nosave for software
|
||||
* suspend and suspend to RAM.
|
||||
*
|
||||
* This function requires the e820 map to be sorted and without any
|
||||
* overlapping entries and assumes the first e820 area to be RAM.
|
||||
*/
|
||||
void __init e820_mark_nosave_regions(void)
|
||||
{
|
||||
int i;
|
||||
unsigned long paddr;
|
||||
|
||||
paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
|
||||
for (i = 1; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
|
||||
if (paddr < ei->addr)
|
||||
register_nosave_region(PFN_DOWN(paddr),
|
||||
PFN_UP(ei->addr));
|
||||
|
||||
paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
|
||||
if (ei->type != E820_RAM)
|
||||
register_nosave_region(PFN_UP(ei->addr),
|
||||
PFN_DOWN(paddr));
|
||||
|
||||
if (paddr >= (end_pfn << PAGE_SHIFT))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Finds an active region in the address range from start_pfn to end_pfn and
|
||||
* returns its range in ei_startpfn and ei_endpfn for the e820 entry.
|
||||
*/
|
||||
static int __init e820_find_active_region(const struct e820entry *ei,
|
||||
unsigned long start_pfn,
|
||||
unsigned long end_pfn,
|
||||
unsigned long *ei_startpfn,
|
||||
unsigned long *ei_endpfn)
|
||||
{
|
||||
*ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
*ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
|
||||
/* Skip map entries smaller than a page */
|
||||
if (*ei_startpfn >= *ei_endpfn)
|
||||
return 0;
|
||||
|
||||
/* Check if max_pfn_mapped should be updated */
|
||||
if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
|
||||
max_pfn_mapped = *ei_endpfn;
|
||||
|
||||
/* Skip if map is outside the node */
|
||||
if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
|
||||
*ei_startpfn >= end_pfn)
|
||||
return 0;
|
||||
|
||||
/* Check for overlaps */
|
||||
if (*ei_startpfn < start_pfn)
|
||||
*ei_startpfn = start_pfn;
|
||||
if (*ei_endpfn > end_pfn)
|
||||
*ei_endpfn = end_pfn;
|
||||
|
||||
/* Obey end_user_pfn to save on memmap */
|
||||
if (*ei_startpfn >= end_user_pfn)
|
||||
return 0;
|
||||
if (*ei_endpfn > end_user_pfn)
|
||||
*ei_endpfn = end_user_pfn;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Walk the e820 map and register active regions within a node */
|
||||
void __init
|
||||
e820_register_active_regions(int nid, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
unsigned long ei_startpfn;
|
||||
unsigned long ei_endpfn;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++)
|
||||
if (e820_find_active_region(&e820.map[i],
|
||||
start_pfn, end_pfn,
|
||||
&ei_startpfn, &ei_endpfn))
|
||||
add_active_range(nid, ei_startpfn, ei_endpfn);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a memory region to the kernel e820 map.
|
||||
*/
|
||||
void __init add_memory_region(unsigned long start, unsigned long size, int type)
|
||||
{
|
||||
int x = e820.nr_map;
|
||||
|
||||
if (x == E820MAX) {
|
||||
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
e820.map[x].addr = start;
|
||||
e820.map[x].size = size;
|
||||
e820.map[x].type = type;
|
||||
e820.nr_map++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the hole size (in bytes) in the memory range.
|
||||
* @start: starting address of the memory range to scan
|
||||
* @end: ending address of the memory range to scan
|
||||
*/
|
||||
unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long end_pfn = end >> PAGE_SHIFT;
|
||||
unsigned long ei_startpfn, ei_endpfn, ram = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
if (e820_find_active_region(&e820.map[i],
|
||||
start_pfn, end_pfn,
|
||||
&ei_startpfn, &ei_endpfn))
|
||||
ram += ei_endpfn - ei_startpfn;
|
||||
}
|
||||
return end - start - (ram << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static void __init e820_print_map(char *who)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
|
||||
(unsigned long long) e820.map[i].addr,
|
||||
(unsigned long long)
|
||||
(e820.map[i].addr + e820.map[i].size));
|
||||
switch (e820.map[i].type) {
|
||||
case E820_RAM:
|
||||
printk(KERN_CONT "(usable)\n");
|
||||
break;
|
||||
case E820_RESERVED:
|
||||
printk(KERN_CONT "(reserved)\n");
|
||||
break;
|
||||
case E820_ACPI:
|
||||
printk(KERN_CONT "(ACPI data)\n");
|
||||
break;
|
||||
case E820_NVS:
|
||||
printk(KERN_CONT "(ACPI NVS)\n");
|
||||
break;
|
||||
default:
|
||||
printk(KERN_CONT "type %u\n", e820.map[i].type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Sanitize the BIOS e820 map.
|
||||
*
|
||||
* Some e820 responses include overlapping entries. The following
|
||||
* replaces the original e820 map with a new one, removing overlaps.
|
||||
*
|
||||
*/
|
||||
static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
|
||||
{
|
||||
struct change_member {
|
||||
struct e820entry *pbios; /* pointer to original bios entry */
|
||||
unsigned long long addr; /* address for this change point */
|
||||
};
|
||||
static struct change_member change_point_list[2*E820MAX] __initdata;
|
||||
static struct change_member *change_point[2*E820MAX] __initdata;
|
||||
static struct e820entry *overlap_list[E820MAX] __initdata;
|
||||
static struct e820entry new_bios[E820MAX] __initdata;
|
||||
struct change_member *change_tmp;
|
||||
unsigned long current_type, last_type;
|
||||
unsigned long long last_addr;
|
||||
int chgidx, still_changing;
|
||||
int overlap_entries;
|
||||
int new_bios_entry;
|
||||
int old_nr, new_nr, chg_nr;
|
||||
int i;
|
||||
|
||||
/*
|
||||
Visually we're performing the following
|
||||
(1,2,3,4 = memory types)...
|
||||
|
||||
Sample memory map (w/overlaps):
|
||||
____22__________________
|
||||
______________________4_
|
||||
____1111________________
|
||||
_44_____________________
|
||||
11111111________________
|
||||
____________________33__
|
||||
___________44___________
|
||||
__________33333_________
|
||||
______________22________
|
||||
___________________2222_
|
||||
_________111111111______
|
||||
_____________________11_
|
||||
_________________4______
|
||||
|
||||
Sanitized equivalent (no overlap):
|
||||
1_______________________
|
||||
_44_____________________
|
||||
___1____________________
|
||||
____22__________________
|
||||
______11________________
|
||||
_________1______________
|
||||
__________3_____________
|
||||
___________44___________
|
||||
_____________33_________
|
||||
_______________2________
|
||||
________________1_______
|
||||
_________________4______
|
||||
___________________2____
|
||||
____________________33__
|
||||
______________________4_
|
||||
*/
|
||||
|
||||
/* if there's only one memory region, don't bother */
|
||||
if (*pnr_map < 2)
|
||||
return -1;
|
||||
|
||||
old_nr = *pnr_map;
|
||||
|
||||
/* bail out if we find any unreasonable addresses in bios map */
|
||||
for (i = 0; i < old_nr; i++)
|
||||
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
|
||||
return -1;
|
||||
|
||||
/* create pointers for initial change-point information (for sorting) */
|
||||
for (i = 0; i < 2 * old_nr; i++)
|
||||
change_point[i] = &change_point_list[i];
|
||||
|
||||
/* record all known change-points (starting and ending addresses),
|
||||
omitting those that are for empty memory regions */
|
||||
chgidx = 0;
|
||||
for (i = 0; i < old_nr; i++) {
|
||||
if (biosmap[i].size != 0) {
|
||||
change_point[chgidx]->addr = biosmap[i].addr;
|
||||
change_point[chgidx++]->pbios = &biosmap[i];
|
||||
change_point[chgidx]->addr = biosmap[i].addr +
|
||||
biosmap[i].size;
|
||||
change_point[chgidx++]->pbios = &biosmap[i];
|
||||
}
|
||||
}
|
||||
chg_nr = chgidx;
|
||||
|
||||
/* sort change-point list by memory addresses (low -> high) */
|
||||
still_changing = 1;
|
||||
while (still_changing) {
|
||||
still_changing = 0;
|
||||
for (i = 1; i < chg_nr; i++) {
|
||||
unsigned long long curaddr, lastaddr;
|
||||
unsigned long long curpbaddr, lastpbaddr;
|
||||
|
||||
curaddr = change_point[i]->addr;
|
||||
lastaddr = change_point[i - 1]->addr;
|
||||
curpbaddr = change_point[i]->pbios->addr;
|
||||
lastpbaddr = change_point[i - 1]->pbios->addr;
|
||||
|
||||
/*
|
||||
* swap entries, when:
|
||||
*
|
||||
* curaddr > lastaddr or
|
||||
* curaddr == lastaddr and curaddr == curpbaddr and
|
||||
* lastaddr != lastpbaddr
|
||||
*/
|
||||
if (curaddr < lastaddr ||
|
||||
(curaddr == lastaddr && curaddr == curpbaddr &&
|
||||
lastaddr != lastpbaddr)) {
|
||||
change_tmp = change_point[i];
|
||||
change_point[i] = change_point[i-1];
|
||||
change_point[i-1] = change_tmp;
|
||||
still_changing = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* create a new bios memory map, removing overlaps */
|
||||
overlap_entries = 0; /* number of entries in the overlap table */
|
||||
new_bios_entry = 0; /* index for creating new bios map entries */
|
||||
last_type = 0; /* start with undefined memory type */
|
||||
last_addr = 0; /* start with 0 as last starting address */
|
||||
|
||||
/* loop through change-points, determining affect on the new bios map */
|
||||
for (chgidx = 0; chgidx < chg_nr; chgidx++) {
|
||||
/* keep track of all overlapping bios entries */
|
||||
if (change_point[chgidx]->addr ==
|
||||
change_point[chgidx]->pbios->addr) {
|
||||
/*
|
||||
* add map entry to overlap list (> 1 entry
|
||||
* implies an overlap)
|
||||
*/
|
||||
overlap_list[overlap_entries++] =
|
||||
change_point[chgidx]->pbios;
|
||||
} else {
|
||||
/*
|
||||
* remove entry from list (order independent,
|
||||
* so swap with last)
|
||||
*/
|
||||
for (i = 0; i < overlap_entries; i++) {
|
||||
if (overlap_list[i] ==
|
||||
change_point[chgidx]->pbios)
|
||||
overlap_list[i] =
|
||||
overlap_list[overlap_entries-1];
|
||||
}
|
||||
overlap_entries--;
|
||||
}
|
||||
/*
|
||||
* if there are overlapping entries, decide which
|
||||
* "type" to use (larger value takes precedence --
|
||||
* 1=usable, 2,3,4,4+=unusable)
|
||||
*/
|
||||
current_type = 0;
|
||||
for (i = 0; i < overlap_entries; i++)
|
||||
if (overlap_list[i]->type > current_type)
|
||||
current_type = overlap_list[i]->type;
|
||||
/*
|
||||
* continue building up new bios map based on this
|
||||
* information
|
||||
*/
|
||||
if (current_type != last_type) {
|
||||
if (last_type != 0) {
|
||||
new_bios[new_bios_entry].size =
|
||||
change_point[chgidx]->addr - last_addr;
|
||||
/*
|
||||
* move forward only if the new size
|
||||
* was non-zero
|
||||
*/
|
||||
if (new_bios[new_bios_entry].size != 0)
|
||||
/*
|
||||
* no more space left for new
|
||||
* bios entries ?
|
||||
*/
|
||||
if (++new_bios_entry >= E820MAX)
|
||||
break;
|
||||
}
|
||||
if (current_type != 0) {
|
||||
new_bios[new_bios_entry].addr =
|
||||
change_point[chgidx]->addr;
|
||||
new_bios[new_bios_entry].type = current_type;
|
||||
last_addr = change_point[chgidx]->addr;
|
||||
}
|
||||
last_type = current_type;
|
||||
}
|
||||
}
|
||||
/* retain count for new bios entries */
|
||||
new_nr = new_bios_entry;
|
||||
|
||||
/* copy new bios mapping into original location */
|
||||
memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
|
||||
*pnr_map = new_nr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the BIOS e820 map into a safe place.
|
||||
*
|
||||
* Sanity-check it while we're at it..
|
||||
*
|
||||
* If we're lucky and live on a modern system, the setup code
|
||||
* will have given us a memory map that we can use to properly
|
||||
* set up memory. If we aren't, we'll fake a memory map.
|
||||
*/
|
||||
static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
|
||||
{
|
||||
/* Only one memory region (or negative)? Ignore it */
|
||||
if (nr_map < 2)
|
||||
return -1;
|
||||
|
||||
do {
|
||||
u64 start = biosmap->addr;
|
||||
u64 size = biosmap->size;
|
||||
u64 end = start + size;
|
||||
u32 type = biosmap->type;
|
||||
|
||||
/* Overflow in 64 bits? Ignore the memory map. */
|
||||
if (start > end)
|
||||
return -1;
|
||||
|
||||
add_memory_region(start, size, type);
|
||||
} while (biosmap++, --nr_map);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void early_panic(char *msg)
|
||||
{
|
||||
early_printk(msg);
|
||||
panic(msg);
|
||||
}
|
||||
|
||||
/* We're not void only for x86 32-bit compat */
|
||||
char * __init machine_specific_memory_setup(void)
|
||||
{
|
||||
char *who = "BIOS-e820";
|
||||
/*
|
||||
* Try to copy the BIOS-supplied E820-map.
|
||||
*
|
||||
* Otherwise fake a memory map; one section from 0k->640k,
|
||||
* the next section from 1mb->appropriate_mem_k
|
||||
*/
|
||||
sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
|
||||
if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
|
||||
early_panic("Cannot find a valid memory map");
|
||||
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
|
||||
e820_print_map(who);
|
||||
|
||||
/* In case someone cares... */
|
||||
return who;
|
||||
}
|
||||
|
||||
static int __init parse_memopt(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
end_user_pfn = memparse(p, &p);
|
||||
end_user_pfn >>= PAGE_SHIFT;
|
||||
return 0;
|
||||
}
|
||||
early_param("mem", parse_memopt);
|
||||
|
||||
static int userdef __initdata;
|
||||
|
||||
static int __init parse_memmap_opt(char *p)
|
||||
{
|
||||
char *oldp;
|
||||
unsigned long long start_at, mem_size;
|
||||
|
||||
if (!strcmp(p, "exactmap")) {
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
/*
|
||||
* If we are doing a crash dump, we still need to know
|
||||
* the real mem size before original memory map is
|
||||
* reset.
|
||||
*/
|
||||
e820_register_active_regions(0, 0, -1UL);
|
||||
saved_max_pfn = e820_end_of_ram();
|
||||
remove_all_active_ranges();
|
||||
#endif
|
||||
max_pfn_mapped = 0;
|
||||
e820.nr_map = 0;
|
||||
userdef = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
oldp = p;
|
||||
mem_size = memparse(p, &p);
|
||||
if (p == oldp)
|
||||
return -EINVAL;
|
||||
|
||||
userdef = 1;
|
||||
if (*p == '@') {
|
||||
start_at = memparse(p+1, &p);
|
||||
add_memory_region(start_at, mem_size, E820_RAM);
|
||||
} else if (*p == '#') {
|
||||
start_at = memparse(p+1, &p);
|
||||
add_memory_region(start_at, mem_size, E820_ACPI);
|
||||
} else if (*p == '$') {
|
||||
start_at = memparse(p+1, &p);
|
||||
add_memory_region(start_at, mem_size, E820_RESERVED);
|
||||
} else {
|
||||
end_user_pfn = (mem_size >> PAGE_SHIFT);
|
||||
}
|
||||
return *p == '\0' ? 0 : -EINVAL;
|
||||
}
|
||||
early_param("memmap", parse_memmap_opt);
|
||||
|
||||
void __init finish_e820_parsing(void)
|
||||
{
|
||||
if (userdef) {
|
||||
char nr = e820.nr_map;
|
||||
|
||||
if (sanitize_e820_map(e820.map, &nr) < 0)
|
||||
early_panic("Invalid user supplied memory map");
|
||||
e820.nr_map = nr;
|
||||
|
||||
printk(KERN_INFO "user-defined physical RAM map:\n");
|
||||
e820_print_map("user");
|
||||
}
|
||||
}
|
||||
|
||||
void __init update_memory_range(u64 start, u64 size, unsigned old_type,
|
||||
unsigned new_type)
|
||||
{
|
||||
int i;
|
||||
|
||||
BUG_ON(old_type == new_type);
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
u64 final_start, final_end;
|
||||
if (ei->type != old_type)
|
||||
continue;
|
||||
/* totally covered? */
|
||||
if (ei->addr >= start && ei->size <= size) {
|
||||
ei->type = new_type;
|
||||
continue;
|
||||
}
|
||||
/* partially covered */
|
||||
final_start = max(start, ei->addr);
|
||||
final_end = min(start + size, ei->addr + ei->size);
|
||||
if (final_start >= final_end)
|
||||
continue;
|
||||
add_memory_region(final_start, final_end - final_start,
|
||||
new_type);
|
||||
}
|
||||
}
|
||||
|
||||
void __init update_e820(void)
|
||||
{
|
||||
u8 nr_map;
|
||||
|
||||
nr_map = e820.nr_map;
|
||||
if (sanitize_e820_map(e820.map, &nr_map))
|
||||
return;
|
||||
e820.nr_map = nr_map;
|
||||
printk(KERN_INFO "modified physical RAM map:\n");
|
||||
e820_print_map("modified");
|
||||
}
|
||||
|
||||
unsigned long pci_mem_start = 0xaeedbabe;
|
||||
EXPORT_SYMBOL(pci_mem_start);
|
||||
|
||||
/*
|
||||
* Search for the biggest gap in the low 32 bits of the e820
|
||||
* memory space. We pass this space to PCI to assign MMIO resources
|
||||
* for hotplug or unconfigured devices in.
|
||||
* Hopefully the BIOS let enough space left.
|
||||
*/
|
||||
__init void e820_setup_gap(void)
|
||||
{
|
||||
unsigned long gapstart, gapsize, round;
|
||||
unsigned long last;
|
||||
int i;
|
||||
int found = 0;
|
||||
|
||||
last = 0x100000000ull;
|
||||
gapstart = 0x10000000;
|
||||
gapsize = 0x400000;
|
||||
i = e820.nr_map;
|
||||
while (--i >= 0) {
|
||||
unsigned long long start = e820.map[i].addr;
|
||||
unsigned long long end = start + e820.map[i].size;
|
||||
|
||||
/*
|
||||
* Since "last" is at most 4GB, we know we'll
|
||||
* fit in 32 bits if this condition is true
|
||||
*/
|
||||
if (last > end) {
|
||||
unsigned long gap = last - end;
|
||||
|
||||
if (gap > gapsize) {
|
||||
gapsize = gap;
|
||||
gapstart = end;
|
||||
found = 1;
|
||||
}
|
||||
}
|
||||
if (start < last)
|
||||
last = start;
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
|
||||
printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
|
||||
"address range\n"
|
||||
KERN_ERR "PCI: Unassigned devices with 32bit resource "
|
||||
"registers may break!\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* See how much we want to round up: start off with
|
||||
* rounding to the next 1MB area.
|
||||
*/
|
||||
round = 0x100000;
|
||||
while ((gapsize >> 4) > round)
|
||||
round += round;
|
||||
/* Fun with two's complement */
|
||||
pci_mem_start = (gapstart + round) & -round;
|
||||
|
||||
printk(KERN_INFO
|
||||
"Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
|
||||
pci_mem_start, gapstart, gapsize);
|
||||
}
|
||||
|
||||
int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (slot < 0 || slot >= e820.nr_map)
|
||||
return -1;
|
||||
for (i = slot; i < e820.nr_map; i++) {
|
||||
if (e820.map[i].type != E820_RAM)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
|
||||
return -1;
|
||||
*addr = e820.map[i].addr;
|
||||
*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
|
||||
max_pfn << PAGE_SHIFT) - *addr;
|
||||
return i + 1;
|
||||
}
|
|
@@ -50,7 +50,7 @@ static void __init fix_hypertransport_config(int num, int slot, int func)
|
|||
static void __init via_bugs(int num, int slot, int func)
|
||||
{
|
||||
#ifdef CONFIG_GART_IOMMU
|
||||
if ((end_pfn > MAX_DMA32_PFN || force_iommu) &&
|
||||
if ((max_pfn > MAX_DMA32_PFN || force_iommu) &&
|
||||
!gart_iommu_aperture_allowed) {
|
||||
printk(KERN_INFO
|
||||
"Looks like a VIA chipset. Disabling IOMMU."
|
||||
|
@@ -98,17 +98,6 @@ static void __init nvidia_bugs(int num, int slot, int func)
|
|||
|
||||
}
|
||||
|
||||
static void __init ati_bugs(int num, int slot, int func)
|
||||
{
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
if (timer_over_8254 == 1) {
|
||||
timer_over_8254 = 0;
|
||||
printk(KERN_INFO
|
||||
"ATI board detected. Disabling timer routing over 8254.\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define QFLAG_APPLY_ONCE 0x1
|
||||
#define QFLAG_APPLIED 0x2
|
||||
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
|
||||
|
@@ -126,8 +115,6 @@ static struct chipset early_qrk[] __initdata = {
|
|||
PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
|
||||
{ PCI_VENDOR_ID_VIA, PCI_ANY_ID,
|
||||
PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
|
||||
{ PCI_VENDOR_ID_ATI, PCI_ANY_ID,
|
||||
PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs },
|
||||
{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
|
||||
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
|
||||
{}
|
||||
|
|
|
@@ -64,6 +64,17 @@ static int __init setup_noefi(char *arg)
|
|||
}
|
||||
early_param("noefi", setup_noefi);
|
||||
|
||||
int add_efi_memmap;
|
||||
EXPORT_SYMBOL(add_efi_memmap);
|
||||
|
||||
static int __init setup_add_efi_memmap(char *arg)
|
||||
{
|
||||
add_efi_memmap = 1;
|
||||
return 0;
|
||||
}
|
||||
early_param("add_efi_memmap", setup_add_efi_memmap);
|
||||
|
||||
|
||||
static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
|
||||
{
|
||||
return efi_call_virt2(get_time, tm, tc);
|
||||
|
@@ -213,6 +224,50 @@ unsigned long efi_get_time(void)
|
|||
eft.minute, eft.second);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tell the kernel about the EFI memory map. This might include
|
||||
* more than the max 128 entries that can fit in the e820 legacy
|
||||
* (zeropage) memory map.
|
||||
*/
|
||||
|
||||
static void __init do_add_efi_memmap(void)
|
||||
{
|
||||
void *p;
|
||||
|
||||
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
|
||||
efi_memory_desc_t *md = p;
|
||||
unsigned long long start = md->phys_addr;
|
||||
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
|
||||
int e820_type;
|
||||
|
||||
if (md->attribute & EFI_MEMORY_WB)
|
||||
e820_type = E820_RAM;
|
||||
else
|
||||
e820_type = E820_RESERVED;
|
||||
e820_add_region(start, size, e820_type);
|
||||
}
|
||||
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
||||
}
|
||||
|
||||
void __init efi_reserve_early(void)
|
||||
{
|
||||
unsigned long pmap;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
pmap = boot_params.efi_info.efi_memmap;
|
||||
#else
|
||||
pmap = (boot_params.efi_info.efi_memmap |
|
||||
((__u64)boot_params.efi_info.efi_memmap_hi<<32));
|
||||
#endif
|
||||
memmap.phys_map = (void *)pmap;
|
||||
memmap.nr_map = boot_params.efi_info.efi_memmap_size /
|
||||
boot_params.efi_info.efi_memdesc_size;
|
||||
memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
|
||||
memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
|
||||
reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size,
|
||||
"EFI memmap");
|
||||
}
|
||||
|
||||
#if EFI_DEBUG
|
||||
static void __init print_efi_memmap(void)
|
||||
{
|
||||
|
@@ -244,19 +299,11 @@ void __init efi_init(void)
|
|||
|
||||
#ifdef CONFIG_X86_32
|
||||
efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
|
||||
memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
|
||||
#else
|
||||
efi_phys.systab = (efi_system_table_t *)
|
||||
(boot_params.efi_info.efi_systab |
|
||||
((__u64)boot_params.efi_info.efi_systab_hi<<32));
|
||||
memmap.phys_map = (void *)
|
||||
(boot_params.efi_info.efi_memmap |
|
||||
((__u64)boot_params.efi_info.efi_memmap_hi<<32));
|
||||
#endif
|
||||
memmap.nr_map = boot_params.efi_info.efi_memmap_size /
|
||||
boot_params.efi_info.efi_memdesc_size;
|
||||
memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
|
||||
memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
|
||||
|
||||
efi.systab = early_ioremap((unsigned long)efi_phys.systab,
|
||||
sizeof(efi_system_table_t));
|
||||
|
@@ -370,6 +417,8 @@ void __init efi_init(void)
|
|||
if (memmap.desc_size != sizeof(efi_memory_desc_t))
|
||||
printk(KERN_WARNING "Kernel-defined memdesc"
|
||||
"doesn't match the one from EFI!\n");
|
||||
if (add_efi_memmap)
|
||||
do_add_efi_memmap();
|
||||
|
||||
/* Setup for EFI runtime service */
|
||||
reboot_type = BOOT_EFI;
|
||||
|
@@ -424,7 +473,7 @@ void __init efi_enter_virtual_mode(void)
|
|||
size = md->num_pages << EFI_PAGE_SHIFT;
|
||||
end = md->phys_addr + size;
|
||||
|
||||
if (PFN_UP(end) <= max_pfn_mapped)
|
||||
if (PFN_UP(end) <= max_low_pfn_mapped)
|
||||
va = __va(md->phys_addr);
|
||||
else
|
||||
va = efi_ioremap(md->phys_addr, size);
|
||||
|
|
|
@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void)
|
|||
early_runtime_code_mapping_set_exec(0);
|
||||
}
|
||||
|
||||
void __init efi_reserve_bootmem(void)
|
||||
{
|
||||
reserve_bootmem_generic((unsigned long)memmap.phys_map,
|
||||
memmap.nr_map * memmap.desc_size);
|
||||
}
|
||||
|
||||
void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
|
||||
void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
|
||||
{
|
||||
static unsigned pages_mapped __initdata;
|
||||
unsigned i, pages;
|
||||
|
|
|
@ -51,14 +51,14 @@
|
|||
#include <asm/percpu.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include "irq_vectors.h"
|
||||
#include <asm/irq_vectors.h>
|
||||
|
||||
/*
|
||||
* We use macros for low-level operations which need to be overridden
|
||||
* for paravirtualization. The following will never clobber any registers:
|
||||
* INTERRUPT_RETURN (aka. "iret")
|
||||
* GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
|
||||
* ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
|
||||
* ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
|
||||
*
|
||||
* For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
|
||||
* specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
|
||||
|
@ -349,7 +349,7 @@ sysenter_past_esp:
|
|||
xorl %ebp,%ebp
|
||||
TRACE_IRQS_ON
|
||||
1: mov PT_FS(%esp), %fs
|
||||
ENABLE_INTERRUPTS_SYSCALL_RET
|
||||
ENABLE_INTERRUPTS_SYSEXIT
|
||||
CFI_ENDPROC
|
||||
.pushsection .fixup,"ax"
|
||||
2: movl $0,PT_FS(%esp)
|
||||
|
@ -874,10 +874,10 @@ ENTRY(native_iret)
|
|||
.previous
|
||||
END(native_iret)
|
||||
|
||||
ENTRY(native_irq_enable_syscall_ret)
|
||||
ENTRY(native_irq_enable_sysexit)
|
||||
sti
|
||||
sysexit
|
||||
END(native_irq_enable_syscall_ret)
|
||||
END(native_irq_enable_sysexit)
|
||||
#endif
|
||||
|
||||
KPROBE_ENTRY(int3)
|
||||
|
@ -1024,6 +1024,7 @@ ENTRY(xen_sysenter_target)
|
|||
RING0_INT_FRAME
|
||||
addl $5*4, %esp /* remove xen-provided frame */
|
||||
jmp sysenter_past_esp
|
||||
CFI_ENDPROC
|
||||
|
||||
ENTRY(xen_hypervisor_callback)
|
||||
CFI_STARTPROC
|
||||
|
|
|
@ -59,8 +59,7 @@
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
ENTRY(native_irq_enable_syscall_ret)
|
||||
movq %gs:pda_oldrsp,%rsp
|
||||
ENTRY(native_usergs_sysret64)
|
||||
swapgs
|
||||
sysretq
|
||||
#endif /* CONFIG_PARAVIRT */
|
||||
|
@ -104,7 +103,7 @@ ENTRY(native_irq_enable_syscall_ret)
|
|||
.macro FAKE_STACK_FRAME child_rip
|
||||
/* push in order ss, rsp, eflags, cs, rip */
|
||||
xorl %eax, %eax
|
||||
pushq %rax /* ss */
|
||||
pushq $__KERNEL_DS /* ss */
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
/*CFI_REL_OFFSET ss,0*/
|
||||
pushq %rax /* rsp */
|
||||
|
@ -169,13 +168,13 @@ ENTRY(ret_from_fork)
|
|||
CFI_ADJUST_CFA_OFFSET -4
|
||||
call schedule_tail
|
||||
GET_THREAD_INFO(%rcx)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
|
||||
jnz rff_trace
|
||||
rff_action:
|
||||
RESTORE_REST
|
||||
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
|
||||
je int_ret_from_sys_call
|
||||
testl $_TIF_IA32,threadinfo_flags(%rcx)
|
||||
testl $_TIF_IA32,TI_flags(%rcx)
|
||||
jnz int_ret_from_sys_call
|
||||
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
|
||||
jmp ret_from_sys_call
|
||||
|
@ -244,7 +243,8 @@ ENTRY(system_call_after_swapgs)
|
|||
movq %rcx,RIP-ARGOFFSET(%rsp)
|
||||
CFI_REL_OFFSET rip,RIP-ARGOFFSET
|
||||
GET_THREAD_INFO(%rcx)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
|
||||
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
|
||||
TI_flags(%rcx)
|
||||
jnz tracesys
|
||||
cmpq $__NR_syscall_max,%rax
|
||||
ja badsys
|
||||
|
@ -263,7 +263,7 @@ sysret_check:
|
|||
GET_THREAD_INFO(%rcx)
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
movl threadinfo_flags(%rcx),%edx
|
||||
movl TI_flags(%rcx),%edx
|
||||
andl %edi,%edx
|
||||
jnz sysret_careful
|
||||
CFI_REMEMBER_STATE
|
||||
|
@ -275,7 +275,8 @@ sysret_check:
|
|||
CFI_REGISTER rip,rcx
|
||||
RESTORE_ARGS 0,-ARG_SKIP,1
|
||||
/*CFI_REGISTER rflags,r11*/
|
||||
ENABLE_INTERRUPTS_SYSCALL_RET
|
||||
movq %gs:pda_oldrsp, %rsp
|
||||
USERGS_SYSRET64
|
||||
|
||||
CFI_RESTORE_STATE
|
||||
/* Handle reschedules */
|
||||
|
@ -305,7 +306,7 @@ sysret_signal:
|
|||
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
|
||||
xorl %esi,%esi # oldset -> arg2
|
||||
call ptregscall_common
|
||||
1: movl $_TIF_NEED_RESCHED,%edi
|
||||
1: movl $_TIF_WORK_MASK,%edi
|
||||
/* Use IRET because user could have changed frame. This
|
||||
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
@ -347,10 +348,10 @@ int_ret_from_sys_call:
|
|||
int_with_check:
|
||||
LOCKDEP_SYS_EXIT_IRQ
|
||||
GET_THREAD_INFO(%rcx)
|
||||
movl threadinfo_flags(%rcx),%edx
|
||||
movl TI_flags(%rcx),%edx
|
||||
andl %edi,%edx
|
||||
jnz int_careful
|
||||
andl $~TS_COMPAT,threadinfo_status(%rcx)
|
||||
andl $~TS_COMPAT,TI_status(%rcx)
|
||||
jmp retint_swapgs
|
||||
|
||||
/* Either reschedule or signal or syscall exit tracking needed. */
|
||||
|
@ -393,7 +394,7 @@ int_signal:
|
|||
movq %rsp,%rdi # &ptregs -> arg1
|
||||
xorl %esi,%esi # oldset -> arg2
|
||||
call do_notify_resume
|
||||
1: movl $_TIF_NEED_RESCHED,%edi
|
||||
1: movl $_TIF_WORK_MASK,%edi
|
||||
int_restore_rest:
|
||||
RESTORE_REST
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
@ -420,7 +421,6 @@ END(\label)
|
|||
PTREGSCALL stub_clone, sys_clone, %r8
|
||||
PTREGSCALL stub_fork, sys_fork, %rdi
|
||||
PTREGSCALL stub_vfork, sys_vfork, %rdi
|
||||
PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
|
||||
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
|
||||
PTREGSCALL stub_iopl, sys_iopl, %rsi
|
||||
|
||||
|
@ -559,7 +559,7 @@ retint_with_reschedule:
|
|||
movl $_TIF_WORK_MASK,%edi
|
||||
retint_check:
|
||||
LOCKDEP_SYS_EXIT_IRQ
|
||||
movl threadinfo_flags(%rcx),%edx
|
||||
movl TI_flags(%rcx),%edx
|
||||
andl %edi,%edx
|
||||
CFI_REMEMBER_STATE
|
||||
jnz retint_careful
|
||||
|
@ -647,17 +647,16 @@ retint_signal:
|
|||
RESTORE_REST
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
movl $_TIF_NEED_RESCHED,%edi
|
||||
GET_THREAD_INFO(%rcx)
|
||||
jmp retint_check
|
||||
jmp retint_with_reschedule
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
/* Returning to kernel space. Check if we need preemption */
|
||||
/* rcx: threadinfo. interrupts off. */
|
||||
ENTRY(retint_kernel)
|
||||
cmpl $0,threadinfo_preempt_count(%rcx)
|
||||
cmpl $0,TI_preempt_count(%rcx)
|
||||
jnz retint_restore_args
|
||||
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
|
||||
bt $TIF_NEED_RESCHED,TI_flags(%rcx)
|
||||
jnc retint_restore_args
|
||||
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
|
||||
jnc retint_restore_args
|
||||
|
@ -720,6 +719,10 @@ ENTRY(apic_timer_interrupt)
|
|||
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
|
||||
END(apic_timer_interrupt)
|
||||
|
||||
ENTRY(uv_bau_message_intr1)
|
||||
apicinterrupt 220,uv_bau_message_interrupt
|
||||
END(uv_bau_message_intr1)
|
||||
|
||||
ENTRY(error_interrupt)
|
||||
apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
|
||||
END(error_interrupt)
|
||||
|
@ -733,6 +736,7 @@ END(spurious_interrupt)
|
|||
*/
|
||||
.macro zeroentry sym
|
||||
INTR_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
pushq $0 /* push error code/oldrax */
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
pushq %rax /* push real oldrax to the rdi slot */
|
||||
|
@ -745,6 +749,7 @@ END(spurious_interrupt)
|
|||
|
||||
.macro errorentry sym
|
||||
XCPT_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
pushq %rax
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
CFI_REL_OFFSET rax,0
|
||||
|
@ -814,7 +819,7 @@ paranoid_restore\trace:
|
|||
jmp irq_return
|
||||
paranoid_userspace\trace:
|
||||
GET_THREAD_INFO(%rcx)
|
||||
movl threadinfo_flags(%rcx),%ebx
|
||||
movl TI_flags(%rcx),%ebx
|
||||
andl $_TIF_WORK_MASK,%ebx
|
||||
jz paranoid_swapgs\trace
|
||||
movq %rsp,%rdi /* &pt_regs */
|
||||
|
@ -912,7 +917,7 @@ error_exit:
|
|||
testl %eax,%eax
|
||||
jne retint_kernel
|
||||
LOCKDEP_SYS_EXIT_IRQ
|
||||
movl threadinfo_flags(%rcx),%edx
|
||||
movl TI_flags(%rcx),%edx
|
||||
movl $_TIF_WORK_MASK,%edi
|
||||
andl %edi,%edx
|
||||
jnz retint_careful
|
||||
|
@ -926,11 +931,11 @@ error_kernelspace:
|
|||
iret run with kernel gs again, so don't set the user space flag.
|
||||
B stepping K8s sometimes report an truncated RIP for IRET
|
||||
exceptions returning to compat mode. Check for these here too. */
|
||||
leaq irq_return(%rip),%rbp
|
||||
cmpq %rbp,RIP(%rsp)
|
||||
leaq irq_return(%rip),%rcx
|
||||
cmpq %rcx,RIP(%rsp)
|
||||
je error_swapgs
|
||||
movl %ebp,%ebp /* zero extend */
|
||||
cmpq %rbp,RIP(%rsp)
|
||||
movl %ecx,%ecx /* zero extend */
|
||||
cmpq %rcx,RIP(%rsp)
|
||||
je error_swapgs
|
||||
cmpq $gs_change,RIP(%rsp)
|
||||
je error_swapgs
|
||||
|
@ -939,7 +944,7 @@ KPROBE_END(error_entry)
|
|||
|
||||
/* Reload gs selector with exception handling */
|
||||
/* edi: new selector */
|
||||
ENTRY(load_gs_index)
|
||||
ENTRY(native_load_gs_index)
|
||||
CFI_STARTPROC
|
||||
pushf
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
|
@ -953,7 +958,7 @@ gs_change:
|
|||
CFI_ADJUST_CFA_OFFSET -8
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(load_gs_index)
|
||||
ENDPROC(native_load_gs_index)
|
||||
|
||||
.section __ex_table,"a"
|
||||
.align 8
|
||||
|
@ -1120,10 +1125,6 @@ ENTRY(coprocessor_segment_overrun)
|
|||
zeroentry do_coprocessor_segment_overrun
|
||||
END(coprocessor_segment_overrun)
|
||||
|
||||
ENTRY(reserved)
|
||||
zeroentry do_reserved
|
||||
END(reserved)
|
||||
|
||||
/* runs on exception stack */
|
||||
ENTRY(double_fault)
|
||||
XCPT_FRAME
|
||||
|
|
|
@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
|
|||
else
|
||||
#endif
|
||||
|
||||
if (num_possible_cpus() <= 8)
|
||||
if (max_physical_apicid < 8)
|
||||
genapic = &apic_flat;
|
||||
else
|
||||
genapic = &apic_physflat;
|
||||
|
|
|
@ -5,9 +5,10 @@
|
|||
*
|
||||
* SGI UV APIC functions (note: not an Intel compatible APIC)
|
||||
*
|
||||
* Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/string.h>
|
||||
|
@ -20,6 +21,7 @@
|
|||
#include <asm/smp.h>
|
||||
#include <asm/ipi.h>
|
||||
#include <asm/genapic.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/uv/uv_mmrs.h>
|
||||
#include <asm/uv/uv_hub.h>
|
||||
|
||||
|
@ -55,37 +57,37 @@ static cpumask_t uv_vector_allocation_domain(int cpu)
|
|||
int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
|
||||
{
|
||||
unsigned long val;
|
||||
int nasid;
|
||||
int pnode;
|
||||
|
||||
nasid = uv_apicid_to_nasid(phys_apicid);
|
||||
pnode = uv_apicid_to_pnode(phys_apicid);
|
||||
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
|
||||
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
|
||||
(((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
|
||||
APIC_DM_INIT;
|
||||
uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
|
||||
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
|
||||
mdelay(10);
|
||||
|
||||
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
|
||||
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
|
||||
(((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
|
||||
APIC_DM_STARTUP;
|
||||
uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
|
||||
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void uv_send_IPI_one(int cpu, int vector)
|
||||
{
|
||||
unsigned long val, apicid, lapicid;
|
||||
int nasid;
|
||||
int pnode;
|
||||
|
||||
apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
|
||||
lapicid = apicid & 0x3f; /* ZZZ macro needed */
|
||||
nasid = uv_apicid_to_nasid(apicid);
|
||||
pnode = uv_apicid_to_pnode(apicid);
|
||||
val =
|
||||
(1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
|
||||
UVH_IPI_INT_APIC_ID_SHFT) |
|
||||
(vector << UVH_IPI_INT_VECTOR_SHFT);
|
||||
uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
|
||||
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
|
||||
}
|
||||
|
||||
static void uv_send_IPI_mask(cpumask_t mask, int vector)
|
||||
|
@ -159,39 +161,146 @@ struct genapic apic_x2apic_uv_x = {
|
|||
.phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
|
||||
};
|
||||
|
||||
static __cpuinit void set_x2apic_extra_bits(int nasid)
|
||||
static __cpuinit void set_x2apic_extra_bits(int pnode)
|
||||
{
|
||||
__get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6);
|
||||
__get_cpu_var(x2apic_extra_bits) = (pnode << 6);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called on boot cpu.
|
||||
*/
|
||||
static __init int boot_pnode_to_blade(int pnode)
|
||||
{
|
||||
int blade;
|
||||
|
||||
for (blade = 0; blade < uv_num_possible_blades(); blade++)
|
||||
if (pnode == uv_blade_info[blade].pnode)
|
||||
return blade;
|
||||
BUG();
|
||||
}
|
||||
|
||||
struct redir_addr {
|
||||
unsigned long redirect;
|
||||
unsigned long alias;
|
||||
};
|
||||
|
||||
#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
|
||||
|
||||
static __initdata struct redir_addr redir_addrs[] = {
|
||||
{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG},
|
||||
{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG},
|
||||
{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG},
|
||||
};
|
||||
|
||||
static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
|
||||
{
|
||||
union uvh_si_alias0_overlay_config_u alias;
|
||||
union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
|
||||
alias.v = uv_read_local_mmr(redir_addrs[i].alias);
|
||||
if (alias.s.base == 0) {
|
||||
*size = (1UL << alias.s.m_alias);
|
||||
redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
|
||||
*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
|
||||
return;
|
||||
}
|
||||
}
|
||||
BUG();
|
||||
}
|
||||
|
||||
static __init void map_low_mmrs(void)
|
||||
{
|
||||
init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
|
||||
init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
|
||||
}
|
||||
|
||||
enum map_type {map_wb, map_uc};
|
||||
|
||||
static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
|
||||
{
|
||||
unsigned long bytes, paddr;
|
||||
|
||||
paddr = base << shift;
|
||||
bytes = (1UL << shift);
|
||||
printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
|
||||
paddr + bytes);
|
||||
if (map_type == map_uc)
|
||||
init_extra_mapping_uc(paddr, bytes);
|
||||
else
|
||||
init_extra_mapping_wb(paddr, bytes);
|
||||
|
||||
}
|
||||
static __init void map_gru_high(int max_pnode)
|
||||
{
|
||||
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
|
||||
int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
|
||||
|
||||
gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
|
||||
if (gru.s.enable)
|
||||
map_high("GRU", gru.s.base, shift, map_wb);
|
||||
}
|
||||
|
||||
static __init void map_config_high(int max_pnode)
|
||||
{
|
||||
union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
|
||||
int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
|
||||
|
||||
cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
|
||||
if (cfg.s.enable)
|
||||
map_high("CONFIG", cfg.s.base, shift, map_uc);
|
||||
}
|
||||
|
||||
static __init void map_mmr_high(int max_pnode)
|
||||
{
|
||||
union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
|
||||
int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
|
||||
|
||||
mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
|
||||
if (mmr.s.enable)
|
||||
map_high("MMR", mmr.s.base, shift, map_uc);
|
||||
}
|
||||
|
||||
static __init void map_mmioh_high(int max_pnode)
|
||||
{
|
||||
union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
|
||||
int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
|
||||
|
||||
mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
|
||||
if (mmioh.s.enable)
|
||||
map_high("MMIOH", mmioh.s.base, shift, map_uc);
|
||||
}
|
||||
|
||||
static __init void uv_system_init(void)
|
||||
{
|
||||
union uvh_si_addr_map_config_u m_n_config;
|
||||
int bytes, nid, cpu, lcpu, nasid, last_nasid, blade;
|
||||
unsigned long mmr_base;
|
||||
union uvh_node_id_u node_id;
|
||||
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
|
||||
int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
|
||||
int max_pnode = 0;
|
||||
unsigned long mmr_base, present;
|
||||
|
||||
map_low_mmrs();
|
||||
|
||||
m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
|
||||
m_val = m_n_config.s.m_skt;
|
||||
n_val = m_n_config.s.n_skt;
|
||||
mmr_base =
|
||||
uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
|
||||
~UV_MMR_ENABLE;
|
||||
printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
|
||||
|
||||
last_nasid = -1;
|
||||
for_each_possible_cpu(cpu) {
|
||||
nid = cpu_to_node(cpu);
|
||||
nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
|
||||
if (nasid != last_nasid)
|
||||
uv_possible_blades++;
|
||||
last_nasid = nasid;
|
||||
}
|
||||
for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
|
||||
uv_possible_blades +=
|
||||
hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
|
||||
printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
|
||||
|
||||
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
|
||||
uv_blade_info = alloc_bootmem_pages(bytes);
|
||||
|
||||
get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
|
||||
|
||||
bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
|
||||
uv_node_to_blade = alloc_bootmem_pages(bytes);
|
||||
memset(uv_node_to_blade, 255, bytes);
|
||||
|
@ -200,43 +309,62 @@ static __init void uv_system_init(void)
|
|||
uv_cpu_to_blade = alloc_bootmem_pages(bytes);
|
||||
memset(uv_cpu_to_blade, 255, bytes);
|
||||
|
||||
last_nasid = -1;
|
||||
blade = -1;
|
||||
lcpu = -1;
|
||||
for_each_possible_cpu(cpu) {
|
||||
nid = cpu_to_node(cpu);
|
||||
nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
|
||||
if (nasid != last_nasid) {
|
||||
blade++;
|
||||
lcpu = -1;
|
||||
uv_blade_info[blade].nr_posible_cpus = 0;
|
||||
blade = 0;
|
||||
for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
|
||||
present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
|
||||
for (j = 0; j < 64; j++) {
|
||||
if (!test_bit(j, &present))
|
||||
continue;
|
||||
uv_blade_info[blade].pnode = (i * 64 + j);
|
||||
uv_blade_info[blade].nr_possible_cpus = 0;
|
||||
uv_blade_info[blade].nr_online_cpus = 0;
|
||||
blade++;
|
||||
}
|
||||
last_nasid = nasid;
|
||||
lcpu++;
|
||||
}
|
||||
|
||||
uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt;
|
||||
uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt;
|
||||
node_id.v = uv_read_local_mmr(UVH_NODE_ID);
|
||||
gnode_upper = (((unsigned long)node_id.s.node_id) &
|
||||
~((1 << n_val) - 1)) << m_val;
|
||||
|
||||
for_each_present_cpu(cpu) {
|
||||
nid = cpu_to_node(cpu);
|
||||
pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu));
|
||||
blade = boot_pnode_to_blade(pnode);
|
||||
lcpu = uv_blade_info[blade].nr_possible_cpus;
|
||||
uv_blade_info[blade].nr_possible_cpus++;
|
||||
|
||||
uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
|
||||
uv_cpu_hub_info(cpu)->lowmem_remap_top =
|
||||
lowmem_redir_base + lowmem_redir_size;
|
||||
uv_cpu_hub_info(cpu)->m_val = m_val;
|
||||
uv_cpu_hub_info(cpu)->n_val = m_val;
|
||||
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
|
||||
uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
|
||||
uv_cpu_hub_info(cpu)->local_nasid = nasid;
|
||||
uv_cpu_hub_info(cpu)->gnode_upper =
|
||||
nasid & ~((1 << uv_hub_info->n_val) - 1);
|
||||
uv_cpu_hub_info(cpu)->pnode = pnode;
|
||||
uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
|
||||
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
|
||||
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
|
||||
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
|
||||
uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
|
||||
uv_blade_info[blade].nasid = nasid;
|
||||
uv_blade_info[blade].nr_posible_cpus++;
|
||||
		uv_node_to_blade[nid] = blade;
		uv_cpu_to_blade[cpu] = blade;
		max_pnode = max(pnode, max_pnode);

		printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n",
			cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid);
		printk(KERN_DEBUG "UV lcpu %d, blade %d\n", lcpu, blade);
		printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, "
			"lcpu %d, blade %d\n",
			cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
			lcpu, blade);
	}

	map_gru_high(max_pnode);
	map_mmr_high(max_pnode);
	map_config_high(max_pnode);
	map_mmioh_high(max_pnode);
}
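The blade count in uv_system_init() is now derived by summing the set bits of each 64-bit word of the node-present table (hweight64 over UVH_NODE_PRESENT_TABLE) instead of watching for NASID changes while walking the possible CPUs. Below is a minimal userspace model of that counting step, with an invented three-word present mask purely for illustration; it is not the kernel code.

-------------------- 8< ----------------------------------------
/* Popcount over a node-present table: a toy model of the blade counting above. */
#include <stdio.h>
#include <stdint.h>

static int popcount64(uint64_t x)        /* stand-in for the kernel's hweight64() */
{
	int bits = 0;

	while (x) {
		bits += x & 1;
		x >>= 1;
	}
	return bits;
}

int main(void)
{
	/* Hypothetical UVH_NODE_PRESENT_TABLE contents: one bit per present node. */
	uint64_t present[] = { 0x000000000000000fULL, 0x0ULL, 0x0000000000000300ULL };
	int blades = 0;

	for (unsigned i = 0; i < sizeof(present) / sizeof(present[0]); i++)
		blades += popcount64(present[i]);

	printf("UV: Found %d blades\n", blades);   /* 4 + 0 + 2 = 6 */
	return 0;
}
-------------------- 8< ----------------------------------------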
|
||||
|
||||
/*
|
||||
* Called on each cpu to initialize the per_cpu UV data area.
|
||||
* ZZZ hotplug not supported yet
|
||||
*/
|
||||
void __cpuinit uv_cpu_init(void)
|
||||
{
|
||||
|
@ -246,5 +374,5 @@ void __cpuinit uv_cpu_init(void)
|
|||
uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
|
||||
|
||||
if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
|
||||
set_x2apic_extra_bits(uv_hub_info->local_nasid);
|
||||
set_x2apic_extra_bits(uv_hub_info->pnode);
|
||||
}
|
||||
|
|
|
@@ -0,0 +1,55 @@
#include <linux/kernel.h>
#include <linux/init.h>

#include <asm/setup.h>
#include <asm/bios_ebda.h>

#define BIOS_LOWMEM_KILOBYTES 0x413

/*
 * The BIOS places the EBDA/XBDA at the top of conventional
 * memory, and usually decreases the reported amount of
 * conventional memory (int 0x12) too. This also contains a
 * workaround for Dell systems that neglect to reserve EBDA.
 * The same workaround also avoids a problem with the AMD768MPX
 * chipset: reserve a page before VGA to prevent PCI prefetch
 * into it (errata #56). Usually the page is reserved anyways,
 * unless you have no PS/2 mouse plugged in.
 */
void __init reserve_ebda_region(void)
{
	unsigned int lowmem, ebda_addr;

	/* To determine the position of the EBDA and the */
	/* end of conventional memory, we need to look at */
	/* the BIOS data area. In a paravirtual environment */
	/* that area is absent. We'll just have to assume */
	/* that the paravirt case can handle memory setup */
	/* correctly, without our help. */
	if (paravirt_enabled())
		return;

	/* end of low (conventional) memory */
	lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
	lowmem <<= 10;

	/* start of EBDA area */
	ebda_addr = get_bios_ebda();

	/* Fixup: bios puts an EBDA in the top 64K segment */
	/* of conventional memory, but does not adjust lowmem. */
	if ((lowmem - ebda_addr) <= 0x10000)
		lowmem = ebda_addr;

	/* Fixup: bios does not report an EBDA at all. */
	/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
		lowmem = 0x9f000;

	/* Paranoia: should never happen, but... */
	if ((lowmem == 0) || (lowmem >= 0x100000))
		lowmem = 0x9f000;

	/* reserve all memory between lowmem and the 1MB mark */
	reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
}
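As a concrete walk-through of the arithmetic above: a machine that reports 639 KiB of conventional memory stores 639 at BIOS offset 0x413, so lowmem becomes 639 << 10 = 0x9fc00, and an EBDA segment of 0x9fc0 (what get_bios_ebda() would turn into the physical address 0x9fc00) then makes the first fixup take effect, leaving everything from 0x9fc00 up to the 1 MiB mark reserved. The snippet below is a standalone illustration with made-up sample values; it reads no real BIOS data area and is not the kernel code.

-------------------- 8< ----------------------------------------
/* Standalone illustration of reserve_ebda_region()'s fixups (sample values only). */
#include <stdio.h>

int main(void)
{
	/* Hypothetical BIOS data: 639 KiB conventional memory, EBDA segment 0x9fc0. */
	unsigned int lowmem = 639;              /* value a BIOS might store at 0x413 */
	unsigned int ebda_addr = 0x9fc0 << 4;   /* EBDA segment shifted to a physical address */

	lowmem <<= 10;                          /* kilobytes -> bytes: 0x9fc00 */

	/* EBDA sits in the top 64K of conventional memory: trust it as the limit. */
	if ((lowmem - ebda_addr) <= 0x10000)
		lowmem = ebda_addr;

	/* No EBDA reported at all: fall back to 0x9f000. */
	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
		lowmem = 0x9f000;

	/* Paranoia: clamp nonsensical values. */
	if ((lowmem == 0) || (lowmem >= 0x100000))
		lowmem = 0x9f000;

	printf("would reserve 0x%x-0x100000 as \"BIOS reserved\"\n", lowmem);
	return 0;
}
-------------------- 8< ----------------------------------------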
|
|
@ -8,7 +8,34 @@
|
|||
#include <linux/init.h>
|
||||
#include <linux/start_kernel.h>
|
||||
|
||||
#include <asm/setup.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/bios_ebda.h>
|
||||
|
||||
void __init i386_start_kernel(void)
|
||||
{
|
||||
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
/* Reserve INITRD */
|
||||
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
|
||||
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
|
||||
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
|
||||
u64 ramdisk_end = ramdisk_image + ramdisk_size;
|
||||
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
|
||||
}
|
||||
#endif
|
||||
reserve_early(init_pg_tables_start, init_pg_tables_end,
|
||||
"INIT_PG_TABLE");
|
||||
|
||||
reserve_ebda_region();
|
||||
|
||||
/*
|
||||
* At this point everything still needed from the boot loader
|
||||
* or BIOS or kernel text should be early reserved or marked not
|
||||
* RAM in e820. All other memory is free game.
|
||||
*/
|
||||
|
||||
start_kernel();
|
||||
}
|
||||
|
|
|
@ -25,6 +25,20 @@
|
|||
#include <asm/e820.h>
|
||||
#include <asm/bios_ebda.h>
|
||||
|
||||
/* boot cpu pda */
|
||||
static struct x8664_pda _boot_cpu_pda __read_mostly;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* We install an empty cpu_pda pointer table to indicate to early users
|
||||
* (numa_set_node) that the cpu_pda pointer table for cpus other than
|
||||
* the boot cpu is not yet setup.
|
||||
*/
|
||||
static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
|
||||
#else
|
||||
static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
|
||||
#endif
|
||||
|
||||
static void __init zap_identity_mappings(void)
|
||||
{
|
||||
pgd_t *pgd = pgd_offset_k(0UL);
|
||||
|
@ -51,74 +65,6 @@ static void __init copy_bootdata(char *real_mode_data)
|
|||
}
|
||||
}
|
||||
|
||||
#define BIOS_LOWMEM_KILOBYTES 0x413
|
||||
|
||||
/*
|
||||
* The BIOS places the EBDA/XBDA at the top of conventional
|
||||
* memory, and usually decreases the reported amount of
|
||||
* conventional memory (int 0x12) too. This also contains a
|
||||
* workaround for Dell systems that neglect to reserve EBDA.
|
||||
* The same workaround also avoids a problem with the AMD768MPX
|
||||
* chipset: reserve a page before VGA to prevent PCI prefetch
|
||||
* into it (errata #56). Usually the page is reserved anyways,
|
||||
* unless you have no PS/2 mouse plugged in.
|
||||
*/
|
||||
static void __init reserve_ebda_region(void)
|
||||
{
|
||||
unsigned int lowmem, ebda_addr;
|
||||
|
||||
/* To determine the position of the EBDA and the */
|
||||
/* end of conventional memory, we need to look at */
|
||||
/* the BIOS data area. In a paravirtual environment */
|
||||
/* that area is absent. We'll just have to assume */
|
||||
/* that the paravirt case can handle memory setup */
|
||||
/* correctly, without our help. */
|
||||
if (paravirt_enabled())
|
||||
return;
|
||||
|
||||
/* end of low (conventional) memory */
|
||||
lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
|
||||
lowmem <<= 10;
|
||||
|
||||
/* start of EBDA area */
|
||||
ebda_addr = get_bios_ebda();
|
||||
|
||||
/* Fixup: bios puts an EBDA in the top 64K segment */
|
||||
/* of conventional memory, but does not adjust lowmem. */
|
||||
if ((lowmem - ebda_addr) <= 0x10000)
|
||||
lowmem = ebda_addr;
|
||||
|
||||
/* Fixup: bios does not report an EBDA at all. */
|
||||
/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
|
||||
if ((ebda_addr == 0) && (lowmem >= 0x9f000))
|
||||
lowmem = 0x9f000;
|
||||
|
||||
/* Paranoia: should never happen, but... */
|
||||
if ((lowmem == 0) || (lowmem >= 0x100000))
|
||||
lowmem = 0x9f000;
|
||||
|
||||
/* reserve all memory between lowmem and the 1MB mark */
|
||||
reserve_early(lowmem, 0x100000, "BIOS reserved");
|
||||
}
|
||||
|
||||
static void __init reserve_setup_data(void)
|
||||
{
|
||||
struct setup_data *data;
|
||||
unsigned long pa_data;
|
||||
char buf[32];
|
||||
|
||||
if (boot_params.hdr.version < 0x0209)
|
||||
return;
|
||||
pa_data = boot_params.hdr.setup_data;
|
||||
while (pa_data) {
|
||||
data = early_ioremap(pa_data, sizeof(*data));
|
||||
sprintf(buf, "setup data %x", data->type);
|
||||
reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
|
||||
pa_data = data->next;
|
||||
early_iounmap(data, sizeof(*data));
|
||||
}
|
||||
}
|
||||
|
||||
void __init x86_64_start_kernel(char * real_mode_data)
|
||||
{
|
||||
int i;
|
||||
|
@ -156,10 +102,17 @@ void __init x86_64_start_kernel(char * real_mode_data)
|
|||
|
||||
early_printk("Kernel alive\n");
|
||||
|
||||
for (i = 0; i < NR_CPUS; i++)
|
||||
cpu_pda(i) = &boot_cpu_pda[i];
|
||||
|
||||
_cpu_pda = __cpu_pda;
|
||||
cpu_pda(0) = &_boot_cpu_pda;
|
||||
pda_init(0);
|
||||
|
||||
early_printk("Kernel really alive\n");
|
||||
|
||||
x86_64_start_reservations(real_mode_data);
|
||||
}
|
||||
|
||||
void __init x86_64_start_reservations(char *real_mode_data)
|
||||
{
|
||||
copy_bootdata(__va(real_mode_data));
|
||||
|
||||
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
|
||||
|
@ -175,7 +128,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
|
|||
#endif
|
||||
|
||||
reserve_ebda_region();
|
||||
reserve_setup_data();
|
||||
|
||||
/*
|
||||
* At this point everything still needed from the boot loader
|
||||
|
|
|
@ -194,6 +194,7 @@ default_entry:
|
|||
xorl %ebx,%ebx /* %ebx is kept at zero */
|
||||
|
||||
movl $pa(pg0), %edi
|
||||
movl %edi, pa(init_pg_tables_start)
|
||||
movl $pa(swapper_pg_pmd), %edx
|
||||
movl $PTE_ATTR, %eax
|
||||
10:
|
||||
|
@ -219,6 +220,8 @@ default_entry:
|
|||
jb 10b
|
||||
1:
|
||||
movl %edi,pa(init_pg_tables_end)
|
||||
shrl $12, %eax
|
||||
movl %eax, pa(max_pfn_mapped)
|
||||
|
||||
/* Do early initialization of the fixmap area */
|
||||
movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
|
||||
|
@ -228,6 +231,7 @@ default_entry:
|
|||
page_pde_offset = (__PAGE_OFFSET >> 20);
|
||||
|
||||
movl $pa(pg0), %edi
|
||||
movl %edi, pa(init_pg_tables_start)
|
||||
movl $pa(swapper_pg_dir), %edx
|
||||
movl $PTE_ATTR, %eax
|
||||
10:
|
||||
|
@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
|
|||
cmpl %ebp,%eax
|
||||
jb 10b
|
||||
movl %edi,pa(init_pg_tables_end)
|
||||
shrl $12, %eax
|
||||
movl %eax, pa(max_pfn_mapped)
|
||||
|
||||
/* Do early initialization of the fixmap area */
|
||||
movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
|
||||
|
@ -446,10 +452,13 @@ is386: movl $2,%ecx # set MP
|
|||
je 1f
|
||||
movl $(__KERNEL_PERCPU), %eax
|
||||
movl %eax,%fs # set this cpu's percpu
|
||||
jmp initialize_secondary # all other CPUs call initialize_secondary
|
||||
movl (stack_start), %esp
|
||||
1:
|
||||
#endif /* CONFIG_SMP */
|
||||
jmp i386_start_kernel
|
||||
jmp *(initial_code)
|
||||
.align 4
|
||||
ENTRY(initial_code)
|
||||
.long i386_start_kernel
|
||||
|
||||
/*
|
||||
* We depend on ET to be correct. This checks for 287/387.
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/processor-flags.h>
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
#include <asm/asm-offsets.h>
|
||||
|
@ -31,6 +32,13 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
|
||||
|
||||
L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
|
||||
L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
|
||||
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
|
||||
L3_START_KERNEL = pud_index(__START_KERNEL_map)
|
||||
|
||||
.text
|
||||
.section .text.head
|
||||
.code64
|
||||
|
@ -76,8 +84,8 @@ startup_64:
|
|||
/* Fixup the physical addresses in the page table
|
||||
*/
|
||||
addq %rbp, init_level4_pgt + 0(%rip)
|
||||
addq %rbp, init_level4_pgt + (258*8)(%rip)
|
||||
addq %rbp, init_level4_pgt + (511*8)(%rip)
|
||||
addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
|
||||
addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)
|
||||
|
||||
addq %rbp, level3_ident_pgt + 0(%rip)
|
||||
|
||||
|
@ -154,9 +162,7 @@ ENTRY(secondary_startup_64)
|
|||
*/
|
||||
|
||||
/* Enable PAE mode and PGE */
|
||||
xorq %rax, %rax
|
||||
btsq $5, %rax
|
||||
btsq $7, %rax
|
||||
movl $(X86_CR4_PAE | X86_CR4_PGE), %eax
|
||||
movq %rax, %cr4
|
||||
|
||||
/* Setup early boot stage 4 level pagetables. */
|
||||
|
@ -184,19 +190,15 @@ ENTRY(secondary_startup_64)
|
|||
1: wrmsr /* Make changes effective */
|
||||
|
||||
/* Setup cr0 */
|
||||
#define CR0_PM 1 /* protected mode */
|
||||
#define CR0_MP (1<<1)
|
||||
#define CR0_ET (1<<4)
|
||||
#define CR0_NE (1<<5)
|
||||
#define CR0_WP (1<<16)
|
||||
#define CR0_AM (1<<18)
|
||||
#define CR0_PAGING (1<<31)
|
||||
movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
|
||||
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
|
||||
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
|
||||
X86_CR0_PG)
|
||||
movl $CR0_STATE, %eax
|
||||
/* Make changes effective */
|
||||
movq %rax, %cr0
|
||||
|
||||
/* Setup a boot time stack */
|
||||
movq init_rsp(%rip),%rsp
|
||||
movq stack_start(%rip),%rsp
|
||||
|
||||
/* zero EFLAGS after setting rsp */
|
||||
pushq $0
|
||||
|
@ -208,7 +210,7 @@ ENTRY(secondary_startup_64)
|
|||
* addresses where we're currently running on. We have to do that here
|
||||
* because in 32bit we couldn't load a 64bit linear address.
|
||||
*/
|
||||
lgdt cpu_gdt_descr(%rip)
|
||||
lgdt early_gdt_descr(%rip)
|
||||
|
||||
/* set up data segments. actually 0 would do too */
|
||||
movl $__KERNEL_DS,%eax
|
||||
|
@ -257,8 +259,9 @@ ENTRY(secondary_startup_64)
|
|||
.quad x86_64_start_kernel
|
||||
__FINITDATA
|
||||
|
||||
ENTRY(init_rsp)
|
||||
ENTRY(stack_start)
|
||||
.quad init_thread_union+THREAD_SIZE-8
|
||||
.word 0
|
||||
|
||||
bad_address:
|
||||
jmp bad_address
|
||||
|
@@ -327,11 +330,11 @@ early_idt_ripmsg:
ENTRY(name)

/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
	i = 0 ; \
	.rept (COUNT) ; \
	.quad (START) + (i << 21) + (PERM) ; \
	i = i + 1 ; \
#define PMDS(START, PERM, COUNT) \
	i = 0 ; \
	.rept (COUNT) ; \
	.quad (START) + (i << PMD_SHIFT) + (PERM) ; \
	i = i + 1 ; \
	.endr
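To see what the macro emits, the small sketch below walks the same arithmetic in C and prints the first few 2 MiB identity-mapping entries. It assumes PMD_SHIFT is 21 (matching the literal 21 the macro used before this change) and uses a made-up PERM value; it only illustrates the START + (i << PMD_SHIFT) + PERM pattern, nothing more.

-------------------- 8< ----------------------------------------
/* Print the values PMDS(START, PERM, COUNT) would emit as .quad entries. */
#include <stdio.h>
#include <stdint.h>

#define PMD_SHIFT 21                 /* 2 MiB pages, as in the old literal "i << 21" */

int main(void)
{
	uint64_t start = 0;          /* identity map from physical address 0 */
	uint64_t perm  = 0x1e3;      /* placeholder permission bits, not a real flag set */
	int count = 4;               /* print only the first few entries */

	for (int i = 0; i < count; i++)
		printf(".quad 0x%016llx\n",
		       (unsigned long long)(start + ((uint64_t)i << PMD_SHIFT) + perm));
	return 0;
}
-------------------- 8< ----------------------------------------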
|
||||
|
||||
/*
|
||||
|
@ -342,9 +345,9 @@ ENTRY(name)
|
|||
*/
|
||||
NEXT_PAGE(init_level4_pgt)
|
||||
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
|
||||
.fill 257,8,0
|
||||
.org init_level4_pgt + L4_PAGE_OFFSET*8, 0
|
||||
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
|
||||
.fill 252,8,0
|
||||
.org init_level4_pgt + L4_START_KERNEL*8, 0
|
||||
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
|
||||
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
|
||||
|
||||
|
@ -353,7 +356,7 @@ NEXT_PAGE(level3_ident_pgt)
|
|||
.fill 511,8,0
|
||||
|
||||
NEXT_PAGE(level3_kernel_pgt)
|
||||
.fill 510,8,0
|
||||
.fill L3_START_KERNEL,8,0
|
||||
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
|
||||
.quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
|
||||
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
|
||||
|
@ -384,7 +387,7 @@ NEXT_PAGE(level2_kernel_pgt)
|
|||
* If you want to increase this then increase MODULES_VADDR
|
||||
* too.)
|
||||
*/
|
||||
PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
|
||||
PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
|
||||
KERNEL_IMAGE_SIZE/PMD_SIZE)
|
||||
|
||||
NEXT_PAGE(level2_spare_pgt)
|
||||
|
@ -395,54 +398,16 @@ NEXT_PAGE(level2_spare_pgt)
|
|||
|
||||
.data
|
||||
.align 16
|
||||
.globl cpu_gdt_descr
|
||||
cpu_gdt_descr:
|
||||
.word gdt_end-cpu_gdt_table-1
|
||||
gdt:
|
||||
.quad cpu_gdt_table
|
||||
#ifdef CONFIG_SMP
|
||||
.rept NR_CPUS-1
|
||||
.word 0
|
||||
.quad 0
|
||||
.endr
|
||||
#endif
|
||||
.globl early_gdt_descr
|
||||
early_gdt_descr:
|
||||
.word GDT_ENTRIES*8-1
|
||||
.quad per_cpu__gdt_page
|
||||
|
||||
ENTRY(phys_base)
|
||||
/* This must match the first entry in level2_kernel_pgt */
|
||||
.quad 0x0000000000000000
|
||||
|
||||
/* We need valid kernel segments for data and code in long mode too
|
||||
* IRET will check the segment types kkeil 2000/10/28
|
||||
* Also sysret mandates a special GDT layout
|
||||
*/
|
||||
|
||||
.section .data.page_aligned, "aw"
|
||||
.align PAGE_SIZE
|
||||
|
||||
/* The TLS descriptors are currently at a different place compared to i386.
|
||||
Hopefully nobody expects them at a fixed place (Wine?) */
|
||||
|
||||
ENTRY(cpu_gdt_table)
|
||||
.quad 0x0000000000000000 /* NULL descriptor */
|
||||
.quad 0x00cf9b000000ffff /* __KERNEL32_CS */
|
||||
.quad 0x00af9b000000ffff /* __KERNEL_CS */
|
||||
.quad 0x00cf93000000ffff /* __KERNEL_DS */
|
||||
.quad 0x00cffb000000ffff /* __USER32_CS */
|
||||
.quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
|
||||
.quad 0x00affb000000ffff /* __USER_CS */
|
||||
.quad 0x0 /* unused */
|
||||
.quad 0,0 /* TSS */
|
||||
.quad 0,0 /* LDT */
|
||||
.quad 0,0,0 /* three TLS descriptors */
|
||||
.quad 0x0000f40000000000 /* node/CPU stored in limit */
|
||||
gdt_end:
|
||||
/* asm/segment.h:GDT_ENTRIES must match this */
|
||||
/* This should be a multiple of the cache line size */
|
||||
/* GDTs of other CPUs are now dynamically allocated */
|
||||
|
||||
/* zero the remaining page */
|
||||
.fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
|
||||
|
||||
.section .bss, "aw", @nobits
|
||||
.align L1_CACHE_BYTES
|
||||
ENTRY(idt_table)
|
||||
|
|
|
@@ -17,7 +17,7 @@

/* FSEC = 10^-15
   NSEC = 10^-9 */
#define FSEC_PER_NSEC 1000000
#define FSEC_PER_NSEC 1000000L

/*
 * HPET address is set in acpi/boot.c, when an ACPI entry exists
|
||||
|
@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned long d, unsigned long a)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
static inline void hpet_set_mapping(void)
|
||||
{
|
||||
set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
|
||||
__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
|
||||
hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
|
||||
}
|
||||
|
||||
static inline void hpet_clear_mapping(void)
|
||||
{
|
||||
hpet_virt_address = NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
#endif
|
||||
|
||||
static inline void hpet_set_mapping(void)
|
||||
{
|
||||
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
|
||||
#ifdef CONFIG_X86_64
|
||||
__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void hpet_clear_mapping(void)
|
||||
|
@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(void)
|
|||
iounmap(hpet_virt_address);
|
||||
hpet_virt_address = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* HPET command line enable / disable
|
||||
|
@@ -206,20 +194,19 @@ static void hpet_enable_legacy_int(void)

static void hpet_legacy_clockevent_register(void)
{
	uint64_t hpet_freq;

	/* Start HPET legacy interrupts */
	hpet_enable_legacy_int();

	/*
	 * The period is a femto seconds value. We need to calculate the
	 * scaled math multiplication factor for nanosecond to hpet tick
	 * conversion.
	 * The mult factor is defined as (include/linux/clockchips.h)
	 * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h)
	 * hpet_period is in units of femtoseconds (per cycle), so
	 * mult/2^shift = cyc/ns = 10^6/hpet_period
	 * mult = (10^6 * 2^shift)/hpet_period
	 * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period
	 */
	hpet_freq = 1000000000000000ULL;
	do_div(hpet_freq, hpet_period);
	hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
				      NSEC_PER_SEC, hpet_clockevent.shift);
	hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC,
				      hpet_period, hpet_clockevent.shift);
	/* Calculate the min / max delta */
	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
							   &hpet_clockevent);
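The derivation in the comment is easy to check with concrete numbers. Taking the common 14.318 MHz HPET as an example (hpet_period of roughly 69841279 fsec per cycle, a sample value) and assuming a shift of 32 purely for illustration, the old two-step route (femtoseconds per second divided by the period, then scaled against NSEC_PER_SEC) and the new single div_sc(FSEC_PER_NSEC, hpet_period, shift) land on essentially the same mult. The sketch below only evaluates both formulas; it is not the kernel code.

-------------------- 8< ----------------------------------------
/* Evaluate the clockevent mult both ways for a sample HPET period. */
#include <stdio.h>
#include <stdint.h>

#define FSEC_PER_SEC   1000000000000000ULL
#define NSEC_PER_SEC   1000000000ULL
#define FSEC_PER_NSEC  1000000ULL

/* (a << shift) / b, the shape of the div_sc() calls in the hunk above */
static uint64_t div_sc64(uint64_t a, uint64_t b, int shift)
{
	return (a << shift) / b;
}

int main(void)
{
	uint64_t hpet_period = 69841279;   /* fsec/cycle, sample 14.318 MHz HPET */
	int shift = 32;                    /* illustrative shift value */

	uint64_t hpet_freq = FSEC_PER_SEC / hpet_period;   /* old route: ~14318179 Hz */
	uint64_t mult_old  = div_sc64(hpet_freq, NSEC_PER_SEC, shift);
	uint64_t mult_new  = div_sc64(FSEC_PER_NSEC, hpet_period, shift);

	printf("old mult = %llu, new mult = %llu\n",
	       (unsigned long long)mult_old, (unsigned long long)mult_new);
	return 0;
}
-------------------- 8< ----------------------------------------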
|
||||
|
@ -324,7 +311,7 @@ static struct clocksource clocksource_hpet = {
|
|||
|
||||
static int hpet_clocksource_register(void)
|
||||
{
|
||||
u64 tmp, start, now;
|
||||
u64 start, now;
|
||||
cycle_t t1;
|
||||
|
||||
/* Start the counter */
|
||||
|
@@ -351,21 +338,15 @@ static int hpet_clocksource_register(void)
		return -ENODEV;
	}

	/* Initialize and register HPET clocksource
	 *
	 * hpet period is in femto seconds per cycle
	 * so we need to convert this to ns/cyc units
	 * approximated by mult/2^shift
	 *
	 * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
	 * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
	 * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
	 * (fsec/cyc << shift)/1000000 = mult
	 * (hpet_period << shift)/FSEC_PER_NSEC = mult
	/*
	 * The definition of mult is (include/linux/clocksource.h)
	 * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc
	 * so we first need to convert hpet_period to ns/cyc units:
	 * mult/2^shift = ns/cyc = hpet_period/10^6
	 * mult = (hpet_period * 2^shift)/10^6
	 * mult = (hpet_period << shift)/FSEC_PER_NSEC
	 */
	tmp = (u64)hpet_period << HPET_SHIFT;
	do_div(tmp, FSEC_PER_NSEC);
	clocksource_hpet.mult = (u32)tmp;
	clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT);
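This is the mirror image of the clockevent calculation: here mult scales cycles to nanoseconds rather than the reverse, so the period goes in the numerator. With the same sample period of 69841279 fsec/cycle and an assumed HPET_SHIFT of 22 (illustrative only), (hpet_period << 22)/FSEC_PER_NSEC works out to roughly 2.9 * 10^8. The one-liner below, a plain userspace check rather than kernel code, prints the exact value.

-------------------- 8< ----------------------------------------
/* Evaluate the clocksource mult for a sample HPET period (userspace check only). */
#include <stdio.h>
#include <stdint.h>

#define FSEC_PER_NSEC  1000000ULL
#define HPET_SHIFT     22          /* assumed shift, for illustration */

int main(void)
{
	uint64_t hpet_period = 69841279;   /* fsec/cycle, sample 14.318 MHz HPET */
	uint64_t mult = (hpet_period << HPET_SHIFT) / FSEC_PER_NSEC;

	/* ns elapsed for N cycles is then approximately (N * mult) >> HPET_SHIFT */
	printf("mult = %llu\n", (unsigned long long)mult);
	return 0;
}
-------------------- 8< ----------------------------------------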
|
||||
|
||||
clocksource_register(&clocksource_hpet);
|
||||
|
||||
|
|
|
@ -162,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
|
|||
int ret;
|
||||
|
||||
if (!cpu_has_fxsr)
|
||||
return -EIO;
|
||||
return -ENODEV;
|
||||
|
||||
ret = init_fpu(target);
|
||||
if (ret)
|
||||
|
@ -179,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
|
|||
int ret;
|
||||
|
||||
if (!cpu_has_fxsr)
|
||||
return -EIO;
|
||||
return -ENODEV;
|
||||
|
||||
ret = init_fpu(target);
|
||||
if (ret)
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
#include <linux/linkage.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/init.h>
|
||||
|
@ -10,10 +12,12 @@
|
|||
#include <linux/sysdev.h>
|
||||
#include <linux/bitops.h>
|
||||
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/desc.h>
|
||||
|
@ -32,7 +36,7 @@ static int i8259A_auto_eoi;
|
|||
DEFINE_SPINLOCK(i8259A_lock);
|
||||
static void mask_and_ack_8259A(unsigned int);
|
||||
|
||||
static struct irq_chip i8259A_chip = {
|
||||
struct irq_chip i8259A_chip = {
|
||||
.name = "XT-PIC",
|
||||
.mask = disable_8259A_irq,
|
||||
.disable = disable_8259A_irq,
|
||||
|
@ -125,14 +129,14 @@ static inline int i8259A_irq_real(unsigned int irq)
|
|||
int irqmask = 1<<irq;
|
||||
|
||||
if (irq < 8) {
|
||||
outb(0x0B,PIC_MASTER_CMD); /* ISR register */
|
||||
outb(0x0B, PIC_MASTER_CMD); /* ISR register */
|
||||
value = inb(PIC_MASTER_CMD) & irqmask;
|
||||
outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */
|
||||
outb(0x0A, PIC_MASTER_CMD); /* back to the IRR register */
|
||||
return value;
|
||||
}
|
||||
outb(0x0B,PIC_SLAVE_CMD); /* ISR register */
|
||||
outb(0x0B, PIC_SLAVE_CMD); /* ISR register */
|
||||
value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
|
||||
outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */
|
||||
outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */
|
||||
return value;
|
||||
}
|
||||
|
||||
|
@ -171,12 +175,14 @@ handle_real_irq:
|
|||
if (irq & 8) {
|
||||
inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
|
||||
outb(cached_slave_mask, PIC_SLAVE_IMR);
|
||||
outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
|
||||
outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
|
||||
/* 'Specific EOI' to slave */
|
||||
outb(0x60+(irq&7), PIC_SLAVE_CMD);
|
||||
/* 'Specific EOI' to master-IRQ2 */
|
||||
outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD);
|
||||
} else {
|
||||
inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
|
||||
outb(cached_master_mask, PIC_MASTER_IMR);
|
||||
outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */
|
||||
outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */
|
||||
}
|
||||
spin_unlock_irqrestore(&i8259A_lock, flags);
|
||||
return;
|
||||
|
@ -199,7 +205,8 @@ spurious_8259A_irq:
|
|||
* lets ACK and report it. [once per IRQ]
|
||||
*/
|
||||
if (!(spurious_irq_mask & irqmask)) {
|
||||
printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
|
||||
printk(KERN_DEBUG
|
||||
"spurious 8259A interrupt: IRQ%d.\n", irq);
|
||||
spurious_irq_mask |= irqmask;
|
||||
}
|
||||
atomic_inc(&irq_err_count);
|
||||
|
@ -290,17 +297,28 @@ void init_8259A(int auto_eoi)
|
|||
* outb_pic - this has to work on a wide range of PC hardware.
|
||||
*/
|
||||
outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
|
||||
outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
|
||||
outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */
|
||||
|
||||
/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64,
|
||||
to 0x20-0x27 on i386 */
|
||||
outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
|
||||
|
||||
/* 8259A-1 (the master) has a slave on IR2 */
|
||||
outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);
|
||||
|
||||
if (auto_eoi) /* master does Auto EOI */
|
||||
outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
|
||||
else /* master expects normal EOI */
|
||||
outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
|
||||
|
||||
outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
|
||||
outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
|
||||
outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */
|
||||
outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
|
||||
|
||||
/* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */
|
||||
outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
|
||||
/* 8259A-2 is a slave on master's IR2 */
|
||||
outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
|
||||
/* (slave's support for AEOI in flat mode is to be investigated) */
|
||||
outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
|
||||
|
||||
if (auto_eoi)
|
||||
/*
|
||||
* In AEOI mode we just have to mask the interrupt
|
||||
|
@ -317,93 +335,3 @@ void init_8259A(int auto_eoi)
|
|||
|
||||
spin_unlock_irqrestore(&i8259A_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that on a 486, we don't want to do a SIGFPE on an irq13
|
||||
* as the irq is unreliable, and exception 16 works correctly
|
||||
* (ie as explained in the intel literature). On a 386, you
|
||||
* can't use exception 16 due to bad IBM design, so we have to
|
||||
* rely on the less exact irq13.
|
||||
*
|
||||
* Careful.. Not only is IRQ13 unreliable, but it is also
|
||||
* leads to races. IBM designers who came up with it should
|
||||
* be shot.
|
||||
*/
|
||||
|
||||
|
||||
static irqreturn_t math_error_irq(int cpl, void *dev_id)
|
||||
{
|
||||
extern void math_error(void __user *);
|
||||
outb(0,0xF0);
|
||||
if (ignore_fpu_irq || !boot_cpu_data.hard_math)
|
||||
return IRQ_NONE;
|
||||
math_error((void __user *)get_irq_regs()->ip);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
|
||||
* New motherboards sometimes make IRQ 13 be a PCI interrupt,
|
||||
* so allow interrupt sharing.
|
||||
*/
|
||||
static struct irqaction fpu_irq = {
|
||||
.handler = math_error_irq,
|
||||
.mask = CPU_MASK_NONE,
|
||||
.name = "fpu",
|
||||
};
|
||||
|
||||
void __init init_ISA_irqs (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
init_bsp_APIC();
|
||||
#endif
|
||||
init_8259A(0);
|
||||
|
||||
/*
|
||||
* 16 old-style INTA-cycle interrupts:
|
||||
*/
|
||||
for (i = 0; i < 16; i++) {
|
||||
set_irq_chip_and_handler_name(i, &i8259A_chip,
|
||||
handle_level_irq, "XT");
|
||||
}
|
||||
}
|
||||
|
||||
/* Overridden in paravirt.c */
|
||||
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
|
||||
|
||||
void __init native_init_IRQ(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* all the set up before the call gates are initialised */
|
||||
pre_intr_init_hook();
|
||||
|
||||
/*
|
||||
* Cover the whole vector space, no vector can escape
|
||||
* us. (some of these will be overridden and become
|
||||
* 'special' SMP interrupts)
|
||||
*/
|
||||
for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
|
||||
int vector = FIRST_EXTERNAL_VECTOR + i;
|
||||
if (i >= NR_IRQS)
|
||||
break;
|
||||
/* SYSCALL_VECTOR was reserved in trap_init. */
|
||||
if (!test_bit(vector, used_vectors))
|
||||
set_intr_gate(vector, interrupt[i]);
|
||||
}
|
||||
|
||||
/* setup after call gates are initialised (usually add in
|
||||
* the architecture specific gates)
|
||||
*/
|
||||
intr_init_hook();
|
||||
|
||||
/*
|
||||
* External FPU? Set up irq13 if so, for
|
||||
* original braindamaged IBM FERR coupling.
|
||||
*/
|
||||
if (boot_cpu_data.hard_math && !cpu_has_fpu)
|
||||
setup_irq(FPU_IRQ, &fpu_irq);
|
||||
|
||||
irq_ctx_init(smp_processor_id());
|
||||
}
|
|
@ -1,512 +0,0 @@
|
|||
#include <linux/linkage.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/bitops.h>
|
||||
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/i8259.h>
|
||||
|
||||
/*
|
||||
* Common place to define all x86 IRQ vectors
|
||||
*
|
||||
* This builds up the IRQ handler stubs using some ugly macros in irq.h
|
||||
*
|
||||
* These macros create the low-level assembly IRQ routines that save
|
||||
* register context and call do_IRQ(). do_IRQ() then does all the
|
||||
* operations that are needed to keep the AT (or SMP IOAPIC)
|
||||
* interrupt-controller happy.
|
||||
*/
|
||||
|
||||
#define BI(x,y) \
|
||||
BUILD_IRQ(x##y)
|
||||
|
||||
#define BUILD_16_IRQS(x) \
|
||||
BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
|
||||
BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
|
||||
BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
|
||||
BI(x,c) BI(x,d) BI(x,e) BI(x,f)
|
||||
|
||||
/*
|
||||
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
|
||||
* (these are usually mapped to vectors 0x30-0x3f)
|
||||
*/
|
||||
|
||||
/*
|
||||
* The IO-APIC gives us many more interrupt sources. Most of these
|
||||
* are unused but an SMP system is supposed to have enough memory ...
|
||||
* sometimes (mostly wrt. hw bugs) we get corrupted vectors all
|
||||
* across the spectrum, so we really want to be prepared to get all
|
||||
* of these. Plus, more powerful systems might have more than 64
|
||||
* IO-APIC registers.
|
||||
*
|
||||
* (these are usually mapped into the 0x30-0xff vector range)
|
||||
*/
|
||||
BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
|
||||
BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
|
||||
BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
|
||||
BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
|
||||
|
||||
#undef BUILD_16_IRQS
|
||||
#undef BI
|
||||
|
||||
|
||||
#define IRQ(x,y) \
|
||||
IRQ##x##y##_interrupt
|
||||
|
||||
#define IRQLIST_16(x) \
|
||||
IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
|
||||
IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
|
||||
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
|
||||
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
|
||||
|
||||
/* for the irq vectors */
|
||||
static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
|
||||
IRQLIST_16(0x2), IRQLIST_16(0x3),
|
||||
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
|
||||
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
|
||||
IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
|
||||
};
|
||||
|
||||
#undef IRQ
|
||||
#undef IRQLIST_16
|
||||
|
||||
/*
|
||||
* This is the 'legacy' 8259A Programmable Interrupt Controller,
|
||||
* present in the majority of PC/AT boxes.
|
||||
* plus some generic x86 specific things if generic specifics makes
|
||||
* any sense at all.
|
||||
* this file should become arch/i386/kernel/irq.c when the old irq.c
|
||||
* moves to arch independent land
|
||||
*/
|
||||
|
||||
static int i8259A_auto_eoi;
|
||||
DEFINE_SPINLOCK(i8259A_lock);
|
||||
static void mask_and_ack_8259A(unsigned int);
|
||||
|
||||
static struct irq_chip i8259A_chip = {
|
||||
.name = "XT-PIC",
|
||||
.mask = disable_8259A_irq,
|
||||
.disable = disable_8259A_irq,
|
||||
.unmask = enable_8259A_irq,
|
||||
.mask_ack = mask_and_ack_8259A,
|
||||
};
|
||||
|
||||
/*
|
||||
* 8259A PIC functions to handle ISA devices:
|
||||
*/
|
||||
|
||||
/*
|
||||
* This contains the irq mask for both 8259A irq controllers,
|
||||
*/
|
||||
unsigned int cached_irq_mask = 0xffff;
|
||||
|
||||
/*
|
||||
* Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
|
||||
* boards the timer interrupt is not really connected to any IO-APIC pin,
|
||||
* it's fed to the master 8259A's IR0 line only.
|
||||
*
|
||||
* Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
|
||||
* this 'mixed mode' IRQ handling costs nothing because it's only used
|
||||
* at IRQ setup time.
|
||||
*/
|
||||
unsigned long io_apic_irqs;
|
||||
|
||||
void disable_8259A_irq(unsigned int irq)
|
||||
{
|
||||
unsigned int mask = 1 << irq;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&i8259A_lock, flags);
|
||||
cached_irq_mask |= mask;
|
||||
if (irq & 8)
|
||||
outb(cached_slave_mask, PIC_SLAVE_IMR);
|
||||
else
|
||||
outb(cached_master_mask, PIC_MASTER_IMR);
|
||||
spin_unlock_irqrestore(&i8259A_lock, flags);
|
||||
}
|
||||
|
||||
void enable_8259A_irq(unsigned int irq)
|
||||
{
|
||||
unsigned int mask = ~(1 << irq);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&i8259A_lock, flags);
|
||||
cached_irq_mask &= mask;
|
||||
if (irq & 8)
|
||||
outb(cached_slave_mask, PIC_SLAVE_IMR);
|
||||
else
|
||||
outb(cached_master_mask, PIC_MASTER_IMR);
|
||||
spin_unlock_irqrestore(&i8259A_lock, flags);
|
||||
}
|
||||
|
||||
int i8259A_irq_pending(unsigned int irq)
|
||||
{
|
||||
unsigned int mask = 1<<irq;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
spin_lock_irqsave(&i8259A_lock, flags);
|
||||
if (irq < 8)
|
||||
ret = inb(PIC_MASTER_CMD) & mask;
|
||||
else
|
||||
ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
|
||||
spin_unlock_irqrestore(&i8259A_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void make_8259A_irq(unsigned int irq)
|
||||
{
|
||||
disable_irq_nosync(irq);
|
||||
io_apic_irqs &= ~(1<<irq);
|
||||
set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
|
||||
"XT");
|
||||
enable_irq(irq);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function assumes to be called rarely. Switching between
|
||||
* 8259A registers is slow.
|
||||
* This has to be protected by the irq controller spinlock
|
||||
* before being called.
|
||||
*/
|
||||
static inline int i8259A_irq_real(unsigned int irq)
|
||||
{
|
||||
int value;
|
||||
int irqmask = 1<<irq;
|
||||
|
||||
if (irq < 8) {
|
||||
outb(0x0B,PIC_MASTER_CMD); /* ISR register */
|
||||
value = inb(PIC_MASTER_CMD) & irqmask;
|
||||
outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */
|
||||
return value;
|
||||
}
|
||||
outb(0x0B,PIC_SLAVE_CMD); /* ISR register */
|
||||
value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
|
||||
outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */
|
||||
return value;
|
||||
}
|
||||
|
||||
/*
 * Careful! The 8259A is a fragile beast, it pretty
 * much _has_ to be done exactly like this (mask it
 * first, _then_ send the EOI, and the order of EOI
 * to the two 8259s is important!)
 */
static void mask_and_ack_8259A(unsigned int irq)
{
	unsigned int irqmask = 1 << irq;
	unsigned long flags;

	spin_lock_irqsave(&i8259A_lock, flags);
	/*
	 * Lightweight spurious IRQ detection. We do not want
	 * to overdo spurious IRQ handling - it's usually a sign
	 * of hardware problems, so we only do the checks we can
	 * do without slowing down good hardware unnecessarily.
	 *
	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
	 * usually resulting from the 8259A-1|2 PICs) occur
	 * even if the IRQ is masked in the 8259A. Thus we
	 * can check spurious 8259A IRQs without doing the
	 * quite slow i8259A_irq_real() call for every IRQ.
	 * This does not cover 100% of spurious interrupts,
	 * but should be enough to warn the user that there
	 * is something bad going on ...
	 */
	if (cached_irq_mask & irqmask)
		goto spurious_8259A_irq;
	cached_irq_mask |= irqmask;

handle_real_irq:
	if (irq & 8) {
		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
		outb(cached_slave_mask, PIC_SLAVE_IMR);
		/* 'Specific EOI' to slave */
		outb(0x60+(irq&7),PIC_SLAVE_CMD);
		/* 'Specific EOI' to master-IRQ2 */
		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
	} else {
		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
		outb(cached_master_mask, PIC_MASTER_IMR);
		/* 'Specific EOI' to master */
		outb(0x60+irq,PIC_MASTER_CMD);
	}
	spin_unlock_irqrestore(&i8259A_lock, flags);
	return;

spurious_8259A_irq:
	/*
	 * this is the slow path - should happen rarely.
	 */
	if (i8259A_irq_real(irq))
		/*
		 * oops, the IRQ _is_ in service according to the
		 * 8259A - not spurious, go handle it.
		 */
		goto handle_real_irq;

	{
		static int spurious_irq_mask;
		/*
		 * At this point we can be sure the IRQ is spurious,
		 * let's ACK and report it. [once per IRQ]
		 */
		if (!(spurious_irq_mask & irqmask)) {
			printk(KERN_DEBUG
			       "spurious 8259A interrupt: IRQ%d.\n", irq);
			spurious_irq_mask |= irqmask;
		}
		atomic_inc(&irq_err_count);
		/*
		 * Theoretically we do not have to handle this IRQ,
		 * but in Linux this does not cause problems and is
		 * simpler for us.
		 */
		goto handle_real_irq;
	}
}
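/*
 * Illustrative note (not part of the original source): the 0x60+level
 * writes above are 8259A OCW2 "specific EOI" commands, where the low
 * three bits name the in-service IR line to clear.  For example,
 * acknowledging IRQ11 the way the slave branch above does would be:
 *
 *	outb(0x60 + (11 & 7), PIC_SLAVE_CMD);		specific EOI for IR3 on the slave
 *	outb(0x60 + PIC_CASCADE_IR, PIC_MASTER_CMD);	specific EOI for the cascade (IR2)
 */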

static char irq_trigger[2];
/**
 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
 */
static void restore_ELCR(char *trigger)
{
	outb(trigger[0], 0x4d0);
	outb(trigger[1], 0x4d1);
}

static void save_ELCR(char *trigger)
{
	/* IRQ 0,1,2,8,13 are marked as reserved */
	trigger[0] = inb(0x4d0) & 0xF8;
	trigger[1] = inb(0x4d1) & 0xDE;
}

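/*
 * Illustrative sketch (not part of the original source): each ELCR bit
 * selects level (1) or edge (0) triggering for one ISA IRQ; port 0x4d0
 * covers IRQ0-7 and port 0x4d1 covers IRQ8-15, which is why the two
 * bytes are saved and restored above.  A hypothetical probe of a single
 * line could look like:
 *
 *	static inline int elcr_irq_is_level(unsigned int irq)
 *	{
 *		unsigned int port = (irq < 8) ? 0x4d0 : 0x4d1;
 *		return (inb(port) >> (irq & 7)) & 1;
 *	}
 */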
static int i8259A_resume(struct sys_device *dev)
{
	init_8259A(i8259A_auto_eoi);
	restore_ELCR(irq_trigger);
	return 0;
}

static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
{
	save_ELCR(irq_trigger);
	return 0;
}

static int i8259A_shutdown(struct sys_device *dev)
{
	/* Put the i8259A into a quiescent state that
	 * the kernel initialization code can get it
	 * out of.
	 */
	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
	return 0;
}

static struct sysdev_class i8259_sysdev_class = {
|
||||
.name = "i8259",
|
||||
.suspend = i8259A_suspend,
|
||||
.resume = i8259A_resume,
|
||||
.shutdown = i8259A_shutdown,
|
||||
};
|
||||
|
||||
static struct sys_device device_i8259A = {
|
||||
.id = 0,
|
||||
.cls = &i8259_sysdev_class,
|
||||
};
|
||||
|
||||
static int __init i8259A_init_sysfs(void)
|
||||
{
|
||||
int error = sysdev_class_register(&i8259_sysdev_class);
|
||||
if (!error)
|
||||
error = sysdev_register(&device_i8259A);
|
||||
return error;
|
||||
}
|
||||
|
||||
device_initcall(i8259A_init_sysfs);
|
||||
|
||||
void init_8259A(int auto_eoi)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
i8259A_auto_eoi = auto_eoi;
|
||||
|
||||
spin_lock_irqsave(&i8259A_lock, flags);
|
||||
|
||||
outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
|
||||
outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
|
||||
|
||||
/*
|
||||
* outb_pic - this has to work on a wide range of PC hardware.
|
||||
*/
|
||||
outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
|
||||
/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
|
||||
outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
|
||||
/* 8259A-1 (the master) has a slave on IR2 */
|
||||
outb_pic(0x04, PIC_MASTER_IMR);
|
||||
if (auto_eoi) /* master does Auto EOI */
|
||||
outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
|
||||
else /* master expects normal EOI */
|
||||
outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
|
||||
|
||||
outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
|
||||
/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
|
||||
outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
|
||||
/* 8259A-2 is a slave on master's IR2 */
|
||||
outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
|
||||
/* (slave's support for AEOI in flat mode is to be investigated) */
|
||||
outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
|
||||
|
||||
if (auto_eoi)
|
||||
/*
|
||||
* In AEOI mode we just have to mask the interrupt
|
||||
* when acking.
|
||||
*/
|
||||
i8259A_chip.mask_ack = disable_8259A_irq;
|
||||
else
|
||||
i8259A_chip.mask_ack = mask_and_ack_8259A;
|
||||
|
||||
udelay(100); /* wait for 8259A to initialize */
|
||||
|
||||
outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
|
||||
outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
|
||||
|
||||
spin_unlock_irqrestore(&i8259A_lock, flags);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* IRQ2 is cascade interrupt to second interrupt controller
|
||||
*/
|
||||
|
||||
static struct irqaction irq2 = {
|
||||
.handler = no_action,
|
||||
.mask = CPU_MASK_NONE,
|
||||
.name = "cascade",
|
||||
};
|
||||
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
|
||||
[0 ... IRQ0_VECTOR - 1] = -1,
|
||||
[IRQ0_VECTOR] = 0,
|
||||
[IRQ1_VECTOR] = 1,
|
||||
[IRQ2_VECTOR] = 2,
|
||||
[IRQ3_VECTOR] = 3,
|
||||
[IRQ4_VECTOR] = 4,
|
||||
[IRQ5_VECTOR] = 5,
|
||||
[IRQ6_VECTOR] = 6,
|
||||
[IRQ7_VECTOR] = 7,
|
||||
[IRQ8_VECTOR] = 8,
|
||||
[IRQ9_VECTOR] = 9,
|
||||
[IRQ10_VECTOR] = 10,
|
||||
[IRQ11_VECTOR] = 11,
|
||||
[IRQ12_VECTOR] = 12,
|
||||
[IRQ13_VECTOR] = 13,
|
||||
[IRQ14_VECTOR] = 14,
|
||||
[IRQ15_VECTOR] = 15,
|
||||
[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
|
||||
};
|
||||
|
||||
void __init init_ISA_irqs (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
init_bsp_APIC();
|
||||
init_8259A(0);
|
||||
|
||||
for (i = 0; i < NR_IRQS; i++) {
|
||||
irq_desc[i].status = IRQ_DISABLED;
|
||||
irq_desc[i].action = NULL;
|
||||
irq_desc[i].depth = 1;
|
||||
|
||||
if (i < 16) {
|
||||
/*
|
||||
* 16 old-style INTA-cycle interrupts:
|
||||
*/
|
||||
set_irq_chip_and_handler_name(i, &i8259A_chip,
|
||||
handle_level_irq, "XT");
|
||||
} else {
|
||||
/*
|
||||
* 'high' PCI IRQs filled in on demand
|
||||
*/
|
||||
irq_desc[i].chip = &no_irq_chip;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
|
||||
|
||||
void __init native_init_IRQ(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
init_ISA_irqs();
|
||||
/*
|
||||
* Cover the whole vector space, no vector can escape
|
||||
* us. (some of these will be overridden and become
|
||||
* 'special' SMP interrupts)
|
||||
*/
|
||||
for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
|
||||
int vector = FIRST_EXTERNAL_VECTOR + i;
|
||||
if (vector != IA32_SYSCALL_VECTOR)
|
||||
set_intr_gate(vector, interrupt[i]);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
|
||||
* IPI, driven by wakeup.
|
||||
*/
|
||||
set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
|
||||
|
||||
/* IPIs for invalidation */
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
|
||||
set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
|
||||
|
||||
/* IPI for generic function call */
|
||||
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
|
||||
|
||||
/* Low priority IPI to cleanup after moving an irq */
|
||||
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
|
||||
#endif
|
||||
set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
|
||||
set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
|
||||
|
||||
/* self generated IPI for local APIC timer */
|
||||
set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
|
||||
|
||||
/* IPI vectors for APIC spurious and error interrupts */
|
||||
set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
|
||||
set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
|
||||
|
||||
if (!acpi_ioapic)
|
||||
setup_irq(2, &irq2);
|
||||
}
|
The diff for this file is not shown because it is too large.
|
@ -61,7 +61,7 @@ struct irq_cfg {
|
|||
};
|
||||
|
||||
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
|
||||
struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
|
||||
static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
|
||||
[0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
|
||||
[1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
|
||||
[2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
|
||||
|
@ -82,6 +82,10 @@ struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
|
|||
|
||||
static int assign_irq_vector(int irq, cpumask_t mask);
|
||||
|
||||
int first_system_vector = 0xfe;
|
||||
|
||||
char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
|
||||
|
||||
#define __apicdebuginit __init
|
||||
|
||||
int sis_apic_bug; /* not actually supported, dummy for compile */
|
||||
|
@ -90,7 +94,7 @@ static int no_timer_check;
|
|||
|
||||
static int disable_timer_pin_1 __initdata;
|
||||
|
||||
int timer_over_8254 __initdata = 1;
|
||||
int timer_through_8259 __initdata;
|
||||
|
||||
/* Where if anywhere is the i8259 connect in external int mode */
|
||||
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
|
||||
|
@ -104,15 +108,17 @@ DEFINE_SPINLOCK(vector_lock);
|
|||
int nr_ioapic_registers[MAX_IO_APICS];
|
||||
|
||||
/* I/O APIC entries */
|
||||
struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
|
||||
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
|
||||
int nr_ioapics;
|
||||
|
||||
/* MP IRQ source entries */
|
||||
struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
||||
struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
||||
|
||||
/* # of MP IRQ source entries */
|
||||
int mp_irq_entries;
|
||||
|
||||
DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
||||
|
||||
/*
|
||||
* Rough estimation of how many shared IRQs there are, can
|
||||
* be changed anytime.
|
||||
|
@ -140,7 +146,7 @@ struct io_apic {
|
|||
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
||||
{
|
||||
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
||||
+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
|
||||
+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
|
||||
}
|
||||
|
||||
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
|
||||
|
@ -183,7 +189,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
|
|||
break;
|
||||
reg = io_apic_read(entry->apic, 0x10 + pin*2);
|
||||
/* Is the remote IRR bit set? */
|
||||
if ((reg >> 14) & 1) {
|
||||
if (reg & IO_APIC_REDIR_REMOTE_IRR) {
|
||||
spin_unlock_irqrestore(&ioapic_lock, flags);
|
||||
return true;
|
||||
}
|
||||
|
@ -298,7 +304,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
|
|||
break;
|
||||
io_apic_write(apic, 0x11 + pin*2, dest);
|
||||
reg = io_apic_read(apic, 0x10 + pin*2);
|
||||
reg &= ~0x000000ff;
|
||||
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
|
||||
reg |= vector;
|
||||
io_apic_modify(apic, reg);
|
||||
if (!entry->next)
|
||||
|
@ -360,16 +366,37 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
|
|||
entry->pin = pin;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reroute an IRQ to a different pin.
|
||||
*/
|
||||
static void __init replace_pin_at_irq(unsigned int irq,
|
||||
int oldapic, int oldpin,
|
||||
int newapic, int newpin)
|
||||
{
|
||||
struct irq_pin_list *entry = irq_2_pin + irq;
|
||||
|
||||
while (1) {
|
||||
if (entry->apic == oldapic && entry->pin == oldpin) {
|
||||
entry->apic = newapic;
|
||||
entry->pin = newpin;
|
||||
}
|
||||
if (!entry->next)
|
||||
break;
|
||||
entry = irq_2_pin + entry->next;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define DO_ACTION(name,R,ACTION, FINAL) \
|
||||
\
|
||||
static void name##_IO_APIC_irq (unsigned int irq) \
|
||||
__DO_ACTION(R, ACTION, FINAL)
|
||||
|
||||
DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
|
||||
/* mask = 1 */
|
||||
DO_ACTION( __unmask, 0, &= 0xfffeffff, )
|
||||
/* mask = 0 */
|
||||
/* mask = 1 */
|
||||
DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
|
||||
|
||||
/* mask = 0 */
|
||||
DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
|
||||
|
||||
static void mask_IO_APIC_irq (unsigned int irq)
|
||||
{
|
||||
|
@ -430,20 +457,6 @@ static int __init disable_timer_pin_setup(char *arg)
|
|||
}
|
||||
__setup("disable_timer_pin_1", disable_timer_pin_setup);
|
||||
|
||||
static int __init setup_disable_8254_timer(char *s)
|
||||
{
|
||||
timer_over_8254 = -1;
|
||||
return 1;
|
||||
}
|
||||
static int __init setup_enable_8254_timer(char *s)
|
||||
{
|
||||
timer_over_8254 = 2;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("disable_8254_timer", setup_disable_8254_timer);
|
||||
__setup("enable_8254_timer", setup_enable_8254_timer);
|
||||
|
||||
|
||||
/*
|
||||
* Find the IRQ entry number of a certain pin.
|
||||
|
@ -453,10 +466,10 @@ static int find_irq_entry(int apic, int pin, int type)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < mp_irq_entries; i++)
|
||||
if (mp_irqs[i].mpc_irqtype == type &&
|
||||
(mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
|
||||
mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
|
||||
mp_irqs[i].mpc_dstirq == pin)
|
||||
if (mp_irqs[i].mp_irqtype == type &&
|
||||
(mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
|
||||
mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
|
||||
mp_irqs[i].mp_dstirq == pin)
|
||||
return i;
|
||||
|
||||
return -1;
|
||||
|
@ -470,13 +483,13 @@ static int __init find_isa_irq_pin(int irq, int type)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < mp_irq_entries; i++) {
|
||||
int lbus = mp_irqs[i].mpc_srcbus;
|
||||
int lbus = mp_irqs[i].mp_srcbus;
|
||||
|
||||
if (test_bit(lbus, mp_bus_not_pci) &&
|
||||
(mp_irqs[i].mpc_irqtype == type) &&
|
||||
(mp_irqs[i].mpc_srcbusirq == irq))
|
||||
(mp_irqs[i].mp_irqtype == type) &&
|
||||
(mp_irqs[i].mp_srcbusirq == irq))
|
||||
|
||||
return mp_irqs[i].mpc_dstirq;
|
||||
return mp_irqs[i].mp_dstirq;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -486,17 +499,17 @@ static int __init find_isa_irq_apic(int irq, int type)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < mp_irq_entries; i++) {
|
||||
int lbus = mp_irqs[i].mpc_srcbus;
|
||||
int lbus = mp_irqs[i].mp_srcbus;
|
||||
|
||||
if (test_bit(lbus, mp_bus_not_pci) &&
|
||||
(mp_irqs[i].mpc_irqtype == type) &&
|
||||
(mp_irqs[i].mpc_srcbusirq == irq))
|
||||
(mp_irqs[i].mp_irqtype == type) &&
|
||||
(mp_irqs[i].mp_srcbusirq == irq))
|
||||
break;
|
||||
}
|
||||
if (i < mp_irq_entries) {
|
||||
int apic;
|
||||
for(apic = 0; apic < nr_ioapics; apic++) {
|
||||
if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
|
||||
if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
|
||||
return apic;
|
||||
}
|
||||
}
|
||||
|
@ -516,28 +529,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
|
|||
|
||||
apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
|
||||
bus, slot, pin);
|
||||
if (mp_bus_id_to_pci_bus[bus] == -1) {
|
||||
if (test_bit(bus, mp_bus_not_pci)) {
|
||||
apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < mp_irq_entries; i++) {
|
||||
int lbus = mp_irqs[i].mpc_srcbus;
|
||||
int lbus = mp_irqs[i].mp_srcbus;
|
||||
|
||||
for (apic = 0; apic < nr_ioapics; apic++)
|
||||
if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
|
||||
mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
|
||||
if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
|
||||
mp_irqs[i].mp_dstapic == MP_APIC_ALL)
|
||||
break;
|
||||
|
||||
if (!test_bit(lbus, mp_bus_not_pci) &&
|
||||
!mp_irqs[i].mpc_irqtype &&
|
||||
!mp_irqs[i].mp_irqtype &&
|
||||
(bus == lbus) &&
|
||||
(slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
|
||||
int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
|
||||
(slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
|
||||
int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
|
||||
|
||||
if (!(apic || IO_APIC_IRQ(irq)))
|
||||
continue;
|
||||
|
||||
if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
|
||||
if (pin == (mp_irqs[i].mp_srcbusirq & 3))
|
||||
return irq;
|
||||
/*
|
||||
* Use the first all-but-pin matching entry as a
|
||||
|
@ -565,13 +578,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
|
|||
|
||||
static int MPBIOS_polarity(int idx)
|
||||
{
|
||||
int bus = mp_irqs[idx].mpc_srcbus;
|
||||
int bus = mp_irqs[idx].mp_srcbus;
|
||||
int polarity;
|
||||
|
||||
/*
|
||||
* Determine IRQ line polarity (high active or low active):
|
||||
*/
|
||||
switch (mp_irqs[idx].mpc_irqflag & 3)
|
||||
switch (mp_irqs[idx].mp_irqflag & 3)
|
||||
{
|
||||
case 0: /* conforms, ie. bus-type dependent polarity */
|
||||
if (test_bit(bus, mp_bus_not_pci))
|
||||
|
@ -607,13 +620,13 @@ static int MPBIOS_polarity(int idx)
|
|||
|
||||
static int MPBIOS_trigger(int idx)
|
||||
{
|
||||
int bus = mp_irqs[idx].mpc_srcbus;
|
||||
int bus = mp_irqs[idx].mp_srcbus;
|
||||
int trigger;
|
||||
|
||||
/*
|
||||
* Determine IRQ trigger mode (edge or level sensitive):
|
||||
*/
|
||||
switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
|
||||
switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
|
||||
{
|
||||
case 0: /* conforms, ie. bus-type dependent */
|
||||
if (test_bit(bus, mp_bus_not_pci))
|
||||
|
@ -660,16 +673,16 @@ static inline int irq_trigger(int idx)
|
|||
static int pin_2_irq(int idx, int apic, int pin)
|
||||
{
|
||||
int irq, i;
|
||||
int bus = mp_irqs[idx].mpc_srcbus;
|
||||
int bus = mp_irqs[idx].mp_srcbus;
|
||||
|
||||
/*
|
||||
* Debugging check, we are in big trouble if this message pops up!
|
||||
*/
|
||||
if (mp_irqs[idx].mpc_dstirq != pin)
|
||||
if (mp_irqs[idx].mp_dstirq != pin)
|
||||
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
||||
|
||||
if (test_bit(bus, mp_bus_not_pci)) {
|
||||
irq = mp_irqs[idx].mpc_srcbusirq;
|
||||
irq = mp_irqs[idx].mp_srcbusirq;
|
||||
} else {
|
||||
/*
|
||||
* PCI IRQs are mapped in order
|
||||
|
@ -730,7 +743,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
|
|||
offset = current_offset;
|
||||
next:
|
||||
vector += 8;
|
||||
if (vector >= FIRST_SYSTEM_VECTOR) {
|
||||
if (vector >= first_system_vector) {
|
||||
/* If we run out of vectors on large boxen, must share them. */
|
||||
offset = (offset + 1) % 8;
|
||||
vector = FIRST_DEVICE_VECTOR + offset;
|
||||
|
@ -788,7 +801,7 @@ static void __clear_irq_vector(int irq)
|
|||
cpus_clear(cfg->domain);
|
||||
}
|
||||
|
||||
void __setup_vector_irq(int cpu)
|
||||
static void __setup_vector_irq(int cpu)
|
||||
{
|
||||
/* Initialize vector_irq on a new cpu */
|
||||
/* This function must be called with vector_lock held */
|
||||
|
@ -811,6 +824,13 @@ void __setup_vector_irq(int cpu)
|
|||
}
|
||||
}
|
||||
|
||||
void setup_vector_irq(int cpu)
|
||||
{
|
||||
spin_lock(&vector_lock);
|
||||
__setup_vector_irq(smp_processor_id());
|
||||
spin_unlock(&vector_lock);
|
||||
}
|
||||
|
||||
|
||||
static struct irq_chip ioapic_chip;
|
||||
|
||||
|
@ -846,7 +866,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
|
|||
apic_printk(APIC_VERBOSE,KERN_DEBUG
|
||||
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
|
||||
"IRQ %d Mode:%i Active:%i)\n",
|
||||
apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
|
||||
apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
|
||||
irq, trigger, polarity);
|
||||
|
||||
/*
|
||||
|
@ -887,10 +907,10 @@ static void __init setup_IO_APIC_irqs(void)
|
|||
idx = find_irq_entry(apic,pin,mp_INT);
|
||||
if (idx == -1) {
|
||||
if (first_notcon) {
|
||||
apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
|
||||
apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
|
||||
first_notcon = 0;
|
||||
} else
|
||||
apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
|
||||
apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
|
||||
continue;
|
||||
}
|
||||
if (!first_notcon) {
|
||||
|
@ -911,26 +931,21 @@ static void __init setup_IO_APIC_irqs(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* Set up the 8259A-master output pin as broadcast to all
|
||||
* CPUs.
|
||||
* Set up the timer pin, possibly with the 8259A-master behind.
|
||||
*/
|
||||
static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
|
||||
static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
|
||||
int vector)
|
||||
{
|
||||
struct IO_APIC_route_entry entry;
|
||||
|
||||
memset(&entry, 0, sizeof(entry));
|
||||
|
||||
disable_8259A_irq(0);
|
||||
|
||||
/* mask LVT0 */
|
||||
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
||||
|
||||
/*
|
||||
* We use logical delivery to get the timer IRQ
|
||||
* to the first CPU.
|
||||
*/
|
||||
entry.dest_mode = INT_DEST_MODE;
|
||||
entry.mask = 0; /* unmask IRQ now */
|
||||
entry.mask = 1; /* mask IRQ now */
|
||||
entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
|
||||
entry.delivery_mode = INT_DELIVERY_MODE;
|
||||
entry.polarity = 0;
|
||||
|
@ -939,7 +954,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
|
|||
|
||||
/*
|
||||
* The timer IRQ doesn't have to know that behind the
|
||||
* scene we have a 8259A-master in AEOI mode ...
|
||||
* scene we may have a 8259A-master in AEOI mode ...
|
||||
*/
|
||||
set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
|
||||
|
||||
|
@ -947,8 +962,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
|
|||
* Add it to the IO-APIC irq-routing table:
|
||||
*/
|
||||
ioapic_write_entry(apic, pin, entry);
|
||||
|
||||
enable_8259A_irq(0);
|
||||
}
|
||||
|
||||
void __apicdebuginit print_IO_APIC(void)
|
||||
|
@ -965,7 +978,7 @@ void __apicdebuginit print_IO_APIC(void)
|
|||
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
||||
for (i = 0; i < nr_ioapics; i++)
|
||||
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
|
||||
mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
|
||||
mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
|
||||
|
||||
/*
|
||||
* We are a bit conservative about what we expect. We have to
|
||||
|
@ -983,7 +996,7 @@ void __apicdebuginit print_IO_APIC(void)
|
|||
spin_unlock_irqrestore(&ioapic_lock, flags);
|
||||
|
||||
printk("\n");
|
||||
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
|
||||
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
|
||||
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
|
||||
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
||||
|
||||
|
@ -1077,6 +1090,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
|
|||
|
||||
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
|
||||
smp_processor_id(), hard_smp_processor_id());
|
||||
v = apic_read(APIC_ID);
|
||||
printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
|
||||
v = apic_read(APIC_LVR);
|
||||
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
|
||||
|
@ -1540,7 +1554,7 @@ static inline void init_IO_APIC_traps(void)
|
|||
}
|
||||
}
|
||||
|
||||
static void enable_lapic_irq (unsigned int irq)
|
||||
static void unmask_lapic_irq(unsigned int irq)
|
||||
{
|
||||
unsigned long v;
|
||||
|
||||
|
@ -1548,7 +1562,7 @@ static void enable_lapic_irq (unsigned int irq)
|
|||
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
static void disable_lapic_irq (unsigned int irq)
|
||||
static void mask_lapic_irq(unsigned int irq)
|
||||
{
|
||||
unsigned long v;
|
||||
|
||||
|
@ -1561,19 +1575,20 @@ static void ack_lapic_irq (unsigned int irq)
|
|||
ack_APIC_irq();
|
||||
}
|
||||
|
||||
static void end_lapic_irq (unsigned int i) { /* nothing */ }
|
||||
|
||||
static struct hw_interrupt_type lapic_irq_type __read_mostly = {
|
||||
.name = "local-APIC",
|
||||
.typename = "local-APIC-edge",
|
||||
.startup = NULL, /* startup_irq() not used for IRQ0 */
|
||||
.shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
|
||||
.enable = enable_lapic_irq,
|
||||
.disable = disable_lapic_irq,
|
||||
.ack = ack_lapic_irq,
|
||||
.end = end_lapic_irq,
|
||||
static struct irq_chip lapic_chip __read_mostly = {
|
||||
.name = "local-APIC",
|
||||
.mask = mask_lapic_irq,
|
||||
.unmask = unmask_lapic_irq,
|
||||
.ack = ack_lapic_irq,
|
||||
};
|
||||
|
||||
static void lapic_register_intr(int irq)
|
||||
{
|
||||
irq_desc[irq].status &= ~IRQ_LEVEL;
|
||||
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
|
||||
"edge");
|
||||
}
|
||||
|
||||
static void __init setup_nmi(void)
|
||||
{
|
||||
/*
|
||||
|
@ -1659,6 +1674,7 @@ static inline void __init check_timer(void)
|
|||
struct irq_cfg *cfg = irq_cfg + 0;
|
||||
int apic1, pin1, apic2, pin2;
|
||||
unsigned long flags;
|
||||
int no_pin1 = 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
|
@ -1669,16 +1685,11 @@ static inline void __init check_timer(void)
|
|||
assign_irq_vector(0, TARGET_CPUS);
|
||||
|
||||
/*
|
||||
* Subtle, code in do_timer_interrupt() expects an AEOI
|
||||
* mode for the 8259A whenever interrupts are routed
|
||||
* through I/O APICs. Also IRQ0 has to be enabled in
|
||||
* the 8259A which implies the virtual wire has to be
|
||||
* disabled in the local APIC.
|
||||
* As IRQ0 is to be enabled in the 8259A, the virtual
|
||||
* wire has to be disabled in the local APIC.
|
||||
*/
|
||||
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
||||
init_8259A(1);
|
||||
if (timer_over_8254 > 0)
|
||||
enable_8259A_irq(0);
|
||||
|
||||
pin1 = find_isa_irq_pin(0, mp_INT);
|
||||
apic1 = find_isa_irq_apic(0, mp_INT);
|
||||
|
@ -1688,15 +1699,33 @@ static inline void __init check_timer(void)
|
|||
apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
||||
cfg->vector, apic1, pin1, apic2, pin2);
|
||||
|
||||
/*
|
||||
* Some BIOS writers are clueless and report the ExtINTA
|
||||
* I/O APIC input from the cascaded 8259A as the timer
|
||||
* interrupt input. So just in case, if only one pin
|
||||
* was found above, try it both directly and through the
|
||||
* 8259A.
|
||||
*/
|
||||
if (pin1 == -1) {
|
||||
pin1 = pin2;
|
||||
apic1 = apic2;
|
||||
no_pin1 = 1;
|
||||
} else if (pin2 == -1) {
|
||||
pin2 = pin1;
|
||||
apic2 = apic1;
|
||||
}
|
||||
|
||||
if (pin1 != -1) {
|
||||
/*
|
||||
* Ok, does IRQ0 through the IOAPIC work?
|
||||
*/
|
||||
if (no_pin1) {
|
||||
add_pin_to_irq(0, apic1, pin1);
|
||||
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
|
||||
}
|
||||
unmask_IO_APIC_irq(0);
|
||||
if (!no_timer_check && timer_irq_works()) {
|
||||
nmi_watchdog_default();
|
||||
if (nmi_watchdog == NMI_IO_APIC) {
|
||||
disable_8259A_irq(0);
|
||||
setup_nmi();
|
||||
enable_8259A_irq(0);
|
||||
}
|
||||
|
@ -1705,43 +1734,48 @@ static inline void __init check_timer(void)
|
|||
goto out;
|
||||
}
|
||||
clear_IO_APIC_pin(apic1, pin1);
|
||||
apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
|
||||
"connected to IO-APIC\n");
|
||||
}
|
||||
if (!no_pin1)
|
||||
apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
|
||||
"8254 timer not connected to IO-APIC\n");
|
||||
|
||||
apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
|
||||
"through the 8259A ... ");
|
||||
if (pin2 != -1) {
|
||||
apic_printk(APIC_VERBOSE,KERN_INFO
|
||||
"...trying to set up timer (IRQ0) "
|
||||
"through the 8259A ... ");
|
||||
apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
|
||||
apic2, pin2);
|
||||
/*
|
||||
* legacy devices should be connected to IO APIC #0
|
||||
*/
|
||||
setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
|
||||
replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
|
||||
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
|
||||
unmask_IO_APIC_irq(0);
|
||||
enable_8259A_irq(0);
|
||||
if (timer_irq_works()) {
|
||||
apic_printk(APIC_VERBOSE," works.\n");
|
||||
nmi_watchdog_default();
|
||||
timer_through_8259 = 1;
|
||||
if (nmi_watchdog == NMI_IO_APIC) {
|
||||
disable_8259A_irq(0);
|
||||
setup_nmi();
|
||||
enable_8259A_irq(0);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* Cleanup, just in case ...
|
||||
*/
|
||||
disable_8259A_irq(0);
|
||||
clear_IO_APIC_pin(apic2, pin2);
|
||||
apic_printk(APIC_VERBOSE," failed.\n");
|
||||
}
|
||||
apic_printk(APIC_VERBOSE," failed.\n");
|
||||
|
||||
if (nmi_watchdog == NMI_IO_APIC) {
|
||||
printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
|
||||
nmi_watchdog = 0;
|
||||
nmi_watchdog = NMI_NONE;
|
||||
}
|
||||
|
||||
apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
|
||||
|
||||
disable_8259A_irq(0);
|
||||
irq_desc[0].chip = &lapic_irq_type;
|
||||
lapic_register_intr(0);
|
||||
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
|
||||
enable_8259A_irq(0);
|
||||
|
||||
|
@ -1749,6 +1783,7 @@ static inline void __init check_timer(void)
|
|||
apic_printk(APIC_VERBOSE," works.\n");
|
||||
goto out;
|
||||
}
|
||||
disable_8259A_irq(0);
|
||||
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
|
||||
apic_printk(APIC_VERBOSE," failed.\n");
|
||||
|
||||
|
@ -1778,11 +1813,21 @@ static int __init notimercheck(char *s)
|
|||
__setup("no_timer_check", notimercheck);
|
||||
|
||||
/*
|
||||
*
|
||||
* IRQs that are handled by the PIC in the MPS IOAPIC case.
|
||||
* - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
|
||||
* Linux doesn't really care, as it's not actually used
|
||||
* for any interrupt handling anyway.
|
||||
* Traditionally ISA IRQ2 is the cascade IRQ, and is not available
|
||||
* to devices. However there may be an I/O APIC pin available for
|
||||
* this interrupt regardless. The pin may be left unconnected, but
|
||||
* typically it will be reused as an ExtINT cascade interrupt for
|
||||
* the master 8259A. In the MPS case such a pin will normally be
|
||||
* reported as an ExtINT interrupt in the MP table. With ACPI
|
||||
* there is no provision for ExtINT interrupts, and in the absence
|
||||
* of an override it would be treated as an ordinary ISA I/O APIC
|
||||
* interrupt, that is edge-triggered and unmasked by default. We
|
||||
* used to do this, but it caused problems on some systems because
|
||||
* of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
|
||||
* the same ExtINT cascade interrupt to drive the local APIC of the
|
||||
* bootstrap processor. Therefore we refrain from routing IRQ2 to
|
||||
* the I/O APIC in all cases now. No actual device should request
|
||||
* it anyway. --macro
|
||||
*/
|
||||
#define PIC_IRQS (1<<2)
|
||||
|
||||
|
@ -1793,10 +1838,7 @@ void __init setup_IO_APIC(void)
|
|||
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
|
||||
*/
|
||||
|
||||
if (acpi_ioapic)
|
||||
io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
|
||||
else
|
||||
io_apic_irqs = ~PIC_IRQS;
|
||||
io_apic_irqs = ~PIC_IRQS;
|
||||
|
||||
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
|
||||
|
||||
|
@ -1841,8 +1883,8 @@ static int ioapic_resume(struct sys_device *dev)
|
|||
|
||||
spin_lock_irqsave(&ioapic_lock, flags);
|
||||
reg_00.raw = io_apic_read(dev->id, 0);
|
||||
if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
|
||||
reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
|
||||
if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
|
||||
reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
|
||||
io_apic_write(dev->id, 0, reg_00.raw);
|
||||
}
|
||||
spin_unlock_irqrestore(&ioapic_lock, flags);
|
||||
|
@ -2242,8 +2284,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
|
|||
return -1;
|
||||
|
||||
for (i = 0; i < mp_irq_entries; i++)
|
||||
if (mp_irqs[i].mpc_irqtype == mp_INT &&
|
||||
mp_irqs[i].mpc_srcbusirq == bus_irq)
|
||||
if (mp_irqs[i].mp_irqtype == mp_INT &&
|
||||
mp_irqs[i].mp_srcbusirq == bus_irq)
|
||||
break;
|
||||
if (i >= mp_irq_entries)
|
||||
return -1;
|
||||
|
@ -2336,7 +2378,7 @@ void __init ioapic_init_mappings(void)
|
|||
ioapic_res = ioapic_setup_resources();
|
||||
for (i = 0; i < nr_ioapics; i++) {
|
||||
if (smp_found_config) {
|
||||
ioapic_phys = mp_ioapics[i].mpc_apicaddr;
|
||||
ioapic_phys = mp_ioapics[i].mp_apicaddr;
|
||||
} else {
|
||||
ioapic_phys = (unsigned long)
|
||||
alloc_bootmem_pages(PAGE_SIZE);
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include <linux/kernel_stat.h>
|
||||
#include <linux/mc146818rtc.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
|
|
|
@ -48,6 +48,29 @@ void ack_bad_irq(unsigned int irq)
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_STACKOVERFLOW
|
||||
/* Debugging check for stack overflow: is there less than 1KB free? */
|
||||
static int check_stack_overflow(void)
|
||||
{
|
||||
long sp;
|
||||
|
||||
__asm__ __volatile__("andl %%esp,%0" :
|
||||
"=r" (sp) : "0" (THREAD_SIZE - 1));
|
||||
|
||||
return sp < (sizeof(struct thread_info) + STACK_WARN);
|
||||
}
|
||||
|
||||
static void print_stack_overflow(void)
|
||||
{
|
||||
printk(KERN_WARNING "low stack detected by irq handler\n");
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
#else
|
||||
static inline int check_stack_overflow(void) { return 0; }
|
||||
static inline void print_stack_overflow(void) { }
|
||||
#endif
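/*
 * Illustrative note (not part of the original source): the inline
 * assembly in check_stack_overflow() masks %esp with THREAD_SIZE-1,
 * i.e. it computes the offset of the stack pointer within the
 * THREAD_SIZE-aligned thread stack.  In plain C the check is roughly:
 *
 *	unsigned long off = esp & (THREAD_SIZE - 1);
 *	overflow = off < sizeof(struct thread_info) + STACK_WARN;
 *
 * which warns once the stack pointer gets close to the thread_info
 * that sits at the bottom of the stack.
 */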
|
||||
|
||||
#ifdef CONFIG_4KSTACKS
|
||||
/*
|
||||
* per-CPU IRQ handling contexts (thread information and stack)
|
||||
|
@ -59,48 +82,29 @@ union irq_ctx {
|
|||
|
||||
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
|
||||
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* do_IRQ handles all normal device IRQ's (the special
|
||||
* SMP cross-CPU interrupts have their own specific
|
||||
* handlers).
|
||||
*/
|
||||
unsigned int do_IRQ(struct pt_regs *regs)
|
||||
{
|
||||
struct pt_regs *old_regs;
|
||||
/* high bit used in ret_from_ code */
|
||||
int irq = ~regs->orig_ax;
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
#ifdef CONFIG_4KSTACKS
|
||||
static char softirq_stack[NR_CPUS * THREAD_SIZE]
|
||||
__attribute__((__section__(".bss.page_aligned")));
|
||||
|
||||
static char hardirq_stack[NR_CPUS * THREAD_SIZE]
|
||||
__attribute__((__section__(".bss.page_aligned")));
|
||||
|
||||
static void call_on_stack(void *func, void *stack)
|
||||
{
|
||||
asm volatile("xchgl %%ebx,%%esp \n"
|
||||
"call *%%edi \n"
|
||||
"movl %%ebx,%%esp \n"
|
||||
: "=b" (stack)
|
||||
: "0" (stack),
|
||||
"D"(func)
|
||||
: "memory", "cc", "edx", "ecx", "eax");
|
||||
}
|
||||
|
||||
static inline int
|
||||
execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
|
||||
{
|
||||
union irq_ctx *curctx, *irqctx;
|
||||
u32 *isp;
|
||||
#endif
|
||||
|
||||
if (unlikely((unsigned)irq >= NR_IRQS)) {
|
||||
printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
|
||||
__func__, irq);
|
||||
BUG();
|
||||
}
|
||||
|
||||
old_regs = set_irq_regs(regs);
|
||||
irq_enter();
|
||||
#ifdef CONFIG_DEBUG_STACKOVERFLOW
|
||||
/* Debugging check for stack overflow: is there less than 1KB free? */
|
||||
{
|
||||
long sp;
|
||||
|
||||
__asm__ __volatile__("andl %%esp,%0" :
|
||||
"=r" (sp) : "0" (THREAD_SIZE - 1));
|
||||
if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
|
||||
printk("do_IRQ: stack overflow: %ld\n",
|
||||
sp - sizeof(struct thread_info));
|
||||
dump_stack();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_4KSTACKS
|
||||
u32 *isp, arg1, arg2;
|
||||
|
||||
curctx = (union irq_ctx *) current_thread_info();
|
||||
irqctx = hardirq_ctx[smp_processor_id()];
|
||||
|
@ -111,52 +115,39 @@ unsigned int do_IRQ(struct pt_regs *regs)
|
|||
* handler) we can't do that and just have to keep using the
|
||||
* current stack (which is the irq stack already after all)
|
||||
*/
|
||||
if (curctx != irqctx) {
|
||||
int arg1, arg2, bx;
|
||||
if (unlikely(curctx == irqctx))
|
||||
return 0;
|
||||
|
||||
/* build the stack frame on the IRQ stack */
|
||||
isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
|
||||
irqctx->tinfo.task = curctx->tinfo.task;
|
||||
irqctx->tinfo.previous_esp = current_stack_pointer;
|
||||
/* build the stack frame on the IRQ stack */
|
||||
isp = (u32 *) ((char*)irqctx + sizeof(*irqctx));
|
||||
irqctx->tinfo.task = curctx->tinfo.task;
|
||||
irqctx->tinfo.previous_esp = current_stack_pointer;
|
||||
|
||||
/*
|
||||
* Copy the softirq bits in preempt_count so that the
|
||||
* softirq checks work in the hardirq context.
|
||||
*/
|
||||
irqctx->tinfo.preempt_count =
|
||||
(irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
|
||||
(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
|
||||
/*
|
||||
* Copy the softirq bits in preempt_count so that the
|
||||
* softirq checks work in the hardirq context.
|
||||
*/
|
||||
irqctx->tinfo.preempt_count =
|
||||
(irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
|
||||
(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
|
||||
|
||||
asm volatile(
|
||||
" xchgl %%ebx,%%esp \n"
|
||||
" call *%%edi \n"
|
||||
" movl %%ebx,%%esp \n"
|
||||
: "=a" (arg1), "=d" (arg2), "=b" (bx)
|
||||
: "0" (irq), "1" (desc), "2" (isp),
|
||||
"D" (desc->handle_irq)
|
||||
: "memory", "cc", "ecx"
|
||||
);
|
||||
} else
|
||||
#endif
|
||||
desc->handle_irq(irq, desc);
|
||||
if (unlikely(overflow))
|
||||
call_on_stack(print_stack_overflow, isp);
|
||||
|
||||
irq_exit();
|
||||
set_irq_regs(old_regs);
|
||||
asm volatile("xchgl %%ebx,%%esp \n"
|
||||
"call *%%edi \n"
|
||||
"movl %%ebx,%%esp \n"
|
||||
: "=a" (arg1), "=d" (arg2), "=b" (isp)
|
||||
: "0" (irq), "1" (desc), "2" (isp),
|
||||
"D" (desc->handle_irq)
|
||||
: "memory", "cc", "ecx");
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_4KSTACKS
|
||||
|
||||
static char softirq_stack[NR_CPUS * THREAD_SIZE]
|
||||
__attribute__((__section__(".bss.page_aligned")));
|
||||
|
||||
static char hardirq_stack[NR_CPUS * THREAD_SIZE]
|
||||
__attribute__((__section__(".bss.page_aligned")));
|
||||
|
||||
/*
|
||||
* allocate per-cpu stacks for hardirq and for softirq processing
|
||||
*/
|
||||
void irq_ctx_init(int cpu)
|
||||
void __cpuinit irq_ctx_init(int cpu)
|
||||
{
|
||||
union irq_ctx *irqctx;
|
||||
|
||||
|
@ -164,25 +155,25 @@ void irq_ctx_init(int cpu)
|
|||
return;
|
||||
|
||||
irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
|
||||
irqctx->tinfo.task = NULL;
|
||||
irqctx->tinfo.exec_domain = NULL;
|
||||
irqctx->tinfo.cpu = cpu;
|
||||
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
|
||||
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
|
||||
irqctx->tinfo.task = NULL;
|
||||
irqctx->tinfo.exec_domain = NULL;
|
||||
irqctx->tinfo.cpu = cpu;
|
||||
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
|
||||
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
|
||||
|
||||
hardirq_ctx[cpu] = irqctx;
|
||||
|
||||
irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
|
||||
irqctx->tinfo.task = NULL;
|
||||
irqctx->tinfo.exec_domain = NULL;
|
||||
irqctx->tinfo.cpu = cpu;
|
||||
irqctx->tinfo.preempt_count = 0;
|
||||
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
|
||||
irqctx->tinfo.task = NULL;
|
||||
irqctx->tinfo.exec_domain = NULL;
|
||||
irqctx->tinfo.cpu = cpu;
|
||||
irqctx->tinfo.preempt_count = 0;
|
||||
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
|
||||
|
||||
softirq_ctx[cpu] = irqctx;
|
||||
|
||||
printk("CPU %u irqstacks, hard=%p soft=%p\n",
|
||||
cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
|
||||
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
|
||||
cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
|
||||
}
|
||||
|
||||
void irq_ctx_exit(int cpu)
|
||||
|
@ -211,24 +202,55 @@ asmlinkage void do_softirq(void)
|
|||
/* build the stack frame on the softirq stack */
|
||||
isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
|
||||
|
||||
asm volatile(
|
||||
" xchgl %%ebx,%%esp \n"
|
||||
" call __do_softirq \n"
|
||||
" movl %%ebx,%%esp \n"
|
||||
: "=b"(isp)
|
||||
: "0"(isp)
|
||||
: "memory", "cc", "edx", "ecx", "eax"
|
||||
);
|
||||
call_on_stack(__do_softirq, isp);
|
||||
/*
|
||||
* Shouldnt happen, we returned above if in_interrupt():
|
||||
*/
|
||||
*/
|
||||
WARN_ON_ONCE(softirq_count());
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline int
|
||||
execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* do_IRQ handles all normal device IRQ's (the special
|
||||
* SMP cross-CPU interrupts have their own specific
|
||||
* handlers).
|
||||
*/
|
||||
unsigned int do_IRQ(struct pt_regs *regs)
|
||||
{
|
||||
struct pt_regs *old_regs;
|
||||
/* high bit used in ret_from_ code */
|
||||
int overflow, irq = ~regs->orig_ax;
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
|
||||
if (unlikely((unsigned)irq >= NR_IRQS)) {
|
||||
printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
|
||||
__func__, irq);
|
||||
BUG();
|
||||
}
|
||||
|
||||
old_regs = set_irq_regs(regs);
|
||||
irq_enter();
|
||||
|
||||
overflow = check_stack_overflow();
|
||||
|
||||
if (!execute_on_irq_stack(overflow, desc, irq)) {
|
||||
if (unlikely(overflow))
|
||||
print_stack_overflow();
|
||||
desc->handle_irq(irq, desc);
|
||||
}
|
||||
|
||||
irq_exit();
|
||||
set_irq_regs(old_regs);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Interrupt statistics:
|
||||
*/
|
||||
|
@ -313,16 +335,20 @@ skip:
|
|||
per_cpu(irq_stat,j).irq_tlb_count);
|
||||
seq_printf(p, " TLB shootdowns\n");
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
seq_printf(p, "TRM: ");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ",
|
||||
per_cpu(irq_stat,j).irq_thermal_count);
|
||||
seq_printf(p, " Thermal event interrupts\n");
|
||||
#endif
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
seq_printf(p, "SPU: ");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ",
|
||||
per_cpu(irq_stat,j).irq_spurious_count);
|
||||
seq_printf(p, " Spurious interrupts\n");
|
||||
#endif
|
||||
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
|
||||
#if defined(CONFIG_X86_IO_APIC)
|
||||
seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
|
||||
|
@ -331,6 +357,40 @@ skip:
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* /proc/stat helpers
|
||||
*/
|
||||
u64 arch_irq_stat_cpu(unsigned int cpu)
|
||||
{
|
||||
u64 sum = nmi_count(cpu);
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
|
||||
#endif
|
||||
#ifdef CONFIG_SMP
|
||||
sum += per_cpu(irq_stat, cpu).irq_resched_count;
|
||||
sum += per_cpu(irq_stat, cpu).irq_call_count;
|
||||
sum += per_cpu(irq_stat, cpu).irq_tlb_count;
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
sum += per_cpu(irq_stat, cpu).irq_thermal_count;
|
||||
#endif
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
sum += per_cpu(irq_stat, cpu).irq_spurious_count;
|
||||
#endif
|
||||
return sum;
|
||||
}
|
||||
|
||||
u64 arch_irq_stat(void)
|
||||
{
|
||||
u64 sum = atomic_read(&irq_err_count);
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
sum += atomic_read(&irq_mis_count);
|
||||
#endif
|
||||
return sum;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
#include <mach_apic.h>
|
||||
|
||||
|
|
|
@ -135,6 +135,7 @@ skip:
|
|||
seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
|
||||
seq_printf(p, " TLB shootdowns\n");
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
seq_printf(p, "TRM: ");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
|
||||
|
@ -143,6 +144,7 @@ skip:
|
|||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
|
||||
seq_printf(p, " Threshold APIC interrupts\n");
|
||||
#endif
|
||||
seq_printf(p, "SPU: ");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
|
||||
|
@ -152,6 +154,32 @@ skip:
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* /proc/stat helpers
|
||||
*/
|
||||
u64 arch_irq_stat_cpu(unsigned int cpu)
|
||||
{
|
||||
u64 sum = cpu_pda(cpu)->__nmi_count;
|
||||
|
||||
sum += cpu_pda(cpu)->apic_timer_irqs;
|
||||
#ifdef CONFIG_SMP
|
||||
sum += cpu_pda(cpu)->irq_resched_count;
|
||||
sum += cpu_pda(cpu)->irq_call_count;
|
||||
sum += cpu_pda(cpu)->irq_tlb_count;
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
sum += cpu_pda(cpu)->irq_thermal_count;
|
||||
sum += cpu_pda(cpu)->irq_threshold_count;
|
||||
#endif
|
||||
sum += cpu_pda(cpu)->irq_spurious_count;
|
||||
return sum;
|
||||
}
|
||||
|
||||
u64 arch_irq_stat(void)
|
||||
{
|
||||
return atomic_read(&irq_err_count);
|
||||
}
|
||||
|
||||
/*
|
||||
* do_IRQ handles all normal device IRQ's (the special
|
||||
* SMP cross-CPU interrupts have their own specific
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
#include <linux/errno.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/bitops.h>
|
||||
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/arch_hooks.h>
|
||||
#include <asm/i8259.h>
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Note that on a 486, we don't want to do a SIGFPE on an irq13
|
||||
* as the irq is unreliable, and exception 16 works correctly
|
||||
* (ie as explained in the intel literature). On a 386, you
|
||||
* can't use exception 16 due to bad IBM design, so we have to
|
||||
* rely on the less exact irq13.
|
||||
*
|
||||
* Careful.. Not only is IRQ13 unreliable, but it is also
|
||||
* leads to races. IBM designers who came up with it should
|
||||
* be shot.
|
||||
*/
|
||||
|
||||
|
||||
static irqreturn_t math_error_irq(int cpl, void *dev_id)
|
||||
{
|
||||
extern void math_error(void __user *);
|
||||
outb(0,0xF0);
|
||||
if (ignore_fpu_irq || !boot_cpu_data.hard_math)
|
||||
return IRQ_NONE;
|
||||
math_error((void __user *)get_irq_regs()->ip);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
|
||||
* New motherboards sometimes make IRQ 13 be a PCI interrupt,
|
||||
* so allow interrupt sharing.
|
||||
*/
|
||||
static struct irqaction fpu_irq = {
|
||||
.handler = math_error_irq,
|
||||
.mask = CPU_MASK_NONE,
|
||||
.name = "fpu",
|
||||
};
|
||||
|
||||
void __init init_ISA_irqs (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
init_bsp_APIC();
|
||||
#endif
|
||||
init_8259A(0);
|
||||
|
||||
/*
|
||||
* 16 old-style INTA-cycle interrupts:
|
||||
*/
|
||||
for (i = 0; i < 16; i++) {
|
||||
set_irq_chip_and_handler_name(i, &i8259A_chip,
|
||||
handle_level_irq, "XT");
|
||||
}
|
||||
}
|
||||
|
||||
/* Overridden in paravirt.c */
|
||||
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
|
||||
|
||||
void __init native_init_IRQ(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* all the set up before the call gates are initialised */
|
||||
pre_intr_init_hook();
|
||||
|
||||
/*
|
||||
* Cover the whole vector space, no vector can escape
|
||||
* us. (some of these will be overridden and become
|
||||
* 'special' SMP interrupts)
|
||||
*/
|
||||
for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
|
||||
int vector = FIRST_EXTERNAL_VECTOR + i;
|
||||
if (i >= NR_IRQS)
|
||||
break;
|
||||
/* SYSCALL_VECTOR was reserved in trap_init. */
|
||||
if (!test_bit(vector, used_vectors))
|
||||
set_intr_gate(vector, interrupt[i]);
|
||||
}
|
||||
|
||||
/* setup after call gates are initialised (usually add in
|
||||
* the architecture specific gates)
|
||||
*/
|
||||
intr_init_hook();
|
||||
|
||||
/*
|
||||
* External FPU? Set up irq13 if so, for
|
||||
* original braindamaged IBM FERR coupling.
|
||||
*/
|
||||
if (boot_cpu_data.hard_math && !cpu_has_fpu)
|
||||
setup_irq(FPU_IRQ, &fpu_irq);
|
||||
|
||||
irq_ctx_init(smp_processor_id());
|
||||
}
|
|
@ -0,0 +1,217 @@
|
|||
#include <linux/linkage.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/bitops.h>
|
||||
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/i8259.h>
|
||||
|
||||
/*
|
||||
* Common place to define all x86 IRQ vectors
|
||||
*
|
||||
* This builds up the IRQ handler stubs using some ugly macros in irq.h
|
||||
*
|
||||
* These macros create the low-level assembly IRQ routines that save
|
||||
* register context and call do_IRQ(). do_IRQ() then does all the
|
||||
* operations that are needed to keep the AT (or SMP IOAPIC)
|
||||
* interrupt-controller happy.
|
||||
*/
|
||||
|
||||
#define IRQ_NAME2(nr) nr##_interrupt(void)
|
||||
#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
|
||||
|
||||
/*
|
||||
* SMP has a few special interrupts for IPI messages
|
||||
*/
|
||||
|
||||
#define BUILD_IRQ(nr) \
|
||||
asmlinkage void IRQ_NAME(nr); \
|
||||
asm("\n.p2align\n" \
|
||||
"IRQ" #nr "_interrupt:\n\t" \
|
||||
"push $~(" #nr ") ; " \
|
||||
"jmp common_interrupt");
|
||||
|
||||
#define BI(x,y) \
|
||||
BUILD_IRQ(x##y)
|
||||
|
||||
#define BUILD_16_IRQS(x) \
|
||||
BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
|
||||
BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
|
||||
BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
|
||||
BI(x,c) BI(x,d) BI(x,e) BI(x,f)
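/*
 * Illustrative note (not part of the original source): for the first
 * entry built here, BI(0x2,0) expands to BUILD_IRQ(0x20), which emits
 * an assembly stub roughly equivalent to:
 *
 *	.p2align
 *	IRQ0x20_interrupt:
 *		push $~(0x20)
 *		jmp common_interrupt
 *
 * The stub pushes the bitwise NOT of the vector number so the common
 * interrupt code can recover it from ~regs->orig_ax before dispatching.
 */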
|
||||
|
||||
/*
|
||||
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
|
||||
* (these are usually mapped to vectors 0x30-0x3f)
|
||||
*/
|
||||
|
||||
/*
|
||||
* The IO-APIC gives us many more interrupt sources. Most of these
|
||||
* are unused but an SMP system is supposed to have enough memory ...
|
||||
* sometimes (mostly wrt. hw bugs) we get corrupted vectors all
|
||||
* across the spectrum, so we really want to be prepared to get all
|
||||
* of these. Plus, more powerful systems might have more than 64
|
||||
* IO-APIC registers.
|
||||
*
|
||||
* (these are usually mapped into the 0x30-0xff vector range)
|
||||
*/
|
||||
BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
|
||||
BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
|
||||
BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
|
||||
BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
|
||||
|
||||
#undef BUILD_16_IRQS
|
||||
#undef BI
|
||||
|
||||
|
||||
#define IRQ(x,y) \
|
||||
IRQ##x##y##_interrupt
|
||||
|
||||
#define IRQLIST_16(x) \
|
||||
IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
|
||||
IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
|
||||
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
|
||||
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
|
||||
|
||||
/* for the irq vectors */
|
||||
static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
|
||||
IRQLIST_16(0x2), IRQLIST_16(0x3),
|
||||
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
|
||||
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
|
||||
IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
|
||||
};
|
||||
|
||||
#undef IRQ
|
||||
#undef IRQLIST_16
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
 * IRQ2 is cascade interrupt to second interrupt controller
 */

static struct irqaction irq2 = {
    .handler = no_action,
    .mask = CPU_MASK_NONE,
    .name = "cascade",
};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
    [0 ... IRQ0_VECTOR - 1] = -1,
    [IRQ0_VECTOR] = 0,
    [IRQ1_VECTOR] = 1,
    [IRQ2_VECTOR] = 2,
    [IRQ3_VECTOR] = 3,
    [IRQ4_VECTOR] = 4,
    [IRQ5_VECTOR] = 5,
    [IRQ6_VECTOR] = 6,
    [IRQ7_VECTOR] = 7,
    [IRQ8_VECTOR] = 8,
    [IRQ9_VECTOR] = 9,
    [IRQ10_VECTOR] = 10,
    [IRQ11_VECTOR] = 11,
    [IRQ12_VECTOR] = 12,
    [IRQ13_VECTOR] = 13,
    [IRQ14_VECTOR] = 14,
    [IRQ15_VECTOR] = 15,
    [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
};

static void __init init_ISA_irqs (void)
{
    int i;

    init_bsp_APIC();
    init_8259A(0);

    for (i = 0; i < NR_IRQS; i++) {
        irq_desc[i].status = IRQ_DISABLED;
        irq_desc[i].action = NULL;
        irq_desc[i].depth = 1;

        if (i < 16) {
            /*
             * 16 old-style INTA-cycle interrupts:
             */
            set_irq_chip_and_handler_name(i, &i8259A_chip,
                handle_level_irq, "XT");
        } else {
            /*
             * 'high' PCI IRQs filled in on demand
             */
            irq_desc[i].chip = &no_irq_chip;
        }
    }
}

void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));

void __init native_init_IRQ(void)
{
    int i;

    init_ISA_irqs();
    /*
     * Cover the whole vector space, no vector can escape
     * us. (some of these will be overridden and become
     * 'special' SMP interrupts)
     */
    for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
        int vector = FIRST_EXTERNAL_VECTOR + i;
        if (vector != IA32_SYSCALL_VECTOR)
            set_intr_gate(vector, interrupt[i]);
    }

#ifdef CONFIG_SMP
    /*
     * The reschedule interrupt is a CPU-to-CPU reschedule-helper
     * IPI, driven by wakeup.
     */
    alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);

    /* IPIs for invalidation */
    alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
    alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
    alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
    alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
    alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
    alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
    alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
    alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);

    /* IPI for generic function call */
    alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);

    /* Low priority IPI to cleanup after moving an irq */
    set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
#endif
    alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
    alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);

    /* self generated IPI for local APIC timer */
    alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);

    /* IPI vectors for APIC spurious and error interrupts */
    alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
    alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);

    if (!acpi_ioapic)
        setup_irq(2, &irq2);
}
@@ -20,9 +20,9 @@
#include <asm/mmu_context.h>

#ifdef CONFIG_SMP
static void flush_ldt(void *null)
static void flush_ldt(void *current_mm)
{
    if (current->active_mm)
    if (current->active_mm == current_mm)
        load_LDT(&current->active_mm->context);
}
#endif

@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
    load_LDT(pc);
    mask = cpumask_of_cpu(smp_processor_id());
    if (!cpus_equal(current->mm->cpu_vm_mask, mask))
        smp_call_function(flush_ldt, NULL, 1, 1);
        smp_call_function(flush_ldt, current->mm, 1, 1);
    preempt_enable();
#else
    load_LDT(pc);
@@ -39,7 +39,7 @@ static void set_idt(void *newidt, __u16 limit)
    curidt.address = (unsigned long)newidt;

    load_idt(&curidt);
};
}


static void set_gdt(void *newgdt, __u16 limit)

@@ -51,7 +51,7 @@ static void set_gdt(void *newgdt, __u16 limit)
    curgdt.address = (unsigned long)newgdt;

    load_gdt(&curgdt);
};
}

static void load_segments(void)
{

@@ -110,7 +110,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
    pgd_t *level4p;
    level4p = (pgd_t *)__va(start_pgtable);
    return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
    return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
}

static void set_idt(void *newidt, u16 limit)
@@ -5,13 +5,14 @@
 * 2006 Shaohua Li <shaohua.li@intel.com>
 *
 * This driver allows to upgrade microcode on Intel processors
 * belonging to IA-32 family - PentiumPro, Pentium II,
 * belonging to IA-32 family - PentiumPro, Pentium II,
 * Pentium III, Xeon, Pentium 4, etc.
 *
 * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual,
 * Order Number 245472 or free download from:
 *
 * http://developer.intel.com/design/pentium4/manuals/245472.htm
 * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
 * Software Developer's Manual
 * Order Number 253668 or free download from:
 *
 * http://developer.intel.com/design/pentium4/manuals/253668.htm
 *
 * For more information, go to http://www.urbanmyth.org/microcode
 *

@@ -58,12 +59,12 @@
 * nature of implementation.
 * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
 * Fix the panic when writing zero-length microcode chunk.
 * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
 * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
 * Jun Nakajima <jun.nakajima@intel.com>
 * Support for the microcode updates in the new format.
 * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
 * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
 * because we no longer hold a copy of applied microcode
 * because we no longer hold a copy of applied microcode
 * in kernel memory.
 * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
 * Fix sigmatch() macro to handle old CPUs with pf == 0.

@@ -320,11 +321,11 @@ static void apply_microcode(int cpu)
        return;

    /* serialize access to the physical write to MSR 0x79 */
    spin_lock_irqsave(&microcode_update_lock, flags);
    spin_lock_irqsave(&microcode_update_lock, flags);

    /* write microcode via MSR 0x79 */
    wrmsr(MSR_IA32_UCODE_WRITE,
        (unsigned long) uci->mc->bits,
        (unsigned long) uci->mc->bits,
        (unsigned long) uci->mc->bits >> 16 >> 16);
    wrmsr(MSR_IA32_UCODE_REV, 0, 0);

@@ -341,7 +342,7 @@ static void apply_microcode(int cpu)
        return;
    }
    printk(KERN_INFO "microcode: CPU%d updated from revision "
        "0x%x to 0x%x, date = %08x \n",
        "0x%x to 0x%x, date = %08x \n",
        cpu_num, uci->rev, val[1], uci->mc->hdr.date);
    uci->rev = val[1];
}

@@ -534,7 +535,7 @@ static int cpu_request_microcode(int cpu)
        c->x86, c->x86_model, c->x86_mask);
    error = request_firmware(&firmware, name, &microcode_pdev->dev);
    if (error) {
        pr_debug("microcode: ucode data file %s load failed\n", name);
        pr_debug("microcode: data file %s load failed\n", name);
        return error;
    }
    buf = firmware->data;

@@ -805,6 +806,9 @@ static int __init microcode_init (void)
{
    int error;

    printk(KERN_INFO
        "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");

    error = microcode_dev_init();
    if (error)
        return error;

@@ -825,9 +829,6 @@ static int __init microcode_init (void)
    }

    register_hotcpu_notifier(&mc_cpu_notifier);

    printk(KERN_INFO
        "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
    return 0;
}

@@ -12,6 +12,7 @@
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/acpi.h>
#include <asm/mmconfig.h>

#include "../pci/pci.h"

(The diff for this file is not shown because it is too large.)
@@ -11,10 +11,13 @@
 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
 */

#include <asm/apic.h>

#include <linux/nmi.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>

@@ -22,12 +25,18 @@
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
#include <linux/smp.h>

#include <asm/i8259.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/proto.h>
#include <asm/timer.h>

#include "mach_traps.h"
#include <asm/mce.h>

#include <mach_traps.h>

int unknown_nmi_panic;
int nmi_watchdog_enabled;

@@ -41,28 +50,65 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE;
 * 0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
EXPORT_SYMBOL(nmi_active);

unsigned int nmi_watchdog = NMI_NONE;
EXPORT_SYMBOL(nmi_watchdog);

static int panic_on_timeout;

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;

static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static int endflag __initdata = 0;
static inline unsigned int get_nmi_count(int cpu)
{
#ifdef CONFIG_X86_64
    return cpu_pda(cpu)->__nmi_count;
#else
    return nmi_count(cpu);
#endif
}

static inline int mce_in_progress(void)
{
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
    return atomic_read(&mce_entry) > 0;
#endif
    return 0;
}

/*
 * Take the local apic timer and PIT/HPET into account. We don't
 * know which one is active, when we have highres/dyntick on
 */
static inline unsigned int get_timer_irqs(int cpu)
{
#ifdef CONFIG_X86_64
    return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
#else
    return per_cpu(irq_stat, cpu).apic_timer_irqs +
        per_cpu(irq_stat, cpu).irq0_irqs;
#endif
}

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
/*
 * The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
    local_irq_enable_in_hardirq();
    /* Intentionally don't use cpu_relax here. This is
       to make sure that the performance counter really ticks,
       even if there is a simulator or similar that catches the
       pause instruction. On a real HT machine this is fine because
       all other CPUs are busy with "useless" delay loops and don't
       care if they get somewhat less cycles. */
    /*
     * Intentionally don't use cpu_relax here. This is
     * to make sure that the performance counter really ticks,
     * even if there is a simulator or similar that catches the
     * pause instruction. On a real HT machine this is fine because
     * all other CPUs are busy with "useless" delay loops and don't
     * care if they get somewhat less cycles.
     */
    while (endflag == 0)
        mb();
}

@@ -73,15 +119,12 @@ int __init check_nmi_watchdog(void)
    unsigned int *prev_nmi_count;
    int cpu;

    if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
    if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
        return 0;

    if (!atomic_read(&nmi_active))
        return 0;

    prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
    prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
    if (!prev_nmi_count)
        return -1;
        goto error;

    printk(KERN_INFO "Testing NMI watchdog ... ");

@@ -91,25 +134,19 @@ int __init check_nmi_watchdog(void)
#endif

    for_each_possible_cpu(cpu)
        prev_nmi_count[cpu] = nmi_count(cpu);
        prev_nmi_count[cpu] = get_nmi_count(cpu);
    local_irq_enable();
    mdelay((20*1000)/nmi_hz); // wait 20 ticks
    mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

    for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
        /* Check cpu_callin_map here because that is set
           after the timer is started. */
        if (!cpu_isset(cpu, cpu_callin_map))
            continue;
#endif
    for_each_online_cpu(cpu) {
        if (!per_cpu(wd_enabled, cpu))
            continue;
        if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
        if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
            printk(KERN_WARNING "WARNING: CPU#%d: NMI "
                "appears to be stuck (%d->%d)!\n",
                cpu,
                prev_nmi_count[cpu],
                nmi_count(cpu));
                get_nmi_count(cpu));
            per_cpu(wd_enabled, cpu) = 0;
            atomic_dec(&nmi_active);
        }

@@ -118,37 +155,53 @@ int __init check_nmi_watchdog(void)
    if (!atomic_read(&nmi_active)) {
        kfree(prev_nmi_count);
        atomic_set(&nmi_active, -1);
        return -1;
        goto error;
    }
    printk("OK.\n");

    /* now that we know it works we can reduce NMI frequency to
       something more reasonable; makes a difference in some configs */
    /*
     * now that we know it works we can reduce NMI frequency to
     * something more reasonable; makes a difference in some configs
     */
    if (nmi_watchdog == NMI_LOCAL_APIC)
        nmi_hz = lapic_adjust_nmi_hz(1);

    kfree(prev_nmi_count);
    return 0;
error:
    if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
        disable_8259A_irq(0);
#ifdef CONFIG_X86_32
    timer_ack = 0;
#endif
    return -1;
}

static int __init setup_nmi_watchdog(char *str)
{
    int nmi;
    unsigned int nmi;

    if (!strncmp(str, "panic", 5)) {
        panic_on_timeout = 1;
        str = strchr(str, ',');
        if (!str)
            return 1;
        ++str;
    }

    get_option(&str, &nmi);

    if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
    if (nmi >= NMI_INVALID)
        return 0;

    nmi_watchdog = nmi;
    return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);


/* Suspend/resume support */

/*
 * Suspend/resume support
 */
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

@@ -172,7 +225,6 @@ static int lapic_nmi_resume(struct sys_device *dev)
    return 0;
}


static struct sysdev_class nmi_sysclass = {
    .name = "lapic_nmi",
    .resume = lapic_nmi_resume,

@@ -188,7 +240,8 @@ static int __init init_lapic_nmi_sysfs(void)
{
    int error;

    /* should really be a BUG_ON but b/c this is an
    /*
     * should really be a BUG_ON but b/c this is an
     * init call, it just doesn't work. -dcz
     */
    if (nmi_watchdog != NMI_LOCAL_APIC)

@@ -202,6 +255,7 @@ static int __init init_lapic_nmi_sysfs(void)
        error = sysdev_register(&device_lapic_nmi);
    return error;
}

/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

@@ -223,7 +277,7 @@ void acpi_nmi_enable(void)

static void __acpi_nmi_disable(void *__unused)
{
    apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
    apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*

@@ -242,12 +296,13 @@ void setup_apic_nmi_watchdog(void *unused)

    /* cheap hack to support suspend/resume */
    /* if cpu0 is not active neither should the other cpus */
    if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
    if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
        return;

    switch (nmi_watchdog) {
    case NMI_LOCAL_APIC:
        __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */
        /* enable it before to avoid race with handler */
        __get_cpu_var(wd_enabled) = 1;
        if (lapic_watchdog_init(nmi_hz) < 0) {
            __get_cpu_var(wd_enabled) = 0;
            return;

@@ -262,9 +317,8 @@ void setup_apic_nmi_watchdog(void *unused)
void stop_apic_nmi_watchdog(void *unused)
{
    /* only support LOCAL and IO APICs for now */
    if ((nmi_watchdog != NMI_LOCAL_APIC) &&
        (nmi_watchdog != NMI_IO_APIC))
        return;
    if (!nmi_watchdog_active())
        return;
    if (__get_cpu_var(wd_enabled) == 0)
        return;
    if (nmi_watchdog == NMI_LOCAL_APIC)

@@ -284,26 +338,26 @@ void stop_apic_nmi_watchdog(void *unused)
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 * here too!]
 * [when there will be more tty-related locks, break them up here too!]
 */

static unsigned int
    last_irq_sums [NR_CPUS],
    alert_counter [NR_CPUS];
static DEFINE_PER_CPU(unsigned, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog(void)
{
    if (nmi_watchdog > 0) {
    if (nmi_watchdog_active()) {
        unsigned cpu;

        /*
         * Just reset the alert counters, (other CPUs might be
         * spinning on locks we hold):
         * Tell other CPUs to reset their alert counters. We cannot
         * do it ourselves because the alert count increase is not
         * atomic.
         */
        for_each_present_cpu(cpu) {
            if (alert_counter[cpu])
                alert_counter[cpu] = 0;
            if (per_cpu(nmi_touch, cpu) != 1)
                per_cpu(nmi_touch, cpu) = 1;
        }
    }

@@ -314,12 +368,9 @@ void touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(touch_nmi_watchdog);

extern void die_nmi(struct pt_regs *, const char *msg);

notrace __kprobes int
nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
{

    /*
     * Since current_thread_info()-> is always on the stack, and we
     * always switch the stack NMI-atomically, it's safe to use

@@ -337,39 +388,45 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
        touched = 1;
    }

    sum = get_timer_irqs(cpu);

    if (__get_cpu_var(nmi_touch)) {
        __get_cpu_var(nmi_touch) = 0;
        touched = 1;
    }

    if (cpu_isset(cpu, backtrace_mask)) {
        static DEFINE_SPINLOCK(lock); /* Serialise the printks */

        spin_lock(&lock);
        printk("NMI backtrace for cpu %d\n", cpu);
        printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
        dump_stack();
        spin_unlock(&lock);
        cpu_clear(cpu, backtrace_mask);
    }

    /*
     * Take the local apic timer and PIT/HPET into account. We don't
     * know which one is active, when we have highres/dyntick on
     */
    sum = per_cpu(irq_stat, cpu).apic_timer_irqs +
        per_cpu(irq_stat, cpu).irq0_irqs;
    /* Could check oops_in_progress here too, but it's safer not to */
    if (mce_in_progress())
        touched = 1;

    /* if the none of the timers isn't firing, this cpu isn't doing much */
    if (!touched && last_irq_sums[cpu] == sum) {
    if (!touched && __get_cpu_var(last_irq_sum) == sum) {
        /*
         * Ayiee, looks like this CPU is stuck ...
         * wait a few IRQs (5 seconds) before doing the oops ...
         */
        alert_counter[cpu]++;
        if (alert_counter[cpu] == 5*nmi_hz)
        local_inc(&__get_cpu_var(alert_counter));
        if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
            /*
             * die_nmi will return ONLY if NOTIFY_STOP happens..
             */
            die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
            die_nmi("BUG: NMI Watchdog detected LOCKUP",
                regs, panic_on_timeout);
    } else {
        last_irq_sums[cpu] = sum;
        alert_counter[cpu] = 0;
        __get_cpu_var(last_irq_sum) = sum;
        local_set(&__get_cpu_var(alert_counter), 0);
    }

    /* see if the nmi watchdog went off */
    if (!__get_cpu_var(wd_enabled))
        return rc;

@@ -378,7 +435,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
        rc |= lapic_wd_event(nmi_hz);
        break;
    case NMI_IO_APIC:
        /* don't know how to accurately check for this.
        /*
         * don't know how to accurately check for this.
         * just assume it was a watchdog timer interrupt
         * This matches the old behaviour.
         */

@@ -396,7 +454,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
    char buf[64];

    sprintf(buf, "NMI received for unknown reason %02x\n", reason);
    die_nmi(regs, buf);
    die_nmi(buf, regs, 1); /* Always panic here */
    return 0;
}

@@ -414,32 +472,26 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
    if (!!old_state == !!nmi_watchdog_enabled)
        return 0;

    if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) {
        printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
    if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
        printk(KERN_WARNING
            "NMI watchdog is permanently disabled\n");
        return -EIO;
    }

    if (nmi_watchdog == NMI_DEFAULT) {
        if (lapic_watchdog_ok())
            nmi_watchdog = NMI_LOCAL_APIC;
        else
            nmi_watchdog = NMI_IO_APIC;
    }

    if (nmi_watchdog == NMI_LOCAL_APIC) {
        if (nmi_watchdog_enabled)
            enable_lapic_nmi_watchdog();
        else
            disable_lapic_nmi_watchdog();
    } else {
        printk( KERN_WARNING
        printk(KERN_WARNING
            "NMI watchdog doesn't know what hardware to touch\n");
        return -EIO;
    }
    return 0;
}

#endif
#endif /* CONFIG_SYSCTL */

int do_nmi_callback(struct pt_regs *regs, int cpu)
{

@@ -462,6 +514,3 @@ void __trigger_all_cpu_backtrace(void)
        mdelay(1);
    }
}

EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);

@@ -1,482 +0,0 @@
/*
 * NMI watchdog support on APIC systems
 *
 * Started by Ingo Molnar <mingo@redhat.com>
 *
 * Fixes:
 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
 * Pavel Machek and
 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
 */

#include <linux/nmi.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>
#include <linux/kdebug.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/proto.h>
#include <asm/mce.h>

#include <mach_traps.h>

int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;

static cpumask_t backtrace_mask = CPU_MASK_NONE;

/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 * be enabled
 * 0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
static int panic_on_timeout;

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;

static DEFINE_PER_CPU(short, wd_enabled);

/* Run after command line and cpu_init init, but before all other checks */
void nmi_watchdog_default(void)
{
    if (nmi_watchdog != NMI_DEFAULT)
        return;
    nmi_watchdog = NMI_NONE;
}

static int endflag __initdata = 0;

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
    local_irq_enable_in_hardirq();
    /* Intentionally don't use cpu_relax here. This is
       to make sure that the performance counter really ticks,
       even if there is a simulator or similar that catches the
       pause instruction. On a real HT machine this is fine because
       all other CPUs are busy with "useless" delay loops and don't
       care if they get somewhat less cycles. */
    while (endflag == 0)
        mb();
}
#endif

int __init check_nmi_watchdog(void)
{
    int *prev_nmi_count;
    int cpu;

    if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
        return 0;

    if (!atomic_read(&nmi_active))
        return 0;

    prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
    if (!prev_nmi_count)
        return -1;

    printk(KERN_INFO "Testing NMI watchdog ... ");

#ifdef CONFIG_SMP
    if (nmi_watchdog == NMI_LOCAL_APIC)
        smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
#endif

    for (cpu = 0; cpu < NR_CPUS; cpu++)
        prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
    local_irq_enable();
    mdelay((20*1000)/nmi_hz); // wait 20 ticks

    for_each_online_cpu(cpu) {
        if (!per_cpu(wd_enabled, cpu))
            continue;
        if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) {
            printk(KERN_WARNING "WARNING: CPU#%d: NMI "
                "appears to be stuck (%d->%d)!\n",
                cpu,
                prev_nmi_count[cpu],
                cpu_pda(cpu)->__nmi_count);
            per_cpu(wd_enabled, cpu) = 0;
            atomic_dec(&nmi_active);
        }
    }
    endflag = 1;
    if (!atomic_read(&nmi_active)) {
        kfree(prev_nmi_count);
        atomic_set(&nmi_active, -1);
        return -1;
    }
    printk("OK.\n");

    /* now that we know it works we can reduce NMI frequency to
       something more reasonable; makes a difference in some configs */
    if (nmi_watchdog == NMI_LOCAL_APIC)
        nmi_hz = lapic_adjust_nmi_hz(1);

    kfree(prev_nmi_count);
    return 0;
}

static int __init setup_nmi_watchdog(char *str)
{
    int nmi;

    if (!strncmp(str,"panic",5)) {
        panic_on_timeout = 1;
        str = strchr(str, ',');
        if (!str)
            return 1;
        ++str;
    }

    get_option(&str, &nmi);

    if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
        return 0;

    nmi_watchdog = nmi;
    return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);

#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
    /* only CPU0 goes here, other CPUs should be offline */
    nmi_pm_active = atomic_read(&nmi_active);
    stop_apic_nmi_watchdog(NULL);
    BUG_ON(atomic_read(&nmi_active) != 0);
    return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
    /* only CPU0 goes here, other CPUs should be offline */
    if (nmi_pm_active > 0) {
        setup_apic_nmi_watchdog(NULL);
        touch_nmi_watchdog();
    }
    return 0;
}

static struct sysdev_class nmi_sysclass = {
    .name = "lapic_nmi",
    .resume = lapic_nmi_resume,
    .suspend = lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
    .id = 0,
    .cls = &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
    int error;

    /* should really be a BUG_ON but b/c this is an
     * init call, it just doesn't work. -dcz
     */
    if (nmi_watchdog != NMI_LOCAL_APIC)
        return 0;

    if (atomic_read(&nmi_active) < 0)
        return 0;

    error = sysdev_class_register(&nmi_sysclass);
    if (!error)
        error = sysdev_register(&device_lapic_nmi);
    return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif /* CONFIG_PM */

static void __acpi_nmi_enable(void *__unused)
{
    apic_write(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
    if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
        on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}

static void __acpi_nmi_disable(void *__unused)
{
    apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
    if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
        on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

void setup_apic_nmi_watchdog(void *unused)
{
    if (__get_cpu_var(wd_enabled))
        return;

    /* cheap hack to support suspend/resume */
    /* if cpu0 is not active neither should the other cpus */
    if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
        return;

    switch (nmi_watchdog) {
    case NMI_LOCAL_APIC:
        __get_cpu_var(wd_enabled) = 1;
        if (lapic_watchdog_init(nmi_hz) < 0) {
            __get_cpu_var(wd_enabled) = 0;
            return;
        }
        /* FALL THROUGH */
    case NMI_IO_APIC:
        __get_cpu_var(wd_enabled) = 1;
        atomic_inc(&nmi_active);
    }
}

void stop_apic_nmi_watchdog(void *unused)
{
    /* only support LOCAL and IO APICs for now */
    if ((nmi_watchdog != NMI_LOCAL_APIC) &&
        (nmi_watchdog != NMI_IO_APIC))
        return;
    if (__get_cpu_var(wd_enabled) == 0)
        return;
    if (nmi_watchdog == NMI_LOCAL_APIC)
        lapic_watchdog_stop();
    __get_cpu_var(wd_enabled) = 0;
    atomic_dec(&nmi_active);
}

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check it's local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 */

static DEFINE_PER_CPU(unsigned, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog(void)
{
    if (nmi_watchdog > 0) {
        unsigned cpu;

        /*
         * Tell other CPUs to reset their alert counters. We cannot
         * do it ourselves because the alert count increase is not
         * atomic.
         */
        for_each_present_cpu(cpu) {
            if (per_cpu(nmi_touch, cpu) != 1)
                per_cpu(nmi_touch, cpu) = 1;
        }
    }

    touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

notrace __kprobes int
nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
{
    int sum;
    int touched = 0;
    int cpu = smp_processor_id();
    int rc = 0;

    /* check for other users first */
    if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
        == NOTIFY_STOP) {
        rc = 1;
        touched = 1;
    }

    sum = read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
    if (__get_cpu_var(nmi_touch)) {
        __get_cpu_var(nmi_touch) = 0;
        touched = 1;
    }

    if (cpu_isset(cpu, backtrace_mask)) {
        static DEFINE_SPINLOCK(lock); /* Serialise the printks */

        spin_lock(&lock);
        printk("NMI backtrace for cpu %d\n", cpu);
        dump_stack();
        spin_unlock(&lock);
        cpu_clear(cpu, backtrace_mask);
    }

#ifdef CONFIG_X86_MCE
    /* Could check oops_in_progress here too, but it's safer
       not too */
    if (atomic_read(&mce_entry) > 0)
        touched = 1;
#endif
    /* if the apic timer isn't firing, this cpu isn't doing much */
    if (!touched && __get_cpu_var(last_irq_sum) == sum) {
        /*
         * Ayiee, looks like this CPU is stuck ...
         * wait a few IRQs (5 seconds) before doing the oops ...
         */
        local_inc(&__get_cpu_var(alert_counter));
        if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
            die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
                panic_on_timeout);
    } else {
        __get_cpu_var(last_irq_sum) = sum;
        local_set(&__get_cpu_var(alert_counter), 0);
    }

    /* see if the nmi watchdog went off */
    if (!__get_cpu_var(wd_enabled))
        return rc;
    switch (nmi_watchdog) {
    case NMI_LOCAL_APIC:
        rc |= lapic_wd_event(nmi_hz);
        break;
    case NMI_IO_APIC:
        /* don't know how to accurately check for this.
         * just assume it was a watchdog timer interrupt
         * This matches the old behaviour.
         */
        rc = 1;
        break;
    }
    return rc;
}

static unsigned ignore_nmis;

asmlinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
    nmi_enter();
    add_pda(__nmi_count,1);
    if (!ignore_nmis)
        default_do_nmi(regs);
    nmi_exit();
}

void stop_nmi(void)
{
    acpi_nmi_disable();
    ignore_nmis++;
}

void restart_nmi(void)
{
    ignore_nmis--;
    acpi_nmi_enable();
}

#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
    unsigned char reason = get_nmi_reason();
    char buf[64];

    sprintf(buf, "NMI received for unknown reason %02x\n", reason);
    die_nmi(buf, regs, 1); /* Always panic here */
    return 0;
}

/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
        void __user *buffer, size_t *length, loff_t *ppos)
{
    int old_state;

    nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
    old_state = nmi_watchdog_enabled;
    proc_dointvec(table, write, file, buffer, length, ppos);
    if (!!old_state == !!nmi_watchdog_enabled)
        return 0;

    if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) {
        printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
        return -EIO;
    }

    /* if nmi_watchdog is not set yet, then set it */
    nmi_watchdog_default();

    if (nmi_watchdog == NMI_LOCAL_APIC) {
        if (nmi_watchdog_enabled)
            enable_lapic_nmi_watchdog();
        else
            disable_lapic_nmi_watchdog();
    } else {
        printk( KERN_WARNING
            "NMI watchdog doesn't know what hardware to touch\n");
        return -EIO;
    }
    return 0;
}

#endif

int do_nmi_callback(struct pt_regs *regs, int cpu)
{
#ifdef CONFIG_SYSCTL
    if (unknown_nmi_panic)
        return unknown_nmi_panic_callback(regs, cpu);
#endif
    return 0;
}

void __trigger_all_cpu_backtrace(void)
{
    int i;

    backtrace_mask = cpu_online_map;
    /* Wait for up to 10 seconds for all CPUs to do the backtrace */
    for (i = 0; i < 10 * 1000; i++) {
        if (cpus_empty(backtrace_mask))
            break;
        mdelay(1);
    }
}

EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);

@@ -31,6 +31,8 @@
#include <asm/numaq.h>
#include <asm/topology.h>
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/e820.h>

#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))

@@ -58,6 +60,8 @@ static void __init smp_dump_qct(void)
        node_end_pfn[node] = MB_TO_PAGES(
            eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);

        e820_register_active_regions(node, node_start_pfn[node],
            node_end_pfn[node]);
        memory_present(node,
            node_start_pfn[node], node_end_pfn[node]);
        node_remap_size[node] = node_memmap_size_bytes(node,

@@ -67,23 +71,35 @@ static void __init smp_dump_qct(void)
    }
}

/*
 * Unlike Summit, we don't really care to let the NUMA-Q
 * fall back to flat mode. Don't compile for NUMA-Q
 * unless you really need it!
 */
static __init void early_check_numaq(void)
{
    /*
     * Find possible boot-time SMP configuration:
     */
    early_find_smp_config();
    /*
     * get boot-time SMP configuration:
     */
    if (smp_found_config)
        early_get_smp_config();
}

int __init get_memcfg_numaq(void)
{
    early_check_numaq();
    if (!found_numaq)
        return 0;
    smp_dump_qct();
    return 1;
}

static int __init numaq_tsc_disable(void)
void __init numaq_tsc_disable(void)
{
    if (!found_numaq)
        return -1;

    if (num_online_nodes() > 1) {
        printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
        setup_clear_cpu_cap(X86_FEATURE_TSC);
    }
    return 0;
}
arch_initcall(numaq_tsc_disable);

(Some files were not shown because too many files changed in this diff.)