Pull release into acpica branch
This commit is contained in:
Коммит
292dd876ee
2
CREDITS
2
CREDITS
|
@ -3203,7 +3203,7 @@ N: Eugene Surovegin
|
|||
E: ebs@ebshome.net
|
||||
W: http://kernel.ebshome.net/
|
||||
P: 1024D/AE5467F1 FF22 39F1 6728 89F6 6E6C 2365 7602 F33D AE54 67F1
|
||||
D: Embedded PowerPC 4xx: I2C, PIC and random hacks/fixes
|
||||
D: Embedded PowerPC 4xx: EMAC, I2C, PIC and random hacks/fixes
|
||||
S: Sunnyvale, California 94085
|
||||
S: USA
|
||||
|
||||
|
|
|
@ -31,8 +31,6 @@ al espa
|
|||
Eine deutsche Version dieser Datei finden Sie unter
|
||||
<http://www.stefan-winter.de/Changes-2.4.0.txt>.
|
||||
|
||||
Last updated: October 29th, 2002
|
||||
|
||||
Chris Ricker (kaboom@gatech.edu or chris.ricker@genetics.utah.edu).
|
||||
|
||||
Current Minimal Requirements
|
||||
|
@ -48,7 +46,7 @@ necessary on all systems; obviously, if you don't have any ISDN
|
|||
hardware, for example, you probably needn't concern yourself with
|
||||
isdn4k-utils.
|
||||
|
||||
o Gnu C 2.95.3 # gcc --version
|
||||
o Gnu C 3.2 # gcc --version
|
||||
o Gnu make 3.79.1 # make --version
|
||||
o binutils 2.12 # ld -v
|
||||
o util-linux 2.10o # fdformat --version
|
||||
|
@ -74,26 +72,7 @@ GCC
|
|||
---
|
||||
|
||||
The gcc version requirements may vary depending on the type of CPU in your
|
||||
computer. The next paragraph applies to users of x86 CPUs, but not
|
||||
necessarily to users of other CPUs. Users of other CPUs should obtain
|
||||
information about their gcc version requirements from another source.
|
||||
|
||||
The recommended compiler for the kernel is gcc 2.95.x (x >= 3), and it
|
||||
should be used when you need absolute stability. You may use gcc 3.0.x
|
||||
instead if you wish, although it may cause problems. Later versions of gcc
|
||||
have not received much testing for Linux kernel compilation, and there are
|
||||
almost certainly bugs (mainly, but not exclusively, in the kernel) that
|
||||
will need to be fixed in order to use these compilers. In any case, using
|
||||
pgcc instead of plain gcc is just asking for trouble.
|
||||
|
||||
The Red Hat gcc 2.96 compiler subtree can also be used to build this tree.
|
||||
You should ensure you use gcc-2.96-74 or later. gcc-2.96-54 will not build
|
||||
the kernel correctly.
|
||||
|
||||
In addition, please pay attention to compiler optimization. Anything
|
||||
greater than -O2 may not be wise. Similarly, if you choose to use gcc-2.95.x
|
||||
or derivatives, be sure not to use -fstrict-aliasing (which, depending on
|
||||
your version of gcc 2.95.x, may necessitate using -fno-strict-aliasing).
|
||||
computer.
|
||||
|
||||
Make
|
||||
----
|
||||
|
@ -322,9 +301,9 @@ Getting updated software
|
|||
Kernel compilation
|
||||
******************
|
||||
|
||||
gcc 2.95.3
|
||||
----------
|
||||
o <ftp://ftp.gnu.org/gnu/gcc/gcc-2.95.3.tar.gz>
|
||||
gcc
|
||||
---
|
||||
o <ftp://ftp.gnu.org/gnu/gcc/>
|
||||
|
||||
Make
|
||||
----
|
||||
|
|
|
@ -199,7 +199,7 @@ The rationale is:
|
|||
modifications are prevented
|
||||
- saves the compiler work to optimize redundant code away ;)
|
||||
|
||||
int fun(int )
|
||||
int fun(int a)
|
||||
{
|
||||
int result = 0;
|
||||
char *buffer = kmalloc(SIZE);
|
||||
|
@ -344,7 +344,7 @@ Remember: if another thread can find your data structure, and you don't
|
|||
have a reference count on it, you almost certainly have a bug.
|
||||
|
||||
|
||||
Chapter 11: Macros, Enums, Inline functions and RTL
|
||||
Chapter 11: Macros, Enums and RTL
|
||||
|
||||
Names of macros defining constants and labels in enums are capitalized.
|
||||
|
||||
|
@ -429,7 +429,35 @@ from void pointer to any other pointer type is guaranteed by the C programming
|
|||
language.
|
||||
|
||||
|
||||
Chapter 14: References
|
||||
Chapter 14: The inline disease
|
||||
|
||||
There appears to be a common misperception that gcc has a magic "make me
|
||||
faster" speedup option called "inline". While the use of inlines can be
|
||||
appropriate (for example as a means of replacing macros, see Chapter 11), it
|
||||
very often is not. Abundant use of the inline keyword leads to a much bigger
|
||||
kernel, which in turn slows the system as a whole down, due to a bigger
|
||||
icache footprint for the CPU and simply because there is less memory
|
||||
available for the pagecache. Just think about it; a pagecache miss causes a
|
||||
disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles
|
||||
that can go into these 5 milliseconds.
|
||||
|
||||
A reasonable rule of thumb is to not put inline at functions that have more
|
||||
than 3 lines of code in them. An exception to this rule are the cases where
|
||||
a parameter is known to be a compiletime constant, and as a result of this
|
||||
constantness you *know* the compiler will be able to optimize most of your
|
||||
function away at compile time. For a good example of this latter case, see
|
||||
the kmalloc() inline function.
|
||||
|
||||
Often people argue that adding inline to functions that are static and used
|
||||
only once is always a win since there is no space tradeoff. While this is
|
||||
technically correct, gcc is capable of inlining these automatically without
|
||||
help, and the maintenance issue of removing the inline when a second user
|
||||
appears outweighs the potential value of the hint that tells gcc to do
|
||||
something it would have done anyway.
|
||||
|
||||
|
||||
|
||||
Chapter 15: References
|
||||
|
||||
The C Programming Language, Second Edition
|
||||
by Brian W. Kernighan and Dennis M. Ritchie.
|
||||
|
@ -444,10 +472,13 @@ ISBN 0-201-61586-X.
|
|||
URL: http://cm.bell-labs.com/cm/cs/tpop/
|
||||
|
||||
GNU manuals - where in compliance with K&R and this text - for cpp, gcc,
|
||||
gcc internals and indent, all available from http://www.gnu.org
|
||||
gcc internals and indent, all available from http://www.gnu.org/manual/
|
||||
|
||||
WG14 is the international standardization working group for the programming
|
||||
language C, URL: http://std.dkuug.dk/JTC1/SC22/WG14/
|
||||
language C, URL: http://www.open-std.org/JTC1/SC22/WG14/
|
||||
|
||||
Kernel CodingStyle, by greg@kroah.com at OLS 2002:
|
||||
http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
|
||||
|
||||
--
|
||||
Last updated on 16 February 2004 by a community effort on LKML.
|
||||
Last updated on 30 December 2005 by a community effort on LKML.
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
*.xml
|
||||
*.ps
|
||||
*.pdf
|
||||
*.html
|
||||
*.9.gz
|
||||
*.9
|
|
@ -53,6 +53,11 @@
|
|||
!Iinclude/linux/sched.h
|
||||
!Ekernel/sched.c
|
||||
!Ekernel/timer.c
|
||||
</sect1>
|
||||
<sect1><title>High-resolution timers</title>
|
||||
!Iinclude/linux/ktime.h
|
||||
!Iinclude/linux/hrtimer.h
|
||||
!Ekernel/hrtimer.c
|
||||
</sect1>
|
||||
<sect1><title>Internal Functions</title>
|
||||
!Ikernel/exit.c
|
||||
|
@ -369,6 +374,7 @@ X!Edrivers/acpi/motherboard.c
|
|||
X!Edrivers/acpi/bus.c
|
||||
-->
|
||||
!Edrivers/acpi/scan.c
|
||||
!Idrivers/acpi/scan.c
|
||||
<!-- No correct structured comments
|
||||
X!Edrivers/acpi/pci_bind.c
|
||||
-->
|
||||
|
|
|
@ -222,7 +222,7 @@
|
|||
<title>Two Main Types of Kernel Locks: Spinlocks and Semaphores</title>
|
||||
|
||||
<para>
|
||||
There are two main types of kernel locks. The fundamental type
|
||||
There are three main types of kernel locks. The fundamental type
|
||||
is the spinlock
|
||||
(<filename class="headerfile">include/asm/spinlock.h</filename>),
|
||||
which is a very simple single-holder lock: if you can't get the
|
||||
|
@ -230,16 +230,22 @@
|
|||
very small and fast, and can be used anywhere.
|
||||
</para>
|
||||
<para>
|
||||
The second type is a semaphore
|
||||
The second type is a mutex
|
||||
(<filename class="headerfile">include/linux/mutex.h</filename>): it
|
||||
is like a spinlock, but you may block holding a mutex.
|
||||
If you can't lock a mutex, your task will suspend itself, and be woken
|
||||
up when the mutex is released. This means the CPU can do something
|
||||
else while you are waiting. There are many cases when you simply
|
||||
can't sleep (see <xref linkend="sleeping-things"/>), and so have to
|
||||
use a spinlock instead.
|
||||
</para>
|
||||
<para>
|
||||
The third type is a semaphore
|
||||
(<filename class="headerfile">include/asm/semaphore.h</filename>): it
|
||||
can have more than one holder at any time (the number decided at
|
||||
initialization time), although it is most commonly used as a
|
||||
single-holder lock (a mutex). If you can't get a semaphore,
|
||||
your task will put itself on the queue, and be woken up when the
|
||||
semaphore is released. This means the CPU will do something
|
||||
else while you are waiting, but there are many cases when you
|
||||
simply can't sleep (see <xref linkend="sleeping-things"/>), and so
|
||||
have to use a spinlock instead.
|
||||
single-holder lock (a mutex). If you can't get a semaphore, your
|
||||
task will be suspended and later on woken up - just like for mutexes.
|
||||
</para>
|
||||
<para>
|
||||
Neither type of lock is recursive: see
|
||||
|
|
|
@ -229,7 +229,7 @@ int __init myradio_init(struct video_init *v)
|
|||
|
||||
static int users = 0;
|
||||
|
||||
static int radio_open(stuct video_device *dev, int flags)
|
||||
static int radio_open(struct video_device *dev, int flags)
|
||||
{
|
||||
if(users)
|
||||
return -EBUSY;
|
||||
|
@ -949,7 +949,7 @@ int __init mycamera_init(struct video_init *v)
|
|||
|
||||
static int users = 0;
|
||||
|
||||
static int camera_open(stuct video_device *dev, int flags)
|
||||
static int camera_open(struct video_device *dev, int flags)
|
||||
{
|
||||
if(users)
|
||||
return -EBUSY;
|
||||
|
|
|
@ -1,74 +1,67 @@
|
|||
Refcounter framework for elements of lists/arrays protected by
|
||||
RCU.
|
||||
Refcounter design for elements of lists/arrays protected by RCU.
|
||||
|
||||
Refcounting on elements of lists which are protected by traditional
|
||||
reader/writer spinlocks or semaphores is straightforward, as in:
|
||||
|
||||
1. 2.
|
||||
add() search_and_reference()
|
||||
{ {
|
||||
alloc_object read_lock(&list_lock);
|
||||
... search_for_element
|
||||
atomic_set(&el->rc, 1); atomic_inc(&el->rc);
|
||||
write_lock(&list_lock); ...
|
||||
add_element read_unlock(&list_lock);
|
||||
... ...
|
||||
write_unlock(&list_lock); }
|
||||
1. 2.
|
||||
add() search_and_reference()
|
||||
{ {
|
||||
alloc_object read_lock(&list_lock);
|
||||
... search_for_element
|
||||
atomic_set(&el->rc, 1); atomic_inc(&el->rc);
|
||||
write_lock(&list_lock); ...
|
||||
add_element read_unlock(&list_lock);
|
||||
... ...
|
||||
write_unlock(&list_lock); }
|
||||
}
|
||||
|
||||
3. 4.
|
||||
release_referenced() delete()
|
||||
{ {
|
||||
... write_lock(&list_lock);
|
||||
atomic_dec(&el->rc, relfunc) ...
|
||||
... delete_element
|
||||
} write_unlock(&list_lock);
|
||||
...
|
||||
if (atomic_dec_and_test(&el->rc))
|
||||
kfree(el);
|
||||
...
|
||||
... write_lock(&list_lock);
|
||||
atomic_dec(&el->rc, relfunc) ...
|
||||
... delete_element
|
||||
} write_unlock(&list_lock);
|
||||
...
|
||||
if (atomic_dec_and_test(&el->rc))
|
||||
kfree(el);
|
||||
...
|
||||
}
|
||||
|
||||
If this list/array is made lock free using rcu as in changing the
|
||||
write_lock in add() and delete() to spin_lock and changing read_lock
|
||||
in search_and_reference to rcu_read_lock(), the rcuref_get in
|
||||
in search_and_reference to rcu_read_lock(), the atomic_inc in
|
||||
search_and_reference could potentially hold reference to an element which
|
||||
has already been deleted from the list/array. rcuref_lf_get_rcu takes
|
||||
has already been deleted from the list/array. atomic_inc_not_zero takes
|
||||
care of this scenario. search_and_reference should look as follows:
|
||||
|
||||
1. 2.
|
||||
add() search_and_reference()
|
||||
{ {
|
||||
alloc_object rcu_read_lock();
|
||||
... search_for_element
|
||||
atomic_set(&el->rc, 1); if (rcuref_inc_lf(&el->rc)) {
|
||||
write_lock(&list_lock); rcu_read_unlock();
|
||||
return FAIL;
|
||||
add_element }
|
||||
... ...
|
||||
write_unlock(&list_lock); rcu_read_unlock();
|
||||
alloc_object rcu_read_lock();
|
||||
... search_for_element
|
||||
atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) {
|
||||
write_lock(&list_lock); rcu_read_unlock();
|
||||
return FAIL;
|
||||
add_element }
|
||||
... ...
|
||||
write_unlock(&list_lock); rcu_read_unlock();
|
||||
} }
|
||||
3. 4.
|
||||
release_referenced() delete()
|
||||
{ {
|
||||
... write_lock(&list_lock);
|
||||
rcuref_dec(&el->rc, relfunc) ...
|
||||
... delete_element
|
||||
} write_unlock(&list_lock);
|
||||
...
|
||||
if (rcuref_dec_and_test(&el->rc))
|
||||
call_rcu(&el->head, el_free);
|
||||
...
|
||||
... write_lock(&list_lock);
|
||||
atomic_dec(&el->rc, relfunc) ...
|
||||
... delete_element
|
||||
} write_unlock(&list_lock);
|
||||
...
|
||||
if (atomic_dec_and_test(&el->rc))
|
||||
call_rcu(&el->head, el_free);
|
||||
...
|
||||
}
|
||||
|
||||
Sometimes, a reference to the element needs to be obtained in the
|
||||
update (write) stream. In such cases, rcuref_inc_lf might be an overkill
|
||||
since the spinlock serialising list updates are held. rcuref_inc
|
||||
update (write) stream. In such cases, atomic_inc_not_zero might be an
|
||||
overkill since the spinlock serialising list updates is held. atomic_inc
|
||||
is to be used in such cases.
|
||||
For arches which do not have cmpxchg rcuref_inc_lf
|
||||
api uses a hashed spinlock implementation and the same hashed spinlock
|
||||
is acquired in all rcuref_xxx primitives to preserve atomicity.
|
||||
Note: Use rcuref_inc api only if you need to use rcuref_inc_lf on the
|
||||
refcounter atleast at one place. Mixing rcuref_inc and atomic_xxx api
|
||||
might lead to races. rcuref_inc_lf() must be used in lockfree
|
||||
RCU critical sections only.
|
||||
|
||||
|
|
|
@ -27,18 +27,17 @@ Who To Submit Drivers To
|
|||
------------------------
|
||||
|
||||
Linux 2.0:
|
||||
No new drivers are accepted for this kernel tree
|
||||
No new drivers are accepted for this kernel tree.
|
||||
|
||||
Linux 2.2:
|
||||
No new drivers are accepted for this kernel tree.
|
||||
|
||||
Linux 2.4:
|
||||
If the code area has a general maintainer then please submit it to
|
||||
the maintainer listed in MAINTAINERS in the kernel file. If the
|
||||
maintainer does not respond or you cannot find the appropriate
|
||||
maintainer then please contact the 2.2 kernel maintainer:
|
||||
Marc-Christian Petersen <m.c.p@wolk-project.de>.
|
||||
|
||||
Linux 2.4:
|
||||
The same rules apply as 2.2. The final contact point for Linux 2.4
|
||||
submissions is Marcelo Tosatti <marcelo.tosatti@cyclades.com>.
|
||||
maintainer then please contact Marcelo Tosatti
|
||||
<marcelo.tosatti@cyclades.com>.
|
||||
|
||||
Linux 2.6:
|
||||
The same rules apply as 2.4 except that you should follow linux-kernel
|
||||
|
@ -53,6 +52,7 @@ Licensing: The code must be released to us under the
|
|||
of exclusive GPL licensing, and if you wish the driver
|
||||
to be useful to other communities such as BSD you may well
|
||||
wish to release under multiple licenses.
|
||||
See accepted licenses at include/linux/module.h
|
||||
|
||||
Copyright: The copyright owner must agree to use of GPL.
|
||||
It's best if the submitter and copyright owner
|
||||
|
@ -143,5 +143,13 @@ KernelNewbies:
|
|||
http://kernelnewbies.org/
|
||||
|
||||
Linux USB project:
|
||||
http://sourceforge.net/projects/linux-usb/
|
||||
http://www.linux-usb.org/
|
||||
|
||||
How to NOT write kernel driver by arjanv@redhat.com
|
||||
http://people.redhat.com/arjanv/olspaper.pdf
|
||||
|
||||
Kernel Janitor:
|
||||
http://janitor.kernelnewbies.org/
|
||||
|
||||
--
|
||||
Last updated on 17 Nov 2005.
|
||||
|
|
|
@ -78,7 +78,9 @@ Randy Dunlap's patch scripts:
|
|||
http://www.xenotime.net/linux/scripts/patching-scripts-002.tar.gz
|
||||
|
||||
Andrew Morton's patch scripts:
|
||||
http://www.zip.com.au/~akpm/linux/patches/patch-scripts-0.20
|
||||
http://www.zip.com.au/~akpm/linux/patches/
|
||||
Instead of these scripts, quilt is the recommended patch management
|
||||
tool (see above).
|
||||
|
||||
|
||||
|
||||
|
@ -97,7 +99,7 @@ need to split up your patch. See #3, next.
|
|||
|
||||
3) Separate your changes.
|
||||
|
||||
Separate each logical change into its own patch.
|
||||
Separate _logical changes_ into a single patch file.
|
||||
|
||||
For example, if your changes include both bug fixes and performance
|
||||
enhancements for a single driver, separate those changes into two
|
||||
|
@ -112,6 +114,10 @@ If one patch depends on another patch in order for a change to be
|
|||
complete, that is OK. Simply note "this patch depends on patch X"
|
||||
in your patch description.
|
||||
|
||||
If you cannot condense your patch set into a smaller set of patches,
|
||||
then only post say 15 or so at a time and wait for review and integration.
|
||||
|
||||
|
||||
|
||||
4) Select e-mail destination.
|
||||
|
||||
|
@ -124,6 +130,10 @@ your patch to the primary Linux kernel developer's mailing list,
|
|||
linux-kernel@vger.kernel.org. Most kernel developers monitor this
|
||||
e-mail list, and can comment on your changes.
|
||||
|
||||
|
||||
Do not send more than 15 patches at once to the vger mailing lists!!!
|
||||
|
||||
|
||||
Linus Torvalds is the final arbiter of all changes accepted into the
|
||||
Linux kernel. His e-mail address is <torvalds@osdl.org>. He gets
|
||||
a lot of e-mail, so typically you should do your best to -avoid- sending
|
||||
|
@ -149,6 +159,9 @@ USB, framebuffer devices, the VFS, the SCSI subsystem, etc. See the
|
|||
MAINTAINERS file for a mailing list that relates specifically to
|
||||
your change.
|
||||
|
||||
Majordomo lists of VGER.KERNEL.ORG at:
|
||||
<http://vger.kernel.org/vger-lists.html>
|
||||
|
||||
If changes affect userland-kernel interfaces, please send
|
||||
the MAN-PAGES maintainer (as listed in the MAINTAINERS file)
|
||||
a man-pages patch, or at least a notification of the change,
|
||||
|
@ -373,27 +386,14 @@ a diffstat, to show what files have changed, and the number of inserted
|
|||
and deleted lines per file. A diffstat is especially useful on bigger
|
||||
patches. Other comments relevant only to the moment or the maintainer,
|
||||
not suitable for the permanent changelog, should also go here.
|
||||
Use diffstat options "-p 1 -w 70" so that filenames are listed from the
|
||||
top of the kernel source tree and don't use too much horizontal space
|
||||
(easily fit in 80 columns, maybe with some indentation).
|
||||
|
||||
See more details on the proper patch format in the following
|
||||
references.
|
||||
|
||||
|
||||
13) More references for submitting patches
|
||||
|
||||
Andrew Morton, "The perfect patch" (tpp).
|
||||
<http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt>
|
||||
|
||||
Jeff Garzik, "Linux kernel patch submission format."
|
||||
<http://linux.yyz.us/patch-format.html>
|
||||
|
||||
Greg KH, "How to piss off a kernel subsystem maintainer"
|
||||
<http://www.kroah.com/log/2005/03/31/>
|
||||
|
||||
Kernel Documentation/CodingStyle
|
||||
<http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
|
||||
|
||||
Linus Torvald's mail on the canonical patch format:
|
||||
<http://lkml.org/lkml/2005/4/7/183>
|
||||
|
||||
|
||||
-----------------------------------
|
||||
|
@ -466,3 +466,31 @@ and 'extern __inline__'.
|
|||
Don't try to anticipate nebulous future cases which may or may not
|
||||
be useful: "Make it as simple as you can, and no simpler."
|
||||
|
||||
|
||||
|
||||
----------------------
|
||||
SECTION 3 - REFERENCES
|
||||
----------------------
|
||||
|
||||
Andrew Morton, "The perfect patch" (tpp).
|
||||
<http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt>
|
||||
|
||||
Jeff Garzik, "Linux kernel patch submission format."
|
||||
<http://linux.yyz.us/patch-format.html>
|
||||
|
||||
Greg Kroah-Hartman "How to piss off a kernel subsystem maintainer".
|
||||
<http://www.kroah.com/log/2005/03/31/>
|
||||
<http://www.kroah.com/log/2005/07/08/>
|
||||
<http://www.kroah.com/log/2005/10/19/>
|
||||
<http://www.kroah.com/log/2006/01/11/>
|
||||
|
||||
NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!.
|
||||
<http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2>
|
||||
|
||||
Kernel Documentation/CodingStyle
|
||||
<http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
|
||||
|
||||
Linus Torvalds's mail on the canonical patch format:
|
||||
<http://lkml.org/lkml/2005/4/7/183>
|
||||
--
|
||||
Last updated on 17 Nov 2005.
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
Applying Patches To The Linux Kernel
|
||||
------------------------------------
|
||||
|
||||
(Written by Jesper Juhl, August 2005)
|
||||
|
||||
Original by: Jesper Juhl, August 2005
|
||||
Last update: 2006-01-05
|
||||
|
||||
|
||||
A frequently asked question on the Linux Kernel Mailing List is how to apply
|
||||
|
@ -76,7 +76,7 @@ instead:
|
|||
|
||||
If you wish to uncompress the patch file by hand first before applying it
|
||||
(what I assume you've done in the examples below), then you simply run
|
||||
gunzip or bunzip2 on the file - like this:
|
||||
gunzip or bunzip2 on the file -- like this:
|
||||
gunzip patch-x.y.z.gz
|
||||
bunzip2 patch-x.y.z.bz2
|
||||
|
||||
|
@ -94,7 +94,7 @@ Common errors when patching
|
|||
---
|
||||
When patch applies a patch file it attempts to verify the sanity of the
|
||||
file in different ways.
|
||||
Checking that the file looks like a valid patch file, checking the code
|
||||
Checking that the file looks like a valid patch file & checking the code
|
||||
around the bits being modified matches the context provided in the patch are
|
||||
just two of the basic sanity checks patch does.
|
||||
|
||||
|
@ -118,16 +118,16 @@ wrong.
|
|||
|
||||
When patch encounters a change that it can't fix up with fuzz it rejects it
|
||||
outright and leaves a file with a .rej extension (a reject file). You can
|
||||
read this file to see exactely what change couldn't be applied, so you can
|
||||
read this file to see exactly what change couldn't be applied, so you can
|
||||
go fix it up by hand if you wish.
|
||||
|
||||
If you don't have any third party patches applied to your kernel source, but
|
||||
If you don't have any third-party patches applied to your kernel source, but
|
||||
only patches from kernel.org and you apply the patches in the correct order,
|
||||
and have made no modifications yourself to the source files, then you should
|
||||
never see a fuzz or reject message from patch. If you do see such messages
|
||||
anyway, then there's a high risk that either your local source tree or the
|
||||
patch file is corrupted in some way. In that case you should probably try
|
||||
redownloading the patch and if things are still not OK then you'd be advised
|
||||
re-downloading the patch and if things are still not OK then you'd be advised
|
||||
to start with a fresh tree downloaded in full from kernel.org.
|
||||
|
||||
Let's look a bit more at some of the messages patch can produce.
|
||||
|
@ -136,7 +136,7 @@ If patch stops and presents a "File to patch:" prompt, then patch could not
|
|||
find a file to be patched. Most likely you forgot to specify -p1 or you are
|
||||
in the wrong directory. Less often, you'll find patches that need to be
|
||||
applied with -p0 instead of -p1 (reading the patch file should reveal if
|
||||
this is the case - if so, then this is an error by the person who created
|
||||
this is the case -- if so, then this is an error by the person who created
|
||||
the patch but is not fatal).
|
||||
|
||||
If you get "Hunk #2 succeeded at 1887 with fuzz 2 (offset 7 lines)." or a
|
||||
|
@ -167,22 +167,28 @@ the patch will in fact apply it.
|
|||
|
||||
A message similar to "patch: **** unexpected end of file in patch" or "patch
|
||||
unexpectedly ends in middle of line" means that patch could make no sense of
|
||||
the file you fed to it. Either your download is broken or you tried to feed
|
||||
patch a compressed patch file without uncompressing it first.
|
||||
the file you fed to it. Either your download is broken, you tried to feed
|
||||
patch a compressed patch file without uncompressing it first, or the patch
|
||||
file that you are using has been mangled by a mail client or mail transfer
|
||||
agent along the way somewhere, e.g., by splitting a long line into two lines.
|
||||
Often these warnings can easily be fixed by joining (concatenating) the
|
||||
two lines that had been split.
|
||||
|
||||
As I already mentioned above, these errors should never happen if you apply
|
||||
a patch from kernel.org to the correct version of an unmodified source tree.
|
||||
So if you get these errors with kernel.org patches then you should probably
|
||||
assume that either your patch file or your tree is broken and I'd advice you
|
||||
assume that either your patch file or your tree is broken and I'd advise you
|
||||
to start over with a fresh download of a full kernel tree and the patch you
|
||||
wish to apply.
|
||||
|
||||
|
||||
Are there any alternatives to `patch'?
|
||||
---
|
||||
Yes there are alternatives. You can use the `interdiff' program
|
||||
(http://cyberelk.net/tim/patchutils/) to generate a patch representing the
|
||||
differences between two patches and then apply the result.
|
||||
Yes there are alternatives.
|
||||
|
||||
You can use the `interdiff' program (http://cyberelk.net/tim/patchutils/) to
|
||||
generate a patch representing the differences between two patches and then
|
||||
apply the result.
|
||||
This will let you move from something like 2.6.12.2 to 2.6.12.3 in a single
|
||||
step. The -z flag to interdiff will even let you feed it patches in gzip or
|
||||
bzip2 compressed form directly without the use of zcat or bzcat or manual
|
||||
|
@ -197,10 +203,10 @@ do the additional steps since interdiff can get things wrong in some cases.
|
|||
Another alternative is `ketchup', which is a python script for automatic
|
||||
downloading and applying of patches (http://www.selenic.com/ketchup/).
|
||||
|
||||
Other nice tools are diffstat which shows a summary of changes made by a
|
||||
patch, lsdiff which displays a short listing of affected files in a patch
|
||||
file, along with (optionally) the line numbers of the start of each patch
|
||||
and grepdiff which displays a list of the files modified by a patch where
|
||||
Other nice tools are diffstat, which shows a summary of changes made by a
|
||||
patch; lsdiff, which displays a short listing of affected files in a patch
|
||||
file, along with (optionally) the line numbers of the start of each patch;
|
||||
and grepdiff, which displays a list of the files modified by a patch where
|
||||
the patch contains a given regular expression.
|
||||
|
||||
|
||||
|
@ -225,8 +231,8 @@ The -mm kernels live at
|
|||
In place of ftp.kernel.org you can use ftp.cc.kernel.org, where cc is a
|
||||
country code. This way you'll be downloading from a mirror site that's most
|
||||
likely geographically closer to you, resulting in faster downloads for you,
|
||||
less bandwidth used globally and less load on the main kernel.org servers -
|
||||
these are good things, do use mirrors when possible.
|
||||
less bandwidth used globally and less load on the main kernel.org servers --
|
||||
these are good things, so do use mirrors when possible.
|
||||
|
||||
|
||||
The 2.6.x kernels
|
||||
|
@ -234,14 +240,14 @@ The 2.6.x kernels
|
|||
These are the base stable releases released by Linus. The highest numbered
|
||||
release is the most recent.
|
||||
|
||||
If regressions or other serious flaws are found then a -stable fix patch
|
||||
If regressions or other serious flaws are found, then a -stable fix patch
|
||||
will be released (see below) on top of this base. Once a new 2.6.x base
|
||||
kernel is released, a patch is made available that is a delta between the
|
||||
previous 2.6.x kernel and the new one.
|
||||
|
||||
To apply a patch moving from 2.6.11 to 2.6.12 you'd do the following (note
|
||||
To apply a patch moving from 2.6.11 to 2.6.12, you'd do the following (note
|
||||
that such patches do *NOT* apply on top of 2.6.x.y kernels but on top of the
|
||||
base 2.6.x kernel - if you need to move from 2.6.x.y to 2.6.x+1 you need to
|
||||
base 2.6.x kernel -- if you need to move from 2.6.x.y to 2.6.x+1 you need to
|
||||
first revert the 2.6.x.y patch).
|
||||
|
||||
Here are some examples:
|
||||
|
@ -258,12 +264,12 @@ $ patch -p1 -R < ../patch-2.6.11.1 # revert the 2.6.11.1 patch
|
|||
# source dir is now 2.6.11
|
||||
$ patch -p1 < ../patch-2.6.12 # apply new 2.6.12 patch
|
||||
$ cd ..
|
||||
$ mv linux-2.6.11.1 inux-2.6.12 # rename source dir
|
||||
$ mv linux-2.6.11.1 linux-2.6.12 # rename source dir
|
||||
|
||||
|
||||
The 2.6.x.y kernels
|
||||
---
|
||||
Kernels with 4 digit versions are -stable kernels. They contain small(ish)
|
||||
Kernels with 4-digit versions are -stable kernels. They contain small(ish)
|
||||
critical fixes for security problems or significant regressions discovered
|
||||
in a given 2.6.x kernel.
|
||||
|
||||
|
@ -274,9 +280,14 @@ versions.
|
|||
If no 2.6.x.y kernel is available, then the highest numbered 2.6.x kernel is
|
||||
the current stable kernel.
|
||||
|
||||
note: the -stable team usually do make incremental patches available as well
|
||||
as patches against the latest mainline release, but I only cover the
|
||||
non-incremental ones below. The incremental ones can be found at
|
||||
ftp://ftp.kernel.org/pub/linux/kernel/v2.6/incr/
|
||||
|
||||
These patches are not incremental, meaning that for example the 2.6.12.3
|
||||
patch does not apply on top of the 2.6.12.2 kernel source, but rather on top
|
||||
of the base 2.6.12 kernel source.
|
||||
of the base 2.6.12 kernel source.
|
||||
So, in order to apply the 2.6.12.3 patch to your existing 2.6.12.2 kernel
|
||||
source you have to first back out the 2.6.12.2 patch (so you are left with a
|
||||
base 2.6.12 kernel source) and then apply the new 2.6.12.3 patch.
|
||||
|
@ -342,12 +353,12 @@ The -git kernels
|
|||
repository, hence the name).
|
||||
|
||||
These patches are usually released daily and represent the current state of
|
||||
Linus' tree. They are more experimental than -rc kernels since they are
|
||||
Linus's tree. They are more experimental than -rc kernels since they are
|
||||
generated automatically without even a cursory glance to see if they are
|
||||
sane.
|
||||
|
||||
-git patches are not incremental and apply either to a base 2.6.x kernel or
|
||||
a base 2.6.x-rc kernel - you can see which from their name.
|
||||
a base 2.6.x-rc kernel -- you can see which from their name.
|
||||
A patch named 2.6.12-git1 applies to the 2.6.12 kernel source and a patch
|
||||
named 2.6.13-rc3-git2 applies to the source of the 2.6.13-rc3 kernel.
|
||||
|
||||
|
@ -390,12 +401,12 @@ You should generally strive to get your patches into mainline via -mm to
|
|||
ensure maximum testing.
|
||||
|
||||
This branch is in constant flux and contains many experimental features, a
|
||||
lot of debugging patches not appropriate for mainline etc and is the most
|
||||
lot of debugging patches not appropriate for mainline etc., and is the most
|
||||
experimental of the branches described in this document.
|
||||
|
||||
These kernels are not appropriate for use on systems that are supposed to be
|
||||
stable and they are more risky to run than any of the other branches (make
|
||||
sure you have up-to-date backups - that goes for any experimental kernel but
|
||||
sure you have up-to-date backups -- that goes for any experimental kernel but
|
||||
even more so for -mm kernels).
|
||||
|
||||
These kernels in addition to all the other experimental patches they contain
|
||||
|
@ -433,7 +444,11 @@ $ cd ..
|
|||
$ mv linux-2.6.12-mm1 linux-2.6.13-rc3-mm3 # rename the source dir
|
||||
|
||||
|
||||
This concludes this list of explanations of the various kernel trees and I
|
||||
hope you are now crystal clear on how to apply the various patches and help
|
||||
testing the kernel.
|
||||
This concludes this list of explanations of the various kernel trees.
|
||||
I hope you are now clear on how to apply the various patches and help testing
|
||||
the kernel.
|
||||
|
||||
Thank you's to Randy Dunlap, Rolf Eike Beer, Linus Torvalds, Bodo Eggert,
|
||||
Johannes Stezenbach, Grant Coady, Pavel Machek and others that I may have
|
||||
forgotten for their reviews and contributions to this document.
|
||||
|
||||
|
|
|
@ -0,0 +1,271 @@
|
|||
I/O Barriers
|
||||
============
|
||||
Tejun Heo <htejun@gmail.com>, July 22 2005
|
||||
|
||||
I/O barrier requests are used to guarantee ordering around the barrier
|
||||
requests. Unless you're crazy enough to use disk drives for
|
||||
implementing synchronization constructs (wow, sounds interesting...),
|
||||
the ordering is meaningful only for write requests for things like
|
||||
journal checkpoints. All requests queued before a barrier request
|
||||
must be finished (made it to the physical medium) before the barrier
|
||||
request is started, and all requests queued after the barrier request
|
||||
must be started only after the barrier request is finished (again,
|
||||
made it to the physical medium).
|
||||
|
||||
In other words, I/O barrier requests have the following two properties.
|
||||
|
||||
1. Request ordering
|
||||
|
||||
Requests cannot pass the barrier request. Preceding requests are
|
||||
processed before the barrier and following requests after.
|
||||
|
||||
Depending on what features a drive supports, this can be done in one
|
||||
of the following three ways.
|
||||
|
||||
i. For devices which have queue depth greater than 1 (TCQ devices) and
|
||||
support ordered tags, block layer can just issue the barrier as an
|
||||
ordered request and the lower level driver, controller and drive
|
||||
itself are responsible for making sure that the ordering constraint is
|
||||
met. Most modern SCSI controllers/drives should support this.
|
||||
|
||||
NOTE: SCSI ordered tag isn't currently used due to limitation in the
|
||||
SCSI midlayer, see the following random notes section.
|
||||
|
||||
ii. For devices which have queue depth greater than 1 but don't
|
||||
support ordered tags, block layer ensures that the requests preceding
|
||||
a barrier request finishes before issuing the barrier request. Also,
|
||||
it defers requests following the barrier until the barrier request is
|
||||
finished. Older SCSI controllers/drives and SATA drives fall in this
|
||||
category.
|
||||
|
||||
iii. Devices which have queue depth of 1. This is a degenerate case
|
||||
of ii. Just keeping issue order suffices. Ancient SCSI
|
||||
controllers/drives and IDE drives are in this category.
|
||||
|
||||
2. Forced flushing to physical medium
|
||||
|
||||
Again, if you're not gonna do synchronization with disk drives (dang,
|
||||
it sounds even more appealing now!), the reason you use I/O barriers
|
||||
is mainly to protect filesystem integrity when power failure or some
|
||||
other events abruptly stop the drive from operating and possibly make
|
||||
the drive lose data in its cache. So, I/O barriers need to guarantee
|
||||
that requests actually get written to non-volatile medium in order.
|
||||
|
||||
There are four cases,
|
||||
|
||||
i. No write-back cache. Keeping requests ordered is enough.
|
||||
|
||||
ii. Write-back cache but no flush operation. There's no way to
|
||||
guarantee physical-medium commit order. This kind of device can't do
|
||||
I/O barriers.
|
||||
|
||||
iii. Write-back cache and flush operation but no FUA (forced unit
|
||||
access). We need two cache flushes - before and after the barrier
|
||||
request.
|
||||
|
||||
iv. Write-back cache, flush operation and FUA. We still need one
|
||||
flush to make sure requests preceding a barrier are written to medium,
|
||||
but post-barrier flush can be avoided by using FUA write on the
|
||||
barrier itself.
|
||||
|
||||
|
||||
How to support barrier requests in drivers
|
||||
------------------------------------------
|
||||
|
||||
All barrier handling is done inside block layer proper. All low level
|
||||
drivers have to do is implement their prepare_flush_fn and use one of
|
||||
the following two functions to indicate what barrier type it supports
|
||||
and how to prepare flush requests. Note that the term 'ordered' is
|
||||
used to indicate the whole sequence of performing barrier requests
|
||||
including draining and flushing.
|
||||
|
||||
typedef void (prepare_flush_fn)(request_queue_t *q, struct request *rq);
|
||||
|
||||
int blk_queue_ordered(request_queue_t *q, unsigned ordered,
|
||||
prepare_flush_fn *prepare_flush_fn,
|
||||
unsigned gfp_mask);
|
||||
|
||||
int blk_queue_ordered_locked(request_queue_t *q, unsigned ordered,
|
||||
prepare_flush_fn *prepare_flush_fn,
|
||||
unsigned gfp_mask);
|
||||
|
||||
The only difference between the two functions is whether or not the
|
||||
caller is holding q->queue_lock on entry. The latter expects the
|
||||
caller is holding the lock.
|
||||
|
||||
@q : the queue in question
|
||||
@ordered : the ordered mode the driver/device supports
|
||||
@prepare_flush_fn : this function should prepare @rq such that it
|
||||
flushes cache to physical medium when executed
|
||||
@gfp_mask : gfp_mask used when allocating data structures
|
||||
for ordered processing
|
||||
|
||||
For example, SCSI disk driver's prepare_flush_fn looks like the
|
||||
following.
|
||||
|
||||
static void sd_prepare_flush(request_queue_t *q, struct request *rq)
|
||||
{
|
||||
memset(rq->cmd, 0, sizeof(rq->cmd));
|
||||
rq->flags |= REQ_BLOCK_PC;
|
||||
rq->timeout = SD_TIMEOUT;
|
||||
rq->cmd[0] = SYNCHRONIZE_CACHE;
|
||||
}
|
||||
|
||||
The following seven ordered modes are supported. The following table
|
||||
shows which mode should be used depending on what features a
|
||||
device/driver supports. In the leftmost column of table,
|
||||
QUEUE_ORDERED_ prefix is omitted from the mode names to save space.
|
||||
|
||||
The table is followed by description of each mode. Note that in the
|
||||
descriptions of QUEUE_ORDERED_DRAIN*, '=>' is used whereas '->' is
|
||||
used for QUEUE_ORDERED_TAG* descriptions. '=>' indicates that the
|
||||
preceding step must be complete before proceeding to the next step.
|
||||
'->' indicates that the next step can start as soon as the previous
|
||||
step is issued.
|
||||
|
||||
write-back cache ordered tag flush FUA
|
||||
-----------------------------------------------------------------------
|
||||
NONE yes/no N/A no N/A
|
||||
DRAIN no no N/A N/A
|
||||
DRAIN_FLUSH yes no yes no
|
||||
DRAIN_FUA yes no yes yes
|
||||
TAG no yes N/A N/A
|
||||
TAG_FLUSH yes yes yes no
|
||||
TAG_FUA yes yes yes yes
|
||||
|
||||
|
||||
QUEUE_ORDERED_NONE
|
||||
I/O barriers are not needed and/or supported.
|
||||
|
||||
Sequence: N/A
|
||||
|
||||
QUEUE_ORDERED_DRAIN
|
||||
Requests are ordered by draining the request queue and cache
|
||||
flushing isn't needed.
|
||||
|
||||
Sequence: drain => barrier
|
||||
|
||||
QUEUE_ORDERED_DRAIN_FLUSH
|
||||
Requests are ordered by draining the request queue and both
|
||||
pre-barrier and post-barrier cache flushings are needed.
|
||||
|
||||
Sequence: drain => preflush => barrier => postflush
|
||||
|
||||
QUEUE_ORDERED_DRAIN_FUA
|
||||
Requests are ordered by draining the request queue and
|
||||
pre-barrier cache flushing is needed. By using FUA on barrier
|
||||
request, post-barrier flushing can be skipped.
|
||||
|
||||
Sequence: drain => preflush => barrier
|
||||
|
||||
QUEUE_ORDERED_TAG
|
||||
Requests are ordered by ordered tag and cache flushing isn't
|
||||
needed.
|
||||
|
||||
Sequence: barrier
|
||||
|
||||
QUEUE_ORDERED_TAG_FLUSH
|
||||
Requests are ordered by ordered tag and both pre-barrier and
|
||||
post-barrier cache flushings are needed.
|
||||
|
||||
Sequence: preflush -> barrier -> postflush
|
||||
|
||||
QUEUE_ORDERED_TAG_FUA
|
||||
Requests are ordered by ordered tag and pre-barrier cache
|
||||
flushing is needed. By using FUA on barrier request,
|
||||
post-barrier flushing can be skipped.
|
||||
|
||||
Sequence: preflush -> barrier
|
||||
|
||||
|
||||
Random notes/caveats
|
||||
--------------------
|
||||
|
||||
* SCSI layer currently can't use TAG ordering even if the drive,
|
||||
controller and driver support it. The problem is that SCSI midlayer
|
||||
request dispatch function is not atomic. It releases queue lock and
|
||||
switches to SCSI host lock during issue and it's possible and likely to
|
||||
happen in time that requests change their relative positions. Once
|
||||
this problem is solved, TAG ordering can be enabled.
|
||||
|
||||
* Currently, no matter which ordered mode is used, there can be only
|
||||
one barrier request in progress. All I/O barriers are held off by
|
||||
block layer until the previous I/O barrier is complete. This doesn't
|
||||
make any difference for DRAIN ordered devices, but, for TAG ordered
|
||||
devices with very high command latency, passing multiple I/O barriers
|
||||
to low level *might* be helpful if they are very frequent. Well, this
|
||||
certainly is a non-issue. I'm writing this just to make clear that no
|
||||
two I/O barriers are ever passed to the low-level driver at once.
|
||||
|
||||
* Completion order. Requests in ordered sequence are issued in order
|
||||
but not required to finish in order. Barrier implementation can
|
||||
handle out-of-order completion of ordered sequence. IOW, the requests
|
||||
MUST be processed in order but the hardware/software completion paths
|
||||
are allowed to reorder completion notifications - eg. current SCSI
|
||||
midlayer doesn't preserve completion order during error handling.
|
||||
|
||||
* Requeueing order. Low-level drivers are free to requeue any request
|
||||
after they removed it from the request queue with
|
||||
blkdev_dequeue_request(). As barrier sequence should be kept in order
|
||||
when requeued, generic elevator code takes care of putting requests in
|
||||
order around barrier. See blk_ordered_req_seq() and
|
||||
ELEVATOR_INSERT_REQUEUE handling in __elv_add_request() for details.
|
||||
|
||||
Note that block drivers must not requeue preceding requests while
|
||||
completing latter requests in an ordered sequence. Currently, no
|
||||
error checking is done against this.
|
||||
|
||||
* Error handling. Currently, block layer will report error to upper
|
||||
layer if any of requests in an ordered sequence fails. Unfortunately,
|
||||
this doesn't seem to be enough. Look at the following request flow.
|
||||
QUEUE_ORDERED_TAG_FLUSH is in use.
|
||||
|
||||
[0] [1] [2] [3] [pre] [barrier] [post] < [4] [5] [6] ... >
|
||||
still in elevator
|
||||
|
||||
Let's say request [2], [3] are write requests to update file system
|
||||
metadata (journal or whatever) and [barrier] is used to mark that
|
||||
those updates are valid. Consider the following sequence.
|
||||
|
||||
i. Requests [0] ~ [post] leave the request queue and enter
|
||||
low-level driver.
|
||||
ii. After a while, unfortunately, something goes wrong and the
|
||||
drive fails [2]. Note that any of [0], [1] and [3] could have
|
||||
completed by this time, but [pre] couldn't have been finished
|
||||
as the drive must process it in order and it failed before
|
||||
processing that command.
|
||||
iii. Error handling kicks in and determines that the error is
|
||||
unrecoverable and fails [2], and resumes operation.
|
||||
iv. [pre] [barrier] [post] get processed.
|
||||
v. *BOOM* power fails
|
||||
|
||||
The problem here is that the barrier request is *supposed* to indicate
|
||||
that filesystem update requests [2] and [3] made it safely to the
|
||||
physical medium and, if the machine crashes after the barrier is
|
||||
written, filesystem recovery code can depend on that. Sadly, that
|
||||
isn't true in this case anymore. IOW, the success of an I/O barrier
|
||||
should also be dependent on success of some of the preceding requests,
|
||||
where only upper layer (filesystem) knows what 'some' is.
|
||||
|
||||
This can be solved by implementing a way to tell the block layer which
|
||||
requests affect the success of the following barrier request and
|
||||
making lower level drivers resume operation on error only after
|
||||
block layer tells it to do so.
|
||||
|
||||
As the probability of this happening is very low and the drive should
|
||||
be faulty, implementing the fix is probably an overkill. But, still,
|
||||
it's there.
|
||||
|
||||
* In previous drafts of barrier implementation, there was fallback
|
||||
mechanism such that, if FUA or ordered TAG fails, less fancy ordered
|
||||
mode can be selected and the failed barrier request is retried
|
||||
automatically. The rationale for this feature was that as FUA is
|
||||
pretty new in ATA world and ordered tag was never used widely, there
|
||||
could be devices which report to support those features but choke when
|
||||
actually given such requests.
|
||||
|
||||
This was removed for two reasons: 1. it's overkill; 2. it's
|
||||
impossible to implement properly when TAG ordering is used as low
|
||||
level drivers resume after an error automatically. If it's ever
|
||||
needed adding it back and modifying low level drivers accordingly
|
||||
shouldn't be difficult.
|
|
@ -0,0 +1,82 @@
|
|||
Block layer statistics in /sys/block/<dev>/stat
|
||||
===============================================
|
||||
|
||||
This file documents the contents of the /sys/block/<dev>/stat file.
|
||||
|
||||
The stat file provides several statistics about the state of block
|
||||
device <dev>.
|
||||
|
||||
Q. Why are there multiple statistics in a single file? Doesn't sysfs
|
||||
normally contain a single value per file?
|
||||
A. By having a single file, the kernel can guarantee that the statistics
|
||||
represent a consistent snapshot of the state of the device. If the
|
||||
statistics were exported as multiple files containing one statistic
|
||||
each, it would be impossible to guarantee that a set of readings
|
||||
represent a single point in time.
|
||||
|
||||
The stat file consists of a single line of text containing 11 decimal
|
||||
values separated by whitespace. The fields are summarized in the
|
||||
following table, and described in more detail below.
|
||||
|
||||
Name units description
|
||||
---- ----- -----------
|
||||
read I/Os requests number of read I/Os processed
|
||||
read merges requests number of read I/Os merged with in-queue I/O
|
||||
read sectors sectors number of sectors read
|
||||
read ticks milliseconds total wait time for read requests
|
||||
write I/Os requests number of write I/Os processed
|
||||
write merges requests number of write I/Os merged with in-queue I/O
|
||||
write sectors sectors number of sectors written
|
||||
write ticks milliseconds total wait time for write requests
|
||||
in_flight requests number of I/Os currently in flight
|
||||
io_ticks milliseconds total time this block device has been active
|
||||
time_in_queue milliseconds total wait time for all requests
|
||||
|
||||
read I/Os, write I/Os
|
||||
=====================
|
||||
|
||||
These values increment when an I/O request completes.
|
||||
|
||||
read merges, write merges
|
||||
=========================
|
||||
|
||||
These values increment when an I/O request is merged with an
|
||||
already-queued I/O request.
|
||||
|
||||
read sectors, write sectors
|
||||
===========================
|
||||
|
||||
These values count the number of sectors read from or written to this
|
||||
block device. The "sectors" in question are the standard UNIX 512-byte
|
||||
sectors, not any device- or filesystem-specific block size. The
|
||||
counters are incremented when the I/O completes.
|
||||
|
||||
read ticks, write ticks
|
||||
=======================
|
||||
|
||||
These values count the number of milliseconds that I/O requests have
|
||||
waited on this block device. If there are multiple I/O requests waiting,
|
||||
these values will increase at a rate greater than 1000/second; for
|
||||
example, if 60 read requests wait for an average of 30 ms, the read_ticks
|
||||
field will increase by 60*30 = 1800.
|
||||
|
||||
in_flight
|
||||
=========
|
||||
|
||||
This value counts the number of I/O requests that have been issued to
|
||||
the device driver but have not yet completed. It does not include I/O
|
||||
requests that are in the queue but not yet issued to the device driver.
|
||||
|
||||
io_ticks
|
||||
========
|
||||
|
||||
This value counts the number of milliseconds during which the device has
|
||||
had I/O requests queued.
|
||||
|
||||
time_in_queue
|
||||
=============
|
||||
|
||||
This value counts the number of milliseconds that I/O requests have waited
|
||||
on this block device. If there are multiple I/O requests waiting, this
|
||||
value will increase as the product of the number of milliseconds times the
|
||||
number of requests waiting (see "read ticks" above for an example).
|
|
@ -136,7 +136,7 @@ changes occur:
|
|||
8) void lazy_mmu_prot_update(pte_t pte)
|
||||
This interface is called whenever the protection on
|
||||
any user PTEs change. This interface provides a notification
|
||||
to architecture specific code to take appropiate action.
|
||||
to architecture specific code to take appropriate action.
|
||||
|
||||
|
||||
Next, we have the cache flushing interfaces. In general, when Linux
|
||||
|
|
|
@ -0,0 +1,357 @@
|
|||
CPU hotplug Support in Linux(tm) Kernel
|
||||
|
||||
Maintainers:
|
||||
CPU Hotplug Core:
|
||||
Rusty Russell <rusty@rustycorp.com.au>
|
||||
Srivatsa Vaddagiri <vatsa@in.ibm.com>
|
||||
i386:
|
||||
Zwane Mwaikambo <zwane@arm.linux.org.uk>
|
||||
ppc64:
|
||||
Nathan Lynch <nathanl@austin.ibm.com>
|
||||
Joel Schopp <jschopp@austin.ibm.com>
|
||||
ia64/x86_64:
|
||||
Ashok Raj <ashok.raj@intel.com>
|
||||
|
||||
Authors: Ashok Raj <ashok.raj@intel.com>
|
||||
Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
|
||||
Joel Schopp <jschopp@austin.ibm.com>
|
||||
|
||||
Introduction
|
||||
|
||||
Modern advances in system architectures have introduced advanced error
|
||||
reporting and correction capabilities in processors. CPU architectures permit
|
||||
partitioning support, where compute resources of a single CPU could be made
|
||||
available to virtual machine environments. There are a couple of OEMs that
|
||||
support NUMA hardware which are hot pluggable as well, where physical
|
||||
node insertion and removal require support for CPU hotplug.
|
||||
|
||||
Such advances require CPUs available to a kernel to be removed either for
|
||||
provisioning reasons, or for RAS purposes to keep an offending CPU off
|
||||
system execution path. Hence the need for CPU hotplug support in the
|
||||
Linux kernel.
|
||||
|
||||
A more novel use of CPU-hotplug support is its use today in suspend
|
||||
resume support for SMP. Dual-core and HT support makes even
|
||||
a laptop run SMP kernels which didn't support these methods. SMP support
|
||||
for suspend/resume is a work in progress.
|
||||
|
||||
General Stuff about CPU Hotplug
|
||||
--------------------------------
|
||||
|
||||
Command Line Switches
|
||||
---------------------
|
||||
maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using
|
||||
maxcpus=2 will only boot 2. You can choose to bring the
|
||||
other cpus online later; read the FAQs for more info.
|
||||
|
||||
additional_cpus=n [x86_64 only] use this to limit hotpluggable cpus.
|
||||
This option sets
|
||||
cpu_possible_map = cpu_present_map + additional_cpus
|
||||
|
||||
CPU maps and such
|
||||
-----------------
|
||||
[More on cpumaps and primitive to manipulate, please check
|
||||
include/linux/cpumask.h that has more descriptive text.]
|
||||
|
||||
cpu_possible_map: Bitmap of possible CPUs that can ever be available in the
|
||||
system. This is used to allocate some boot time memory for per_cpu variables
|
||||
that aren't designed to grow/shrink as CPUs are made available or removed.
|
||||
Once set during boot time discovery phase, the map is static, i.e. no bits
|
||||
are added or removed anytime. Trimming it accurately for your system needs
|
||||
upfront can save some boot time memory. See below for how we use heuristics
|
||||
in x86_64 case to keep this under check.
|
||||
|
||||
cpu_online_map: Bitmap of all CPUs currently online. It's set in __cpu_up()
|
||||
after a cpu is available for kernel scheduling and ready to receive
|
||||
interrupts from devices. It's cleared when a cpu is brought down using
|
||||
__cpu_disable(), before which all OS services including interrupts are
|
||||
migrated to another target CPU.
|
||||
|
||||
cpu_present_map: Bitmap of CPUs currently present in the system. Not all
|
||||
of them may be online. When physical hotplug is processed by the relevant
|
||||
subsystem (e.g. ACPI), the map can change and a bit is either added or removed
|
||||
from the map depending on whether the event is hot-add or hot-remove. There are currently
|
||||
no locking rules as of now. Typical usage is to init topology during boot,
|
||||
at which time hotplug is disabled.
|
||||
|
||||
You really don't need to manipulate any of the system cpu maps. They should
|
||||
be read-only for most use. When setting up per-cpu resources almost always use
|
||||
cpu_possible_map/for_each_cpu() to iterate.
|
||||
|
||||
Never use anything other than cpumask_t to represent bitmap of CPUs.
|
||||
|
||||
#include <linux/cpumask.h>
|
||||
|
||||
for_each_cpu - Iterate over cpu_possible_map
|
||||
for_each_online_cpu - Iterate over cpu_online_map
|
||||
for_each_present_cpu - Iterate over cpu_present_map
|
||||
for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.
|
||||
|
||||
#include <linux/cpu.h>
|
||||
lock_cpu_hotplug() and unlock_cpu_hotplug():
|
||||
|
||||
The above calls are used to inhibit cpu hotplug operations. While holding the
|
||||
cpucontrol mutex, cpu_online_map will not change. If you merely need to avoid
|
||||
cpus going away, you could also use preempt_disable() and preempt_enable()
|
||||
for those sections. Just remember the critical section cannot call any
|
||||
function that can sleep or schedule this process away. The preempt_disable()
|
||||
will work as long as stop_machine_run() is used to take a cpu down.
|
||||
|
||||
CPU Hotplug - Frequently Asked Questions.
|
||||
|
||||
Q: How do I enable my kernel to support CPU hotplug?
|
||||
A: When doing make defconfig, Enable CPU hotplug support
|
||||
|
||||
"Processor type and Features" -> Support for Hotpluggable CPUs
|
||||
|
||||
Make sure that you have CONFIG_HOTPLUG, and CONFIG_SMP turned on as well.
|
||||
|
||||
You would need to enable CONFIG_HOTPLUG_CPU for SMP suspend/resume support
|
||||
as well.
|
||||
|
||||
Q: What architectures support CPU hotplug?
|
||||
A: As of 2.6.14, the following architectures support CPU hotplug.
|
||||
|
||||
i386 (Intel), ppc, ppc64, parisc, s390, ia64 and x86_64
|
||||
|
||||
Q: How to test if hotplug is supported on the newly built kernel?
|
||||
A: You should now notice an entry in sysfs.
|
||||
|
||||
Check if sysfs is mounted, using the "mount" command. You should notice
|
||||
an entry as shown below in the output.
|
||||
|
||||
....
|
||||
none on /sys type sysfs (rw)
|
||||
....
|
||||
|
||||
if this is not mounted, do the following.
|
||||
|
||||
#mkdir /sys
|
||||
#mount -t sysfs sys /sys
|
||||
|
||||
now you should see entries for all present cpu, the following is an example
|
||||
in an 8-way system.
|
||||
|
||||
#pwd
|
||||
#/sys/devices/system/cpu
|
||||
#ls -l
|
||||
total 0
|
||||
drwxr-xr-x 10 root root 0 Sep 19 07:44 .
|
||||
drwxr-xr-x 13 root root 0 Sep 19 07:45 ..
|
||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu0
|
||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu1
|
||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu2
|
||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu3
|
||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu4
|
||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu5
|
||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu6
|
||||
drwxr-xr-x 3 root root 0 Sep 19 07:48 cpu7
|
||||
|
||||
Under each directory you would find an "online" file which is the control
|
||||
file to logically online/offline a processor.
|
||||
|
||||
Q: Does hot-add/hot-remove refer to physical add/remove of cpus?
|
||||
A: The usage of hot-add/remove may not be very consistently used in the code.
|
||||
CONFIG_HOTPLUG_CPU enables logical online/offline capability in the kernel.
|
||||
To support physical addition/removal, one would need some BIOS hooks and
|
||||
the platform should have something like an attention button in PCI hotplug.
|
||||
CONFIG_ACPI_HOTPLUG_CPU enables ACPI support for physical add/remove of CPUs.
|
||||
|
||||
Q: How do I logically offline a CPU?
|
||||
A: Do the following.
|
||||
|
||||
#echo 0 > /sys/devices/system/cpu/cpuX/online
|
||||
|
||||
once the logical offline is successful, check
|
||||
|
||||
#cat /proc/interrupts
|
||||
|
||||
you should now not see the CPU that you removed. Also online file will report
|
||||
the state as 0 when a cpu is offline and 1 when it's online.
|
||||
|
||||
#To display the current cpu state.
|
||||
#cat /sys/devices/system/cpu/cpuX/online
|
||||
|
||||
Q: Why can't I remove CPU0 on some systems?
|
||||
A: Some architectures may have some special dependency on a certain CPU.
|
||||
|
||||
For example, on IA64 platforms we have the ability to send platform interrupts to the
|
||||
OS, a.k.a. Corrected Platform Error Interrupts (CPEI). In current ACPI
|
||||
specifications, we didn't have a way to change the target CPU. Hence if the
|
||||
current ACPI version doesn't support such re-direction, we disable that CPU
|
||||
by making it not-removable.
|
||||
|
||||
In such cases you will also notice that the online file is missing under cpu0.
|
||||
|
||||
Q: How do I find out if a particular CPU is not removable?
|
||||
A: Depending on the implementation, some architectures may show this by the
|
||||
absence of the "online" file. This is done if it can be determined ahead of
|
||||
time that this CPU cannot be removed.
|
||||
|
||||
In some situations, this can be a run time check, i.e. if you try to remove the
|
||||
last CPU, this will not be permitted. You can find such failures by
|
||||
investigating the return value of the "echo" command.
|
||||
|
||||
Q: What happens when a CPU is being logically offlined?
|
||||
A: The following happen, listed in no particular order :-)
|
||||
|
||||
- A notification is sent to in-kernel registered modules by sending an event
|
||||
CPU_DOWN_PREPARE
|
||||
- All processes are migrated away from this outgoing CPU to a new CPU
|
||||
- All interrupts targeted to this CPU are migrated to a new CPU
|
||||
- timers/bottom halves/tasklets are also migrated to a new CPU
|
||||
- Once all services are migrated, kernel calls an arch specific routine
|
||||
__cpu_disable() to perform arch specific cleanup.
|
||||
- Once this is successful, an event for successful cleanup is sent by an event
|
||||
CPU_DEAD.
|
||||
|
||||
"It is expected that each service cleans up when the CPU_DOWN_PREPARE
|
||||
notifier is called, when CPU_DEAD is called it's expected there is nothing
|
||||
running on behalf of this CPU that was offlined"
|
||||
|
||||
Q: If I have some kernel code that needs to be aware of CPU arrival and
|
||||
departure, how do I arrange for proper notification?
|
||||
A: This is what you would need in your kernel code to receive notifications.
|
||||
|
||||
#include <linux/cpu.h>
|
||||
static int __cpuinit foobar_cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (unsigned long)hcpu;
|
||||
|
||||
switch (action) {
|
||||
case CPU_ONLINE:
|
||||
foobar_online_action(cpu);
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
foobar_dead_action(cpu);
|
||||
break;
|
||||
}
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block foobar_cpu_notifier =
|
||||
{
|
||||
.notifier_call = foobar_cpu_callback,
|
||||
};
|
||||
|
||||
|
||||
In your init function,
|
||||
|
||||
register_cpu_notifier(&foobar_cpu_notifier);
|
||||
|
||||
You can fail PREPARE notifiers if something doesn't work to prepare resources.
|
||||
This will stop the activity and send a following CANCELED event back.
|
||||
|
||||
CPU_DEAD should not be failed, it's just a goodness indication, but bad
|
||||
things will happen if a notifier in path sent a BAD notify code.
|
||||
|
||||
Q: I don't see my action being called for all CPUs already up and running?
|
||||
A: Yes, CPU notifiers are called only when new CPUs are on-lined or offlined.
|
||||
If you need to perform some action for each cpu already in the system, then
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
foobar_cpu_callback(&foobar_cpu_notifier, CPU_UP_PREPARE, i);
|
||||
foobar_cpu_callback(&foobar_cpu_notifier, CPU_ONLINE, i);
|
||||
}
|
||||
|
||||
Q: If I would like to develop cpu hotplug support for a new architecture,
|
||||
what do I need at a minimum?
|
||||
A: The following are what is required for CPU hotplug infrastructure to work
|
||||
correctly.
|
||||
|
||||
- Make sure you have an entry in Kconfig to enable CONFIG_HOTPLUG_CPU
|
||||
- __cpu_up() - Arch interface to bring up a CPU
|
||||
- __cpu_disable() - Arch interface to shutdown a CPU, no more interrupts
|
||||
can be handled by the kernel after the routine
|
||||
returns. Including local APIC timers etc are
|
||||
shutdown.
|
||||
- __cpu_die() - This is actually supposed to ensure death of the CPU.
|
||||
Actually look at some example code in other arch
|
||||
that implement CPU hotplug. The processor is taken
|
||||
down from the idle() loop for that specific
|
||||
architecture. __cpu_die() typically waits for some
|
||||
per_cpu state to be set, to ensure the processor
|
||||
dead routine is called to be sure positively.
|
||||
|
||||
Q: I need to ensure that a particular cpu is not removed when there is some
|
||||
work specific to this cpu in progress.
|
||||
A: First switch the current thread context to preferred cpu
|
||||
|
||||
int my_func_on_cpu(int cpu)
|
||||
{
|
||||
cpumask_t saved_mask, new_mask = CPU_MASK_NONE;
|
||||
int curr_cpu, err = 0;
|
||||
|
||||
saved_mask = current->cpus_allowed;
|
||||
cpu_set(cpu, new_mask);
|
||||
err = set_cpus_allowed(current, new_mask);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* If we got scheduled out just after the return from
|
||||
* set_cpus_allowed() before running the work, this ensures
|
||||
* we stay locked.
|
||||
*/
|
||||
curr_cpu = get_cpu();
|
||||
|
||||
if (curr_cpu != cpu) {
|
||||
err = -EAGAIN;
|
||||
goto ret;
|
||||
} else {
|
||||
/*
|
||||
* Do work : But cant sleep, since get_cpu() disables preempt
|
||||
*/
|
||||
}
|
||||
ret:
|
||||
put_cpu();
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
Q: How do we determine how many CPUs are available for hotplug?
|
||||
A: There is no clear spec defined way from ACPI that can give us that
|
||||
information today. Based on some input from Natalie of Unisys,
|
||||
the ACPI MADT (Multiple APIC Description Tables) marks those possible
|
||||
CPUs in a system with disabled status.
|
||||
|
||||
Andi implemented some simple heuristics that count the number of disabled
|
||||
CPUs in MADT as hotpluggable CPUS. In the case there are no disabled CPUS
|
||||
we assume 1/2 the number of CPUs currently present can be hotplugged.
|
||||
|
||||
Caveat: Today's ACPI MADT can only provide 256 entries since the apicid field
|
||||
in MADT is only 8 bits.
|
||||
|
||||
User Space Notification
|
||||
|
||||
Hotplug support for devices is common in Linux today. It's being used today to
|
||||
support automatic configuration of network, usb and pci devices. A hotplug
|
||||
event can be used to invoke an agent script to perform the configuration task.
|
||||
|
||||
You can add /etc/hotplug/cpu.agent to handle hotplug notification user space
|
||||
scripts.
|
||||
|
||||
#!/bin/bash
|
||||
# $Id: cpu.agent
|
||||
# Kernel hotplug params include:
|
||||
#ACTION=%s [online or offline]
|
||||
#DEVPATH=%s
|
||||
#
|
||||
cd /etc/hotplug
|
||||
. ./hotplug.functions
|
||||
|
||||
case $ACTION in
|
||||
online)
|
||||
echo `date` ":cpu.agent" add cpu >> /tmp/hotplug.txt
|
||||
;;
|
||||
offline)
|
||||
echo `date` ":cpu.agent" remove cpu >>/tmp/hotplug.txt
|
||||
;;
|
||||
*)
|
||||
debug_mesg CPU $ACTION event not supported
|
||||
exit 1
|
||||
;;
|
||||
esac
|
|
@ -14,7 +14,10 @@ CONTENTS:
|
|||
1.1 What are cpusets ?
|
||||
1.2 Why are cpusets needed ?
|
||||
1.3 How are cpusets implemented ?
|
||||
1.4 How do I use cpusets ?
|
||||
1.4 What are exclusive cpusets ?
|
||||
1.5 What does notify_on_release do ?
|
||||
1.6 What is memory_pressure ?
|
||||
1.7 How do I use cpusets ?
|
||||
2. Usage Examples and Syntax
|
||||
2.1 Basic Usage
|
||||
2.2 Adding/removing cpus
|
||||
|
@ -49,29 +52,6 @@ its cpus_allowed vector, and the kernel page allocator will not
|
|||
allocate a page on a node that is not allowed in the requesting tasks
|
||||
mems_allowed vector.
|
||||
|
||||
If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct
|
||||
ancestor or descendent, may share any of the same CPUs or Memory Nodes.
|
||||
A cpuset that is cpu exclusive has a sched domain associated with it.
|
||||
The sched domain consists of all cpus in the current cpuset that are not
|
||||
part of any exclusive child cpusets.
|
||||
This ensures that the scheduler load balacing code only balances
|
||||
against the cpus that are in the sched domain as defined above and not
|
||||
all of the cpus in the system. This removes any overhead due to
|
||||
load balancing code trying to pull tasks outside of the cpu exclusive
|
||||
cpuset only to be prevented by the tasks' cpus_allowed mask.
|
||||
|
||||
A cpuset that is mem_exclusive restricts kernel allocations for
|
||||
page, buffer and other data commonly shared by the kernel across
|
||||
multiple users. All cpusets, whether mem_exclusive or not, restrict
|
||||
allocations of memory for user space. This enables configuring a
|
||||
system so that several independent jobs can share common kernel
|
||||
data, such as file system pages, while isolating each jobs user
|
||||
allocation in its own cpuset. To do this, construct a large
|
||||
mem_exclusive cpuset to hold all the jobs, and construct child,
|
||||
non-mem_exclusive cpusets for each individual job. Only a small
|
||||
amount of typical kernel memory, such as requests from interrupt
|
||||
handlers, is allowed to be taken outside even a mem_exclusive cpuset.
|
||||
|
||||
User level code may create and destroy cpusets by name in the cpuset
|
||||
virtual file system, manage the attributes and permissions of these
|
||||
cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
|
||||
|
@ -155,7 +135,7 @@ Cpusets extends these two mechanisms as follows:
|
|||
The implementation of cpusets requires a few, simple hooks
|
||||
into the rest of the kernel, none in performance critical paths:
|
||||
|
||||
- in main/init.c, to initialize the root cpuset at system boot.
|
||||
- in init/main.c, to initialize the root cpuset at system boot.
|
||||
- in fork and exit, to attach and detach a task from its cpuset.
|
||||
- in sched_setaffinity, to mask the requested CPUs by what's
|
||||
allowed in that tasks cpuset.
|
||||
|
@ -166,7 +146,7 @@ into the rest of the kernel, none in performance critical paths:
|
|||
and related changes in both sched.c and arch/ia64/kernel/domain.c
|
||||
- in the mbind and set_mempolicy system calls, to mask the requested
|
||||
Memory Nodes by what's allowed in that tasks cpuset.
|
||||
- in page_alloc, to restrict memory to allowed nodes.
|
||||
- in page_alloc.c, to restrict memory to allowed nodes.
|
||||
- in vmscan.c, to restrict page recovery to the current cpuset.
|
||||
|
||||
In addition a new file system, of type "cpuset" may be mounted,
|
||||
|
@ -192,9 +172,15 @@ containing the following files describing that cpuset:
|
|||
|
||||
- cpus: list of CPUs in that cpuset
|
||||
- mems: list of Memory Nodes in that cpuset
|
||||
- memory_migrate flag: if set, move pages to cpusets nodes
|
||||
- cpu_exclusive flag: is cpu placement exclusive?
|
||||
- mem_exclusive flag: is memory placement exclusive?
|
||||
- tasks: list of tasks (by pid) attached to that cpuset
|
||||
- notify_on_release flag: run /sbin/cpuset_release_agent on exit?
|
||||
- memory_pressure: measure of how much paging pressure in cpuset
|
||||
|
||||
In addition, the root cpuset only has the following file:
|
||||
- memory_pressure_enabled flag: compute memory_pressure?
|
||||
|
||||
New cpusets are created using the mkdir system call or shell
|
||||
command. The properties of a cpuset, such as its flags, allowed
|
||||
|
@ -228,7 +214,108 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs)
|
|||
to represent the cpuset hierarchy provides for a familiar permission
|
||||
and name space for cpusets, with a minimum of additional kernel code.
|
||||
|
||||
1.4 How do I use cpusets ?
|
||||
|
||||
1.4 What are exclusive cpusets ?
|
||||
--------------------------------
|
||||
|
||||
If a cpuset is cpu or mem exclusive, no other cpuset, other than
|
||||
a direct ancestor or descendent, may share any of the same CPUs or
|
||||
Memory Nodes.
|
||||
|
||||
A cpuset that is cpu_exclusive has a scheduler (sched) domain
|
||||
associated with it. The sched domain consists of all CPUs in the
|
||||
current cpuset that are not part of any exclusive child cpusets.
|
||||
This ensures that the scheduler load balancing code only balances
|
||||
against the CPUs that are in the sched domain as defined above and
|
||||
not all of the CPUs in the system. This removes any overhead due to
|
||||
load balancing code trying to pull tasks outside of the cpu_exclusive
|
||||
cpuset only to be prevented by the tasks' cpus_allowed mask.
|
||||
|
||||
A cpuset that is mem_exclusive restricts kernel allocations for
|
||||
page, buffer and other data commonly shared by the kernel across
|
||||
multiple users. All cpusets, whether mem_exclusive or not, restrict
|
||||
allocations of memory for user space. This enables configuring a
|
||||
system so that several independent jobs can share common kernel data,
|
||||
such as file system pages, while isolating each job's user allocation in
|
||||
its own cpuset. To do this, construct a large mem_exclusive cpuset to
|
||||
hold all the jobs, and construct child, non-mem_exclusive cpusets for
|
||||
each individual job. Only a small amount of typical kernel memory,
|
||||
such as requests from interrupt handlers, is allowed to be taken
|
||||
outside even a mem_exclusive cpuset.
|
||||
|
||||
|
||||
1.5 What does notify_on_release do ?
|
||||
------------------------------------
|
||||
|
||||
If the notify_on_release flag is enabled (1) in a cpuset, then whenever
|
||||
the last task in the cpuset leaves (exits or attaches to some other
|
||||
cpuset) and the last child cpuset of that cpuset is removed, then
|
||||
the kernel runs the command /sbin/cpuset_release_agent, supplying the
|
||||
pathname (relative to the mount point of the cpuset file system) of the
|
||||
abandoned cpuset. This enables automatic removal of abandoned cpusets.
|
||||
The default value of notify_on_release in the root cpuset at system
|
||||
boot is disabled (0). The default value of other cpusets at creation
|
||||
is the current value of their parent's notify_on_release setting.
|
||||
|
||||
|
||||
1.6 What is memory_pressure ?
|
||||
-----------------------------
|
||||
The memory_pressure of a cpuset provides a simple per-cpuset metric
|
||||
of the rate that the tasks in a cpuset are attempting to free up in
|
||||
use memory on the nodes of the cpuset to satisfy additional memory
|
||||
requests.
|
||||
|
||||
This enables batch managers monitoring jobs running in dedicated
|
||||
cpusets to efficiently detect what level of memory pressure that job
|
||||
is causing.
|
||||
|
||||
This is useful both on tightly managed systems running a wide mix of
|
||||
submitted jobs, which may choose to terminate or re-prioritize jobs that
|
||||
are trying to use more memory than allowed on the nodes assigned them,
|
||||
and with tightly coupled, long running, massively parallel scientific
|
||||
computing jobs that will dramatically fail to meet required performance
|
||||
goals if they start to use more memory than allowed to them.
|
||||
|
||||
This mechanism provides a very economical way for the batch manager
|
||||
to monitor a cpuset for signs of memory pressure. It's up to the
|
||||
batch manager or other user code to decide what to do about it and
|
||||
take action.
|
||||
|
||||
==> Unless this feature is enabled by writing "1" to the special file
|
||||
/dev/cpuset/memory_pressure_enabled, the hook in the rebalance
|
||||
code of __alloc_pages() for this metric reduces to simply noticing
|
||||
that the cpuset_memory_pressure_enabled flag is zero. So only
|
||||
systems that enable this feature will compute the metric.
|
||||
|
||||
Why a per-cpuset, running average:
|
||||
|
||||
Because this meter is per-cpuset, rather than per-task or mm,
|
||||
the system load imposed by a batch scheduler monitoring this
|
||||
metric is sharply reduced on large systems, because a scan of
|
||||
the tasklist can be avoided on each set of queries.
|
||||
|
||||
Because this meter is a running average, instead of an accumulating
|
||||
counter, a batch scheduler can detect memory pressure with a
|
||||
single read, instead of having to read and accumulate results
|
||||
for a period of time.
|
||||
|
||||
Because this meter is per-cpuset rather than per-task or mm,
|
||||
the batch scheduler can obtain the key information, memory
|
||||
pressure in a cpuset, with a single read, rather than having to
|
||||
query and accumulate results over all the (dynamically changing)
|
||||
set of tasks in the cpuset.
|
||||
|
||||
A per-cpuset simple digital filter (requires a spinlock and 3 words
|
||||
of data per-cpuset) is kept, and updated by any task attached to that
|
||||
cpuset, if it enters the synchronous (direct) page reclaim code.
|
||||
|
||||
A per-cpuset file provides an integer number representing the recent
|
||||
(half-life of 10 seconds) rate of direct page reclaims caused by
|
||||
the tasks in the cpuset, in units of reclaims attempted per second,
|
||||
times 1000.
|
||||
|
||||
|
||||
1.7 How do I use cpusets ?
|
||||
--------------------------
|
||||
|
||||
In order to minimize the impact of cpusets on critical kernel
|
||||
|
@ -277,6 +364,30 @@ rewritten to the 'tasks' file of its cpuset. This is done to avoid
|
|||
impacting the scheduler code in the kernel with a check for changes
|
||||
in a tasks processor placement.
|
||||
|
||||
Normally, once a page is allocated (given a physical page
|
||||
of main memory) then that page stays on whatever node it
|
||||
was allocated, so long as it remains allocated, even if the
|
||||
cpuset's memory placement policy 'mems' subsequently changes.
|
||||
If the cpuset flag file 'memory_migrate' is set true, then when
|
||||
tasks are attached to that cpuset, any pages that task had
|
||||
allocated to it on nodes in its previous cpuset are migrated
|
||||
to the task's new cpuset. Depending on the implementation,
|
||||
this migration may either be done by swapping the page out,
|
||||
so that the next time the page is referenced, it will be paged
|
||||
into the task's new cpuset, usually on the node where it was
|
||||
referenced, or this migration may be done by directly copying
|
||||
the pages from the task's previous cpuset to the new cpuset,
|
||||
where possible to the same node, relative to the new cpuset,
|
||||
as the node that held the page, relative to the old cpuset.
|
||||
Also if 'memory_migrate' is set true, then if that cpuset's
|
||||
'mems' file is modified, pages allocated to tasks in that
|
||||
cpuset, that were on nodes in the previous setting of 'mems',
|
||||
will be moved to nodes in the new setting of 'mems.' Again,
|
||||
depending on the implementation, this might be done by swapping,
|
||||
or by direct copying. In either case, pages that were not in
|
||||
the task's prior cpuset, or in the cpuset's prior 'mems' setting,
|
||||
will not be moved.
|
||||
|
||||
There is an exception to the above. If hotplug functionality is used
|
||||
to remove all the CPUs that are currently assigned to a cpuset,
|
||||
then the kernel will automatically update the cpus_allowed of all
|
||||
|
|
|
@ -0,0 +1,673 @@
|
|||
|
||||
|
||||
EDAC - Error Detection And Correction
|
||||
|
||||
Written by Doug Thompson <norsk5@xmission.com>
|
||||
7 Dec 2005
|
||||
|
||||
|
||||
EDAC was written by:
|
||||
Thayne Harbaugh,
|
||||
modified by Dave Peterson, Doug Thompson, et al,
|
||||
from the bluesmoke.sourceforge.net project.
|
||||
|
||||
|
||||
============================================================================
|
||||
EDAC PURPOSE
|
||||
|
||||
The 'edac' kernel module goal is to detect and report errors that occur
|
||||
within the computer system. In the initial release, memory Correctable Errors
|
||||
(CE) and Uncorrectable Errors (UE) are the primary errors being harvested.
|
||||
|
||||
Detecting CE events, then harvesting those events and reporting them,
|
||||
CAN be a predictor of future UE events. With CE events, the system can
|
||||
continue to operate, but with less safety. Preventive maintenance and
|
||||
proactive part replacement of memory DIMMs exhibiting CEs can reduce
|
||||
the likelihood of the dreaded UE events and system 'panics'.
|
||||
|
||||
|
||||
In addition, PCI Bus Parity and SERR Errors are scanned for on PCI devices
|
||||
in order to determine if errors are occurring on data transfers.
|
||||
The presence of PCI Parity errors must be examined with a grain of salt.
|
||||
There are several add-in adapters that do NOT follow the PCI specification
|
||||
with regards to Parity generation and reporting. The specification says
|
||||
the vendor should tie the parity status bits to 0 if they do not intend
|
||||
to generate parity. Some vendors do not do this, and thus the parity bit
|
||||
can "float" giving false positives.
|
||||
|
||||
The PCI Parity EDAC device has the ability to "skip" known flakey
|
||||
cards during the parity scan. These are set by the parity "blacklist"
|
||||
interface in the sysfs for PCI Parity. (See the PCI section in the sysfs
|
||||
section below.) There is also a parity "whitelist" which is used as
|
||||
an explicit list of devices to scan, while the blacklist is a list
|
||||
of devices to skip.
|
||||
|
||||
EDAC will have future error detectors that will be added or integrated
|
||||
into EDAC in the following list:
|
||||
|
||||
MCE Machine Check Exception
|
||||
MCA Machine Check Architecture
|
||||
NMI NMI notification of ECC errors
|
||||
MSRs Machine Specific Register error cases
|
||||
and other mechanisms.
|
||||
|
||||
These errors are usually bus errors, ECC errors, thermal throttling
|
||||
and the like.
|
||||
|
||||
|
||||
============================================================================
|
||||
EDAC VERSIONING
|
||||
|
||||
EDAC is composed of a "core" module (edac_mc.ko) and several Memory
|
||||
Controller (MC) driver modules. On a given system, the CORE
|
||||
is loaded and one MC driver will be loaded. Both the CORE and
|
||||
the MC driver have individual versions that reflect current release
|
||||
level of their respective modules. Thus, to "report" on what version
|
||||
a system is running, one must report both the CORE's and the
|
||||
MC driver's versions.
|
||||
|
||||
|
||||
LOADING
|
||||
|
||||
If 'edac' was statically linked with the kernel then no loading is
|
||||
necessary. If 'edac' was built as modules then simply modprobe the
|
||||
'edac' pieces that you need. You should be able to modprobe
|
||||
hardware-specific modules and have the dependencies load the necessary core
|
||||
modules.
|
||||
|
||||
Example:
|
||||
|
||||
$> modprobe amd76x_edac
|
||||
|
||||
loads both the amd76x_edac.ko memory controller module and the edac_mc.ko
|
||||
core module.
|
||||
|
||||
|
||||
============================================================================
|
||||
EDAC sysfs INTERFACE
|
||||
|
||||
EDAC presents a 'sysfs' interface for control, reporting and attribute
|
||||
reporting purposes.
|
||||
|
||||
EDAC lives in the /sys/devices/system/edac directory. Within this directory
|
||||
there currently reside 2 'edac' components:
|
||||
|
||||
mc memory controller(s) system
|
||||
pci PCI status system
|
||||
|
||||
|
||||
============================================================================
|
||||
Memory Controller (mc) Model
|
||||
|
||||
First a background on the memory controller's model abstracted in EDAC.
|
||||
Each mc device controls a set of DIMM memory modules. These modules are
|
||||
laid out in a Chip-Select Row (csrowX) and Channel table (chX). There can
|
||||
be multiple csrows and two channels.
|
||||
|
||||
Memory controllers allow for several csrows, with 8 csrows being a typical value.
|
||||
Yet, the actual number of csrows depends on the electrical "loading"
|
||||
of a given motherboard, memory controller and DIMM characteristics.
|
||||
|
||||
Dual channels allow for 128-bit data transfers to the CPU from memory.
|
||||
|
||||
|
||||
Channel 0 Channel 1
|
||||
===================================
|
||||
csrow0 | DIMM_A0 | DIMM_B0 |
|
||||
csrow1 | DIMM_A0 | DIMM_B0 |
|
||||
===================================
|
||||
|
||||
===================================
|
||||
csrow2 | DIMM_A1 | DIMM_B1 |
|
||||
csrow3 | DIMM_A1 | DIMM_B1 |
|
||||
===================================
|
||||
|
||||
In the above example table there are 4 physical slots on the motherboard
|
||||
for memory DIMMs:
|
||||
|
||||
DIMM_A0
|
||||
DIMM_B0
|
||||
DIMM_A1
|
||||
DIMM_B1
|
||||
|
||||
Labels for these slots are usually silk screened on the motherboard. Slots
|
||||
labeled 'A' are channel 0 in this example. Slots labeled 'B'
|
||||
are channel 1. Notice that there are two csrows possible on a
|
||||
physical DIMM. These csrows are allocated their csrow assignment
|
||||
based on the slot into which the memory DIMM is placed. Thus, when 1 DIMM
|
||||
is placed in each Channel, the csrows cross both DIMMs.
|
||||
|
||||
Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
|
||||
Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above
|
||||
will have 1 csrow, csrow0. csrow1 will be empty. On the other hand,
|
||||
when 2 dual ranked DIMMs are similarly placed, then both csrow0 and
|
||||
csrow1 will be populated. The pattern repeats itself for csrow2 and
|
||||
csrow3.
|
||||
|
||||
The representation of the above is reflected in the directory tree
|
||||
in EDAC's sysfs interface. Starting in directory
|
||||
/sys/devices/system/edac/mc each memory controller will be represented
|
||||
by its own 'mcX' directory, where 'X' is the index of the MC.
|
||||
|
||||
|
||||
..../edac/mc/
|
||||
|
|
||||
|->mc0
|
||||
|->mc1
|
||||
|->mc2
|
||||
....
|
||||
|
||||
Under each 'mcX' directory each 'csrowX' is again represented by a
|
||||
'csrowX', where 'X' is the csrow index:
|
||||
|
||||
|
||||
.../mc/mc0/
|
||||
|
|
||||
|->csrow0
|
||||
|->csrow2
|
||||
|->csrow3
|
||||
....
|
||||
|
||||
Notice that there is no csrow1, which indicates that csrow0 is
|
||||
composed of single ranked DIMMs. This should also apply in both
|
||||
Channels, in order to have dual-channel mode be operational. Since
|
||||
both csrow2 and csrow3 are populated, this indicates a dual ranked
|
||||
set of DIMMs for channels 0 and 1.
|
||||
|
||||
|
||||
Within each of the 'mc','mcX' and 'csrowX' directories are several
|
||||
EDAC control and attribute files.
|
||||
|
||||
|
||||
============================================================================
|
||||
DIRECTORY 'mc'
|
||||
|
||||
In directory 'mc' are EDAC system overall control and attribute files:
|
||||
|
||||
|
||||
Panic on UE control file:
|
||||
|
||||
'panic_on_ue'
|
||||
|
||||
An uncorrectable error will cause a machine panic. This is usually
|
||||
desirable. It is a bad idea to continue when an uncorrectable error
|
||||
occurs - it is indeterminate what was uncorrected and the operating
|
||||
system context might be so mangled that continuing will lead to further
|
||||
corruption. If the kernel has MCE configured, then EDAC will never
|
||||
notice the UE.
|
||||
|
||||
LOAD TIME: module/kernel parameter: panic_on_ue=[0|1]
|
||||
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/panic_on_ue
|
||||
|
||||
|
||||
Log UE control file:
|
||||
|
||||
'log_ue'
|
||||
|
||||
Generate kernel messages describing uncorrectable errors. These errors
|
||||
are reported through the system message log system. UE statistics
|
||||
will be accumulated even when UE logging is disabled.
|
||||
|
||||
LOAD TIME: module/kernel parameter: log_ue=[0|1]
|
||||
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/log_ue
|
||||
|
||||
|
||||
Log CE control file:
|
||||
|
||||
'log_ce'
|
||||
|
||||
Generate kernel messages describing correctable errors. These
|
||||
errors are reported through the system message log system.
|
||||
CE statistics will be accumulated even when CE logging is disabled.
|
||||
|
||||
LOAD TIME: module/kernel parameter: log_ce=[0|1]
|
||||
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/log_ce
|
||||
|
||||
|
||||
Polling period control file:
|
||||
|
||||
'poll_msec'
|
||||
|
||||
The time period, in milliseconds, for polling for error information.
|
||||
Too small a value wastes resources. Too large a value might delay
|
||||
necessary handling of errors and might lose valuable information for
|
||||
locating the error. 1000 milliseconds (once each second) is about
|
||||
right for most uses.
|
||||
|
||||
LOAD TIME: module/kernel parameter: poll_msec=<milliseconds>
|
||||
|
||||
RUN TIME: echo "1000" >/sys/devices/system/edac/mc/poll_msec
|
||||
|
||||
|
||||
Module Version read-only attribute file:
|
||||
|
||||
'mc_version'
|
||||
|
||||
The EDAC CORE module's version and compile date are shown here to
|
||||
indicate what EDAC is running.
|
||||
|
||||
|
||||
|
||||
============================================================================
|
||||
'mcX' DIRECTORIES
|
||||
|
||||
|
||||
In 'mcX' directories are EDAC control and attribute files for
|
||||
this 'X' instance of the memory controllers:
|
||||
|
||||
|
||||
Counter reset control file:
|
||||
|
||||
'reset_counters'
|
||||
|
||||
This write-only control file will zero all the statistical counters
|
||||
for UE and CE errors. Zeroing the counters will also reset the timer
|
||||
indicating how long since the last counter zero. This is useful
|
||||
for computing errors/time. Since the counters are always reset at
|
||||
driver initialization time, no module/kernel parameter is available.
|
||||
|
||||
RUN TIME: echo "anything" >/sys/devices/system/edac/mc/mc0/reset_counters
|
||||
|
||||
This resets the counters on memory controller 0
|
||||
|
||||
|
||||
Seconds since last counter reset control file:
|
||||
|
||||
'seconds_since_reset'
|
||||
|
||||
This attribute file displays how many seconds have elapsed since the
|
||||
last counter reset. This can be used with the error counters to
|
||||
measure error rates.
|
||||
|
||||
|
||||
|
||||
DIMM capability attribute file:
|
||||
|
||||
'edac_capability'
|
||||
|
||||
The EDAC (Error Detection and Correction) capabilities/modes of
|
||||
the memory controller hardware.
|
||||
|
||||
|
||||
DIMM Current Capability attribute file:
|
||||
|
||||
'edac_current_capability'
|
||||
|
||||
The EDAC capabilities available with the hardware
|
||||
configuration. This may not be the same as "EDAC capability"
|
||||
if the correct memory is not used. If a memory controller is
|
||||
capable of EDAC, but DIMMs without check bits are in use, then
|
||||
Parity, SECDED, S4ECD4ED capabilities will not be available
|
||||
even though the memory controller might be capable of those
|
||||
modes with the proper memory loaded.
|
||||
|
||||
|
||||
Memory Type supported on this controller attribute file:
|
||||
|
||||
'supported_mem_type'
|
||||
|
||||
This attribute file displays the memory type, usually
|
||||
buffered and unbuffered DIMMs.
|
||||
|
||||
|
||||
Memory Controller name attribute file:
|
||||
|
||||
'mc_name'
|
||||
|
||||
This attribute file displays the type of memory controller
|
||||
that is being utilized.
|
||||
|
||||
|
||||
Memory Controller Module name attribute file:
|
||||
|
||||
'module_name'
|
||||
|
||||
This attribute file displays the memory controller module name,
|
||||
version and date built. The name of the memory controller
|
||||
hardware - some drivers work with multiple controllers and
|
||||
this field shows which hardware is present.
|
||||
|
||||
|
||||
Total memory managed by this memory controller attribute file:
|
||||
|
||||
'size_mb'
|
||||
|
||||
This attribute file displays, in megabytes, the amount of memory
|
||||
that this instance of memory controller manages.
|
||||
|
||||
|
||||
Total Uncorrectable Errors count attribute file:
|
||||
|
||||
'ue_count'
|
||||
|
||||
This attribute file displays the total count of uncorrectable
|
||||
errors that have occurred on this memory controller. If panic_on_ue
|
||||
is set this counter will not have a chance to increment,
|
||||
since EDAC will panic the system.
|
||||
|
||||
|
||||
Total UE count that had no information attribute file:
|
||||
|
||||
'ue_noinfo_count'
|
||||
|
||||
This attribute file displays the number of UEs that
|
||||
have occurred with no information as to which DIMM
|
||||
slot is having errors.
|
||||
|
||||
|
||||
Total Correctable Errors count attribute file:
|
||||
|
||||
'ce_count'
|
||||
|
||||
This attribute file displays the total count of correctable
|
||||
errors that have occurred on this memory controller. This
|
||||
count is very important to examine. CEs provide early
|
||||
indications that a DIMM is beginning to fail. This count
|
||||
field should be monitored for non-zero values and report
|
||||
such information to the system administrator.
|
||||
|
||||
|
||||
Total CE count that had no information attribute file:
|
||||
|
||||
'ce_noinfo_count'
|
||||
|
||||
This attribute file displays the number of CEs that
|
||||
have occurred with no information as to which DIMM slot
|
||||
is having errors. Memory is handicapped, but operational,
|
||||
yet no information is available to indicate which slot
|
||||
the failing memory is in. This count field should also
|
||||
be monitored for non-zero values.
|
||||
|
||||
Device Symlink:
|
||||
|
||||
'device'
|
||||
|
||||
Symlink to the memory controller device
|
||||
|
||||
|
||||
|
||||
============================================================================
|
||||
'csrowX' DIRECTORIES
|
||||
|
||||
In the 'csrowX' directories are EDAC control and attribute files for
|
||||
this 'X' instance of csrow:
|
||||
|
||||
|
||||
Total Uncorrectable Errors count attribute file:
|
||||
|
||||
'ue_count'
|
||||
|
||||
This attribute file displays the total count of uncorrectable
|
||||
errors that have occurred on this csrow. If panic_on_ue is set
|
||||
this counter will not have a chance to increment, since EDAC
|
||||
will panic the system.
|
||||
|
||||
|
||||
Total Correctable Errors count attribute file:
|
||||
|
||||
'ce_count'
|
||||
|
||||
This attribute file displays the total count of correctable
|
||||
errors that have occurred on this csrow. This
|
||||
count is very important to examine. CEs provide early
|
||||
indications that a DIMM is beginning to fail. This count
|
||||
field should be monitored for non-zero values and report
|
||||
such information to the system administrator.
|
||||
|
||||
|
||||
Total memory managed by this csrow attribute file:
|
||||
|
||||
'size_mb'
|
||||
|
||||
This attribute file displays, in megabytes, the amount of memory
|
||||
that this csrow contains.
|
||||
|
||||
|
||||
Memory Type attribute file:
|
||||
|
||||
'mem_type'
|
||||
|
||||
This attribute file will display what type of memory is currently
|
||||
on this csrow. Normally, either buffered or unbuffered memory.
|
||||
|
||||
|
||||
EDAC Mode of operation attribute file:
|
||||
|
||||
'edac_mode'
|
||||
|
||||
This attribute file will display what type of Error detection
|
||||
and correction is being utilized.
|
||||
|
||||
|
||||
Device type attribute file:
|
||||
|
||||
'dev_type'
|
||||
|
||||
This attribute file will display what type of DIMM device is
|
||||
being utilized. Example: x4
|
||||
|
||||
|
||||
Channel 0 CE Count attribute file:
|
||||
|
||||
'ch0_ce_count'
|
||||
|
||||
This attribute file will display the count of CEs on this
|
||||
DIMM located in channel 0.
|
||||
|
||||
|
||||
Channel 0 UE Count attribute file:
|
||||
|
||||
'ch0_ue_count'
|
||||
|
||||
This attribute file will display the count of UEs on this
|
||||
DIMM located in channel 0.
|
||||
|
||||
|
||||
Channel 0 DIMM Label control file:
|
||||
|
||||
'ch0_dimm_label'
|
||||
|
||||
This control file allows this DIMM to have a label assigned
|
||||
to it. With this label in the module, when errors occur
|
||||
the output can provide the DIMM label in the system log.
|
||||
This becomes vital for panic events to isolate the
|
||||
cause of the UE event.
|
||||
|
||||
DIMM Labels must be assigned after booting, with information
|
||||
that correctly identifies the physical slot with its
|
||||
silk screen label. This information is currently very
|
||||
motherboard specific and determination of this information
|
||||
must occur in userland at this time.
|
||||
|
||||
|
||||
Channel 1 CE Count attribute file:
|
||||
|
||||
'ch1_ce_count'
|
||||
|
||||
This attribute file will display the count of CEs on this
|
||||
DIMM located in channel 1.
|
||||
|
||||
|
||||
Channel 1 UE Count attribute file:
|
||||
|
||||
'ch1_ue_count'
|
||||
|
||||
This attribute file will display the count of UEs on this
|
||||
DIMM located in channel 1.
|
||||
|
||||
|
||||
Channel 1 DIMM Label control file:
|
||||
|
||||
'ch1_dimm_label'
|
||||
|
||||
This control file allows this DIMM to have a label assigned
|
||||
to it. With this label in the module, when errors occur
|
||||
the output can provide the DIMM label in the system log.
|
||||
This becomes vital for panic events to isolate the
|
||||
cause of the UE event.
|
||||
|
||||
DIMM Labels must be assigned after booting, with information
|
||||
that correctly identifies the physical slot with its
|
||||
silk screen label. This information is currently very
|
||||
motherboard specific and determination of this information
|
||||
must occur in userland at this time.
|
||||
|
||||
|
||||
============================================================================
|
||||
SYSTEM LOGGING
|
||||
|
||||
If logging for UEs and CEs is enabled then system logs will have
|
||||
error notices indicating errors that have been detected:
|
||||
|
||||
MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0,
|
||||
channel 1 "DIMM_B1": amd76x_edac
|
||||
|
||||
MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0,
|
||||
channel 1 "DIMM_B1": amd76x_edac
|
||||
|
||||
|
||||
The structure of the message is:
|
||||
the memory controller (MC0)
|
||||
Error type (CE)
|
||||
memory page (0x283)
|
||||
offset in the page (0xce0)
|
||||
the byte granularity (grain 8)
|
||||
or resolution of the error
|
||||
the error syndrome (0x6ec3)
|
||||
memory row (row 0)
|
||||
memory channel (channel 1)
|
||||
DIMM label, if set prior (DIMM_B1)
|
||||
and then an optional, driver-specific message that may
|
||||
have additional information.
|
||||
|
||||
Both UEs and CEs with no info will lack all but memory controller,
|
||||
error type, a notice of "no info" and then an optional,
|
||||
driver-specific error message.
|
||||
|
||||
|
||||
|
||||
============================================================================
|
||||
PCI Bus Parity Detection
|
||||
|
||||
|
||||
On Header Type 00 devices the primary status is looked at
|
||||
for any parity error regardless of whether Parity is enabled on the
|
||||
device. (The spec indicates parity is generated in some cases).
|
||||
On Header Type 01 bridges, the secondary status register is also
|
||||
looked at to see if parity occurred on the bus on the other side of
|
||||
the bridge.
|
||||
|
||||
|
||||
SYSFS CONFIGURATION
|
||||
|
||||
Under /sys/devices/system/edac/pci are control and attribute files as follows:
|
||||
|
||||
|
||||
Enable/Disable PCI Parity checking control file:
|
||||
|
||||
'check_pci_parity'
|
||||
|
||||
|
||||
This control file enables or disables the PCI Bus Parity scanning
|
||||
operation. Writing a 1 to this file enables the scanning. Writing
|
||||
a 0 to this file disables the scanning.
|
||||
|
||||
Enable:
|
||||
echo "1" >/sys/devices/system/edac/pci/check_pci_parity
|
||||
|
||||
Disable:
|
||||
echo "0" >/sys/devices/system/edac/pci/check_pci_parity
|
||||
|
||||
|
||||
|
||||
Panic on PCI PARITY Error:
|
||||
|
||||
'panic_on_pci_parity'
|
||||
|
||||
|
||||
This control file enables or disables panic'ing when a parity
|
||||
error has been detected.
|
||||
|
||||
|
||||
module/kernel parameter: panic_on_pci_parity=[0|1]
|
||||
|
||||
Enable:
|
||||
echo "1" >/sys/devices/system/edac/pci/panic_on_pci_parity
|
||||
|
||||
Disable:
|
||||
echo "0" >/sys/devices/system/edac/pci/panic_on_pci_parity
|
||||
|
||||
|
||||
Parity Count:
|
||||
|
||||
'pci_parity_count'
|
||||
|
||||
This attribute file will display the number of parity errors that
|
||||
have been detected.
|
||||
|
||||
|
||||
|
||||
PCI Device Whitelist:
|
||||
|
||||
'pci_parity_whitelist'
|
||||
|
||||
This control file allows for an explicit list of PCI devices to be
|
||||
scanned for parity errors. Only devices found on this list will
|
||||
be examined. The list is a line of hexadecimal VENDOR and DEVICE
|
||||
ID tuples:
|
||||
|
||||
1022:7450,1434:16a6
|
||||
|
||||
One or more can be inserted, separated by a comma.
|
||||
|
||||
To write the above list, do the following as one command line:
|
||||
|
||||
echo "1022:7450,1434:16a6"
|
||||
> /sys/devices/system/edac/pci/pci_parity_whitelist
|
||||
|
||||
|
||||
|
||||
To display what the whitelist is, simply 'cat' the same file.
|
||||
|
||||
|
||||
PCI Device Blacklist:
|
||||
|
||||
'pci_parity_blacklist'
|
||||
|
||||
This control file allows for a list of PCI devices to be
|
||||
skipped for scanning.
|
||||
The list is a line of hexadecimal VENDOR and DEVICE ID tuples:
|
||||
|
||||
1022:7450,1434:16a6
|
||||
|
||||
One or more can be inserted, separated by a comma.
|
||||
|
||||
To write the above list, do the following as one command line:
|
||||
|
||||
echo "1022:7450,1434:16a6"
|
||||
> /sys/devices/system/edac/pci/pci_parity_blacklist
|
||||
|
||||
|
||||
To display what the blacklist currently contains,
|
||||
simply 'cat' the same file.
|
||||
|
||||
=======================================================================
|
||||
|
||||
PCI Vendor and Devices IDs can be obtained with the lspci command. Using
|
||||
the -n option lspci will display the vendor and device IDs. The system
|
||||
administrator will have to determine which devices should be scanned or
|
||||
skipped.
|
||||
|
||||
|
||||
|
||||
The two lists (white and black) are prioritized. blacklist is the lower
|
||||
priority and will NOT be utilized when a whitelist has been set.
|
||||
Turn OFF a whitelist by an empty echo command:
|
||||
|
||||
echo > /sys/devices/system/edac/pci/pci_parity_whitelist
|
||||
|
||||
and any previous blacklist will be utilized.
|
||||
|
|
@ -150,7 +150,8 @@ Getting the card going
|
|||
|
||||
The frontend module sp887x.o, requires an external firmware.
|
||||
Please use the command "get_dvb_firmware sp887x" to download
|
||||
it. Then copy it to /usr/lib/hotplug/firmware.
|
||||
it. Then copy it to /usr/lib/hotplug/firmware or /lib/firmware/
|
||||
(depending on configuration of firmware hotplug).
|
||||
|
||||
Receiving DVB-T in Australia
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ use IO::Handle;
|
|||
|
||||
@components = ( "sp8870", "sp887x", "tda10045", "tda10046", "av7110", "dec2000t",
|
||||
"dec2540t", "dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004",
|
||||
"or51211", "or51132_qam", "or51132_vsb");
|
||||
"or51211", "or51132_qam", "or51132_vsb", "bluebird");
|
||||
|
||||
# Check args
|
||||
syntax() if (scalar(@ARGV) != 1);
|
||||
|
@ -34,7 +34,11 @@ for ($i=0; $i < scalar(@components); $i++) {
|
|||
if ($cid eq $components[$i]) {
|
||||
$outfile = eval($cid);
|
||||
die $@ if $@;
|
||||
print STDERR "Firmware $outfile extracted successfully. Now copy it to either /lib/firmware or /usr/lib/hotplug/firmware/ (depending on your hotplug version).\n";
|
||||
print STDERR <<EOF;
|
||||
Firmware $outfile extracted successfully.
|
||||
Now copy it to either /usr/lib/hotplug/firmware or /lib/firmware
|
||||
(depending on configuration of firmware hotplug).
|
||||
EOF
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
@ -243,7 +247,7 @@ sub nxt2002 {
|
|||
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
|
||||
|
||||
checkstandard();
|
||||
|
||||
|
||||
wgetfile($sourcefile, $url);
|
||||
unzip($sourcefile, $tmpdir);
|
||||
verify("$tmpdir/SkyNETU.sys", $hash);
|
||||
|
@ -308,6 +312,19 @@ sub or51132_vsb {
|
|||
$fwfile;
|
||||
}
|
||||
|
||||
sub bluebird {
|
||||
my $url = "http://www.linuxtv.org/download/dvb/firmware/dvb-usb-bluebird-01.fw";
|
||||
my $outfile = "dvb-usb-bluebird-01.fw";
|
||||
my $hash = "658397cb9eba9101af9031302671f49d";
|
||||
|
||||
checkstandard();
|
||||
|
||||
wgetfile($outfile, $url);
|
||||
verify($outfile,$hash);
|
||||
|
||||
$outfile;
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Utilities
|
||||
|
||||
|
|
|
@ -41,4 +41,5 @@ Hotplug Firmware Loading for 2.6 kernels
|
|||
For 2.6 kernels the firmware is loaded at the point that the driver module is
|
||||
loaded. See linux/Documentation/dvb/firmware.txt for more information.
|
||||
|
||||
Copy the three files downloaded above into the /usr/lib/hotplug/firmware directory.
|
||||
Copy the three files downloaded above into the /usr/lib/hotplug/firmware or
|
||||
/lib/firmware directory (depending on configuration of firmware hotplug).
|
||||
|
|
|
@ -11,4 +11,3 @@ Untested features
|
|||
|
||||
All LCD stuff is untested. If it worked in tridentfb, it should work in
|
||||
cyblafb. Please test and report the results to Knut_Petersen@t-online.de.
|
||||
|
||||
|
|
|
@ -14,142 +14,141 @@
|
|||
#
|
||||
|
||||
mode "640x480-50"
|
||||
geometry 640 480 640 3756 8
|
||||
geometry 640 480 2048 4096 8
|
||||
timings 47619 4294967256 24 17 0 216 3
|
||||
endmode
|
||||
|
||||
mode "640x480-60"
|
||||
geometry 640 480 640 3756 8
|
||||
geometry 640 480 2048 4096 8
|
||||
timings 39682 4294967256 24 17 0 216 3
|
||||
endmode
|
||||
|
||||
mode "640x480-70"
|
||||
geometry 640 480 640 3756 8
|
||||
geometry 640 480 2048 4096 8
|
||||
timings 34013 4294967256 24 17 0 216 3
|
||||
endmode
|
||||
|
||||
mode "640x480-72"
|
||||
geometry 640 480 640 3756 8
|
||||
geometry 640 480 2048 4096 8
|
||||
timings 33068 4294967256 24 17 0 216 3
|
||||
endmode
|
||||
|
||||
mode "640x480-75"
|
||||
geometry 640 480 640 3756 8
|
||||
geometry 640 480 2048 4096 8
|
||||
timings 31746 4294967256 24 17 0 216 3
|
||||
endmode
|
||||
|
||||
mode "640x480-80"
|
||||
geometry 640 480 640 3756 8
|
||||
geometry 640 480 2048 4096 8
|
||||
timings 29761 4294967256 24 17 0 216 3
|
||||
endmode
|
||||
|
||||
mode "640x480-85"
|
||||
geometry 640 480 640 3756 8
|
||||
geometry 640 480 2048 4096 8
|
||||
timings 28011 4294967256 24 17 0 216 3
|
||||
endmode
|
||||
|
||||
mode "800x600-50"
|
||||
geometry 800 600 800 3221 8
|
||||
geometry 800 600 2048 4096 8
|
||||
timings 30303 96 24 14 0 136 11
|
||||
endmode
|
||||
|
||||
mode "800x600-60"
|
||||
geometry 800 600 800 3221 8
|
||||
geometry 800 600 2048 4096 8
|
||||
timings 25252 96 24 14 0 136 11
|
||||
endmode
|
||||
|
||||
mode "800x600-70"
|
||||
geometry 800 600 800 3221 8
|
||||
geometry 800 600 2048 4096 8
|
||||
timings 21645 96 24 14 0 136 11
|
||||
endmode
|
||||
|
||||
mode "800x600-72"
|
||||
geometry 800 600 800 3221 8
|
||||
geometry 800 600 2048 4096 8
|
||||
timings 21043 96 24 14 0 136 11
|
||||
endmode
|
||||
|
||||
mode "800x600-75"
|
||||
geometry 800 600 800 3221 8
|
||||
geometry 800 600 2048 4096 8
|
||||
timings 20202 96 24 14 0 136 11
|
||||
endmode
|
||||
|
||||
mode "800x600-80"
|
||||
geometry 800 600 800 3221 8
|
||||
geometry 800 600 2048 4096 8
|
||||
timings 18939 96 24 14 0 136 11
|
||||
endmode
|
||||
|
||||
mode "800x600-85"
|
||||
geometry 800 600 800 3221 8
|
||||
geometry 800 600 2048 4096 8
|
||||
timings 17825 96 24 14 0 136 11
|
||||
endmode
|
||||
|
||||
mode "1024x768-50"
|
||||
geometry 1024 768 1024 2815 8
|
||||
geometry 1024 768 2048 4096 8
|
||||
timings 19054 144 24 29 0 120 3
|
||||
endmode
|
||||
|
||||
mode "1024x768-60"
|
||||
geometry 1024 768 1024 2815 8
|
||||
geometry 1024 768 2048 4096 8
|
||||
timings 15880 144 24 29 0 120 3
|
||||
endmode
|
||||
|
||||
mode "1024x768-70"
|
||||
geometry 1024 768 1024 2815 8
|
||||
geometry 1024 768 2048 4096 8
|
||||
timings 13610 144 24 29 0 120 3
|
||||
endmode
|
||||
|
||||
mode "1024x768-72"
|
||||
geometry 1024 768 1024 2815 8
|
||||
geometry 1024 768 2048 4096 8
|
||||
timings 13232 144 24 29 0 120 3
|
||||
endmode
|
||||
|
||||
mode "1024x768-75"
|
||||
geometry 1024 768 1024 2815 8
|
||||
geometry 1024 768 2048 4096 8
|
||||
timings 12703 144 24 29 0 120 3
|
||||
endmode
|
||||
|
||||
mode "1024x768-80"
|
||||
geometry 1024 768 1024 2815 8
|
||||
geometry 1024 768 2048 4096 8
|
||||
timings 11910 144 24 29 0 120 3
|
||||
endmode
|
||||
|
||||
mode "1024x768-85"
|
||||
geometry 1024 768 1024 2815 8
|
||||
geometry 1024 768 2048 4096 8
|
||||
timings 11209 144 24 29 0 120 3
|
||||
endmode
|
||||
|
||||
mode "1280x1024-50"
|
||||
geometry 1280 1024 1280 2662 8
|
||||
geometry 1280 1024 2048 4096 8
|
||||
timings 11114 232 16 39 0 160 3
|
||||
endmode
|
||||
|
||||
mode "1280x1024-60"
|
||||
geometry 1280 1024 1280 2662 8
|
||||
geometry 1280 1024 2048 4096 8
|
||||
timings 9262 232 16 39 0 160 3
|
||||
endmode
|
||||
|
||||
mode "1280x1024-70"
|
||||
geometry 1280 1024 1280 2662 8
|
||||
geometry 1280 1024 2048 4096 8
|
||||
timings 7939 232 16 39 0 160 3
|
||||
endmode
|
||||
|
||||
mode "1280x1024-72"
|
||||
geometry 1280 1024 1280 2662 8
|
||||
geometry 1280 1024 2048 4096 8
|
||||
timings 7719 232 16 39 0 160 3
|
||||
endmode
|
||||
|
||||
mode "1280x1024-75"
|
||||
geometry 1280 1024 1280 2662 8
|
||||
geometry 1280 1024 2048 4096 8
|
||||
timings 7410 232 16 39 0 160 3
|
||||
endmode
|
||||
|
||||
mode "1280x1024-80"
|
||||
geometry 1280 1024 1280 2662 8
|
||||
geometry 1280 1024 2048 4096 8
|
||||
timings 6946 232 16 39 0 160 3
|
||||
endmode
|
||||
|
||||
mode "1280x1024-85"
|
||||
geometry 1280 1024 1280 2662 8
|
||||
geometry 1280 1024 2048 4096 8
|
||||
timings 6538 232 16 39 0 160 3
|
||||
endmode
|
||||
|
||||
|
|
|
@ -77,4 +77,3 @@ patch that speeds up kernel bitblitting a lot ( > 20%).
|
|||
| | | | |
|
||||
| | | | |
|
||||
+-----------+-----------------+-----------------+-----------------+
|
||||
|
||||
|
|
|
@ -22,11 +22,10 @@ accelerated color blitting Who needs it? The console driver does use color
|
|||
everything else is done using color expanding
|
||||
blitting of 1bpp character bitmaps.
|
||||
|
||||
xpanning Who needs it?
|
||||
|
||||
ioctls Who needs it?
|
||||
|
||||
TV-out Will be done later
|
||||
TV-out Will be done later. Use "vga= " at boot time
|
||||
to set a suitable video mode.
|
||||
|
||||
??? Feel free to contact me if you have any
|
||||
feature requests
|
||||
|
|
|
@ -40,6 +40,16 @@ Selecting Modes
|
|||
None of the modes possible to select as startup modes are affected by
|
||||
the problems described at the end of the next subsection.
|
||||
|
||||
For all startup modes cyblafb chooses a virtual x resolution of 2048,
|
||||
the only exception is mode 1280x1024 in combination with 32 bpp. This
|
||||
allows ywrap scrolling for all those modes if rotation is 0 or 2, and
|
||||
also fast scrolling if rotation is 1 or 3. The default virtual y reso-
|
||||
lution is 4096 for bpp == 8, 2048 for bpp==16 and 1024 for bpp == 32,
|
||||
again with the only exception of 1280x1024 at 32 bpp.
|
||||
|
||||
Please do set your video memory size to 8 Mb in the Bios setup. Other
|
||||
values will work, but performance is decreased for a lot of modes.
|
||||
|
||||
Mode changes using fbset
|
||||
========================
|
||||
|
||||
|
@ -54,20 +64,26 @@ Selecting Modes
|
|||
- if a flat panel is found, cyblafb does not allow you
|
||||
to program a resolution higher than the physical
|
||||
resolution of the flat panel monitor
|
||||
- cyblafb does not allow xres to differ from xres_virtual
|
||||
- cyblafb does not allow vclk to exceed 230 MHz. As 32 bpp
|
||||
and (currently) 24 bit modes use a doubled vclk internally,
|
||||
the dotclock limit as seen by fbset is 115 MHz for those
|
||||
modes and 230 MHz for 8 and 16 bpp modes.
|
||||
- cyblafb will allow you to select very high resolutions as
|
||||
long as the hardware can be programmed to these modes. The
|
||||
documented limit 1600x1200 is not enforced, but don't expect
|
||||
perfect signal quality.
|
||||
|
||||
Any request that violates the rules given above will be ignored and
|
||||
fbset will return an error.
|
||||
Any request that violates the rules given above will be either changed
|
||||
to something the hardware supports or an error value will be returned.
|
||||
|
||||
If you program a virtual y resolution higher than the hardware limit,
|
||||
cyblafb will silently decrease that value to the highest possible
|
||||
value.
|
||||
value. The same is true for a virtual x resolution that is not
|
||||
supported by the hardware. Cyblafb tries to adapt vyres first because
|
||||
vxres decides if ywrap scrolling is possible or not.
|
||||
|
||||
Attempts to disable acceleration are ignored.
|
||||
Attempts to disable acceleration are ignored, I believe that this is
|
||||
safe.
|
||||
|
||||
Some video modes that should work do not work as expected. If you use
|
||||
the standard fb.modes, fbset 640x480-60 will program that mode, but
|
||||
|
@ -129,10 +145,6 @@ mode 640x480 or 800x600 or 1024x768 or 1280x1024
|
|||
verbosity 0 is the default, increase to at least 2 for every
|
||||
bug report!
|
||||
|
||||
vesafb allows cyblafb to be loaded after vesafb has been
|
||||
loaded. See sections "Module unloading ...".
|
||||
|
||||
|
||||
Development hints
|
||||
=================
|
||||
|
||||
|
@ -195,7 +207,7 @@ a graphics mode.
|
|||
After booting, load cyblafb without any mode and bpp parameter and assign
|
||||
cyblafb to individual ttys using con2fb, e.g.:
|
||||
|
||||
modprobe cyblafb vesafb=1
|
||||
modprobe cyblafb
|
||||
con2fb /dev/fb1 /dev/tty1
|
||||
|
||||
Unloading cyblafb works without problems after you assign vesafb to all
|
||||
|
@ -203,4 +215,3 @@ ttys again, e.g.:
|
|||
|
||||
con2fb /dev/fb0 /dev/tty1
|
||||
rmmod cyblafb
|
||||
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
0.62
|
||||
====
|
||||
|
||||
- the vesafb parameter has been removed as I decided to allow the
|
||||
feature without any special parameter.
|
||||
|
||||
- Cyblafb does not use the vga style of panning any longer, now the
|
||||
"right view" register in the graphics engine IO space is used. Without
|
||||
that change it was impossible to use all available memory, and without
|
||||
access to all available memory it is impossible to ywrap.
|
||||
|
||||
- The imageblit function now uses hardware acceleration for all font
|
||||
widths. Hardware blitting across pixel column 2048 is broken in the
|
||||
cyberblade/i1 graphics core, but we work around that hardware bug.
|
||||
|
||||
- modes with vxres != xres are supported now.
|
||||
|
||||
- ywrap scrolling is supported now and the default. This is a big
|
||||
performance gain.
|
||||
|
||||
- default video modes use vyres > yres and vxres > xres to allow
|
||||
almost optimal scrolling speed for normal and rotated screens
|
||||
|
||||
- some features mainly useful for debugging the upper layers of the
|
||||
framebuffer system have been added, have a look at the code
|
||||
|
||||
- fixed: Oops after unloading cyblafb when reading /proc/io*
|
||||
|
||||
- we work around some bugs of the higher framebuffer layers.
|
|
@ -123,6 +123,15 @@ Who: Christoph Hellwig <hch@lst.de>
|
|||
|
||||
---------------------------
|
||||
|
||||
What: CONFIG_FORCED_INLINING
|
||||
When: June 2006
|
||||
Why: Config option is there to see if gcc is good enough. (in January
|
||||
2006). If it is, the behavior should just be the default. If it's not,
|
||||
the option should just go away entirely.
|
||||
Who: Arjan van de Ven
|
||||
|
||||
---------------------------
|
||||
|
||||
What: START_ARRAY ioctl for md
|
||||
When: July 2006
|
||||
Files: drivers/md/md.c
|
||||
|
|
|
@ -2,11 +2,11 @@
|
|||
Ext3 Filesystem
|
||||
===============
|
||||
|
||||
ext3 was originally released in September 1999. Written by Stephen Tweedie
|
||||
for 2.2 branch, and ported to 2.4 kernels by Peter Braam, Andreas Dilger,
|
||||
Ext3 was originally released in September 1999. Written by Stephen Tweedie
|
||||
for the 2.2 branch, and ported to 2.4 kernels by Peter Braam, Andreas Dilger,
|
||||
Andrew Morton, Alexander Viro, Ted Ts'o and Stephen Tweedie.
|
||||
|
||||
ext3 is ext2 filesystem enhanced with journalling capabilities.
|
||||
Ext3 is the ext2 filesystem enhanced with journalling capabilities.
|
||||
|
||||
Options
|
||||
=======
|
||||
|
@ -14,76 +14,81 @@ Options
|
|||
When mounting an ext3 filesystem, the following options are accepted:
|
||||
(*) == default
|
||||
|
||||
jounal=update Update the ext3 file system's journal to the
|
||||
current format.
|
||||
journal=update Update the ext3 file system's journal to the current
|
||||
format.
|
||||
|
||||
journal=inum When a journal already exists, this option is
|
||||
ignored. Otherwise, it specifies the number of
|
||||
the inode which will represent the ext3 file
|
||||
system's journal file.
|
||||
journal=inum When a journal already exists, this option is ignored.
|
||||
Otherwise, it specifies the number of the inode which
|
||||
will represent the ext3 file system's journal file.
|
||||
|
||||
journal_dev=devnum When the external journal device's major/minor numbers
|
||||
have changed, this option allows the user to specify
|
||||
the new journal location. The journal device is
|
||||
identified through its new major/minor numbers encoded
|
||||
in devnum.
|
||||
|
||||
noload Don't load the journal on mounting.
|
||||
|
||||
data=journal All data are committed into the journal prior
|
||||
to being written into the main file system.
|
||||
data=journal All data are committed into the journal prior to being
|
||||
written into the main file system.
|
||||
|
||||
data=ordered (*) All data are forced directly out to the main file
|
||||
system prior to its metadata being committed to
|
||||
the journal.
|
||||
system prior to its metadata being committed to the
|
||||
journal.
|
||||
|
||||
data=writeback Data ordering is not preserved, data may be
|
||||
written into the main file system after its
|
||||
metadata has been committed to the journal.
|
||||
data=writeback Data ordering is not preserved, data may be written
|
||||
into the main file system after its metadata has been
|
||||
committed to the journal.
|
||||
|
||||
commit=nrsec (*) Ext3 can be told to sync all its data and metadata
|
||||
every 'nrsec' seconds. The default value is 5 seconds.
|
||||
This means that if you lose your power, you will lose,
|
||||
as much, the latest 5 seconds of work (your filesystem
|
||||
will not be damaged though, thanks to journaling). This
|
||||
default value (or any low value) will hurt performance,
|
||||
but it's good for data-safety. Setting it to 0 will
|
||||
have the same effect than leaving the default 5 sec.
|
||||
This means that if you lose your power, you will lose
|
||||
as much as the latest 5 seconds of work (your
|
||||
filesystem will not be damaged though, thanks to the
|
||||
journaling). This default value (or any low value)
|
||||
will hurt performance, but it's good for data-safety.
|
||||
Setting it to 0 will have the same effect as leaving
|
||||
it at the default (5 seconds).
|
||||
Setting it to very large values will improve
|
||||
performance.
|
||||
|
||||
barrier=1 This enables/disables barriers. barrier=0 disables it,
|
||||
barrier=1 enables it.
|
||||
barrier=1 This enables/disables barriers. barrier=0 disables
|
||||
it, barrier=1 enables it.
|
||||
|
||||
orlov (*) This enables the new Orlov block allocator. It's enabled
|
||||
by default.
|
||||
orlov (*) This enables the new Orlov block allocator. It is
|
||||
enabled by default.
|
||||
|
||||
oldalloc This disables the Orlov block allocator and enables the
|
||||
old block allocator. Orlov should have better performance,
|
||||
we'd like to get some feedback if it's the contrary for
|
||||
you.
|
||||
oldalloc This disables the Orlov block allocator and enables
|
||||
the old block allocator. Orlov should have better
|
||||
performance - we'd like to get some feedback if it's
|
||||
the contrary for you.
|
||||
|
||||
user_xattr Enables Extended User Attributes. Additionally, you need
|
||||
to have extended attribute support enabled in the kernel
|
||||
configuration (CONFIG_EXT3_FS_XATTR). See the attr(5)
|
||||
manual page and http://acl.bestbits.at to learn more
|
||||
about extended attributes.
|
||||
user_xattr Enables Extended User Attributes. Additionally, you
|
||||
need to have extended attribute support enabled in the
|
||||
kernel configuration (CONFIG_EXT3_FS_XATTR). See the
|
||||
attr(5) manual page and http://acl.bestbits.at/ to
|
||||
learn more about extended attributes.
|
||||
|
||||
nouser_xattr Disables Extended User Attributes.
|
||||
|
||||
acl Enables POSIX Access Control Lists support. Additionally,
|
||||
you need to have ACL support enabled in the kernel
|
||||
configuration (CONFIG_EXT3_FS_POSIX_ACL). See the acl(5)
|
||||
manual page and http://acl.bestbits.at for more
|
||||
information.
|
||||
acl Enables POSIX Access Control Lists support.
|
||||
Additionally, you need to have ACL support enabled in
|
||||
the kernel configuration (CONFIG_EXT3_FS_POSIX_ACL).
|
||||
See the acl(5) manual page and http://acl.bestbits.at/
|
||||
for more information.
|
||||
|
||||
noacl This option disables POSIX Access Control List support.
|
||||
noacl This option disables POSIX Access Control List
|
||||
support.
|
||||
|
||||
reservation
|
||||
|
||||
noreservation
|
||||
|
||||
resize=
|
||||
|
||||
bsddf (*) Make 'df' act like BSD.
|
||||
minixdf Make 'df' act like Minix.
|
||||
|
||||
check=none Don't do extra checking of bitmaps on mount.
|
||||
nocheck
|
||||
nocheck
|
||||
|
||||
debug Extra debugging information is sent to syslog.
|
||||
|
||||
|
@ -92,7 +97,7 @@ errors=continue Keep going on a filesystem error.
|
|||
errors=panic Panic and halt the machine if an error occurs.
|
||||
|
||||
grpid Give objects the same group ID as their creator.
|
||||
bsdgroups
|
||||
bsdgroups
|
||||
|
||||
nogrpid (*) New objects have the group ID of their creator.
|
||||
sysvgroups
|
||||
|
@ -103,81 +108,83 @@ resuid=n The user ID which may use the reserved blocks.
|
|||
|
||||
sb=n Use alternate superblock at this location.
|
||||
|
||||
quota Quota options are currently silently ignored.
|
||||
noquota (see fs/ext3/super.c, line 594)
|
||||
quota
|
||||
noquota
|
||||
grpquota
|
||||
usrquota
|
||||
|
||||
|
||||
Specification
|
||||
=============
|
||||
ext3 shares all disk implementation with ext2 filesystem, and add
|
||||
transactions capabilities to ext2. Journaling is done by the
|
||||
Journaling block device layer.
|
||||
Ext3 shares all disk implementation with the ext2 filesystem, and adds
|
||||
transaction capabilities to ext2. Journaling is done by the Journaling Block
|
||||
Device layer.
|
||||
|
||||
Journaling Block Device layer
|
||||
-----------------------------
|
||||
The Journaling Block Device layer (JBD) isn't ext3 specific. It was
|
||||
design to add journaling capabilities on a block device. The ext3
|
||||
filesystem code will inform the JBD of modifications it is performing
|
||||
(Call a transaction). the journal support the transactions start and
|
||||
stop, and in case of crash, the journal can replayed the transactions
|
||||
to put the partition on a consistent state fastly.
|
||||
The Journaling Block Device layer (JBD) isn't ext3 specific. It was designed to
|
||||
add journaling capabilities on a block device. The ext3 filesystem code will
|
||||
inform the JBD of modifications it is performing (called a transaction). The
|
||||
journal supports the transactions start and stop, and in case of crash, the
|
||||
journal can replay the transactions to put the partition back in a
|
||||
consistent state fast.
|
||||
|
||||
handles represent a single atomic update to a filesystem. JBD can
|
||||
handle external journal on a block device.
|
||||
Handles represent a single atomic update to a filesystem. JBD can handle an
|
||||
external journal on a block device.
|
||||
|
||||
Data Mode
|
||||
---------
|
||||
There's 3 different data modes:
|
||||
There are 3 different data modes:
|
||||
|
||||
* writeback mode
|
||||
In data=writeback mode, ext3 does not journal data at all. This mode
|
||||
provides a similar level of journaling as XFS, JFS, and ReiserFS in its
|
||||
default mode - metadata journaling. A crash+recovery can cause
|
||||
incorrect data to appear in files which were written shortly before the
|
||||
crash. This mode will typically provide the best ext3 performance.
|
||||
In data=writeback mode, ext3 does not journal data at all. This mode provides
|
||||
a similar level of journaling as that of XFS, JFS, and ReiserFS in its default
|
||||
mode - metadata journaling. A crash+recovery can cause incorrect data to
|
||||
appear in files which were written shortly before the crash. This mode will
|
||||
typically provide the best ext3 performance.
|
||||
|
||||
* ordered mode
|
||||
In data=ordered mode, ext3 only officially journals metadata, but it
|
||||
logically groups metadata and data blocks into a single unit called a
|
||||
transaction. When it's time to write the new metadata out to disk, the
|
||||
associated data blocks are written first. In general, this mode
|
||||
perform slightly slower than writeback but significantly faster than
|
||||
journal mode.
|
||||
In data=ordered mode, ext3 only officially journals metadata, but it logically
|
||||
groups metadata and data blocks into a single unit called a transaction. When
|
||||
it's time to write the new metadata out to disk, the associated data blocks
|
||||
are written first. In general, this mode performs slightly slower than
|
||||
writeback but significantly faster than journal mode.
|
||||
|
||||
* journal mode
|
||||
data=journal mode provides full data and metadata journaling. All new
|
||||
data is written to the journal first, and then to its final location.
|
||||
In the event of a crash, the journal can be replayed, bringing both
|
||||
data and metadata into a consistent state. This mode is the slowest
|
||||
except when data needs to be read from and written to disk at the same
|
||||
time where it outperform all others mode.
|
||||
data=journal mode provides full data and metadata journaling. All new data is
|
||||
written to the journal first, and then to its final location.
|
||||
In the event of a crash, the journal can be replayed, bringing both data and
|
||||
metadata into a consistent state. This mode is the slowest except when data
|
||||
needs to be read from and written to disk at the same time where it
|
||||
outperforms all other modes.
|
||||
|
||||
Compatibility
|
||||
-------------
|
||||
|
||||
Ext2 partitions can be easily converted to ext3, with `tune2fs -j <dev>`.
|
||||
Ext3 is fully compatible with Ext2. Ext3 partitions can easily be
|
||||
mounted as Ext2.
|
||||
Ext3 is fully compatible with Ext2. Ext3 partitions can easily be mounted as
|
||||
Ext2.
|
||||
|
||||
|
||||
External Tools
|
||||
==============
|
||||
see manual pages to know more.
|
||||
See manual pages to learn more.
|
||||
|
||||
tune2fs: create a ext3 journal on a ext2 partition with the -j flag.
|
||||
mke2fs: create a ext3 partition with the -j flag.
|
||||
debugfs: ext2 and ext3 file system debugger.
|
||||
ext2online: online (mounted) ext2 and ext3 filesystem resizer
|
||||
|
||||
tune2fs: create a ext3 journal on a ext2 partition with the -j flags
|
||||
mke2fs: create a ext3 partition with the -j flags
|
||||
debugfs: ext2 and ext3 file system debugger
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
kernel source: file:/usr/src/linux/fs/ext3
|
||||
file:/usr/src/linux/fs/jbd
|
||||
kernel source: <file:fs/ext3/>
|
||||
<file:fs/jbd/>
|
||||
|
||||
programs: http://e2fsprogs.sourceforge.net
|
||||
programs: http://e2fsprogs.sourceforge.net/
|
||||
http://ext2resize.sourceforge.net
|
||||
|
||||
useful link:
|
||||
http://www.zip.com.au/~akpm/linux/ext3/ext3-usage.html
|
||||
useful links: http://www.zip.com.au/~akpm/linux/ext3/ext3-usage.html
|
||||
http://www-106.ibm.com/developerworks/linux/library/l-fs7/
|
||||
http://www-106.ibm.com/developerworks/linux/library/l-fs8/
|
||||
|
|
|
@ -86,6 +86,62 @@ Mount options
|
|||
The default is infinite. Note that the size of read requests is
|
||||
limited anyway to 32 pages (which is 128kbyte on i386).
|
||||
|
||||
Sysfs
|
||||
~~~~~
|
||||
|
||||
FUSE sets up the following hierarchy in sysfs:
|
||||
|
||||
/sys/fs/fuse/connections/N/
|
||||
|
||||
where N is an increasing number allocated to each new connection.
|
||||
|
||||
For each connection the following attributes are defined:
|
||||
|
||||
'waiting'
|
||||
|
||||
The number of requests which are waiting to be transferred to
|
||||
userspace or being processed by the filesystem daemon. If there is
|
||||
no filesystem activity and 'waiting' is non-zero, then the
|
||||
filesystem is hung or deadlocked.
|
||||
|
||||
'abort'
|
||||
|
||||
Writing anything into this file will abort the filesystem
|
||||
connection. This means that all waiting requests will be aborted and an
|
||||
error returned for all aborted and new requests.
|
||||
|
||||
Only a privileged user may read or write these attributes.
|
||||
|
||||
Aborting a filesystem connection
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It is possible to get into certain situations where the filesystem is
|
||||
not responding. Reasons for this may be:
|
||||
|
||||
a) Broken userspace filesystem implementation
|
||||
|
||||
b) Network connection down
|
||||
|
||||
c) Accidental deadlock
|
||||
|
||||
d) Malicious deadlock
|
||||
|
||||
(For more on c) and d) see later sections)
|
||||
|
||||
In any of these cases it may be useful to abort the connection to
|
||||
the filesystem. There are several ways to do this:
|
||||
|
||||
- Kill the filesystem daemon. Works in case of a) and b)
|
||||
|
||||
- Kill the filesystem daemon and all users of the filesystem. Works
|
||||
in all cases except some malicious deadlocks
|
||||
|
||||
- Use forced umount (umount -f). Works in all cases but only if
|
||||
filesystem is still attached (it hasn't been lazy unmounted)
|
||||
|
||||
- Abort filesystem through the sysfs interface. Most powerful
|
||||
method, always works.
|
||||
|
||||
How do non-privileged mounts work?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
@ -313,3 +369,10 @@ faulted with get_user_pages(). The 'req->locked' flag indicates
|
|||
when the copy is taking place, and interruption is delayed until
|
||||
this flag is unset.
|
||||
|
||||
Scenario 3 - Tricky deadlock with asynchronous read
|
||||
---------------------------------------------------
|
||||
|
||||
The same situation as above, except thread-1 will wait on page lock
|
||||
and hence it will be uninterruptible as well. The solution is to
|
||||
abort the connection with forced umount (if mount is attached) or
|
||||
through the abort attribute in sysfs.
|
||||
|
|
|
@ -418,7 +418,7 @@ VmallocChunk: 111088 kB
|
|||
Dirty: Memory which is waiting to get written back to the disk
|
||||
Writeback: Memory which is actively being written back to the disk
|
||||
Mapped: files which have been mmaped, such as libraries
|
||||
Slab: in-kernel data structures cache
|
||||
Slab: in-kernel data structures cache
|
||||
CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'),
|
||||
this is the total amount of memory currently available to
|
||||
be allocated on the system. This limit is only adhered to
|
||||
|
@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent
|
|||
unnecessary page faults in thrashing situation. The unit of the value is
|
||||
second. The value would be useful to tune thrashing behavior.
|
||||
|
||||
drop_caches
|
||||
-----------
|
||||
|
||||
Writing to this will cause the kernel to drop clean caches, dentries and
|
||||
inodes from memory, causing that memory to become free.
|
||||
|
||||
To free pagecache:
|
||||
echo 1 > /proc/sys/vm/drop_caches
|
||||
To free dentries and inodes:
|
||||
echo 2 > /proc/sys/vm/drop_caches
|
||||
To free pagecache, dentries and inodes:
|
||||
echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
As this is a non-destructive operation and dirty objects are not freeable, the
|
||||
user should run `sync' first.
|
||||
|
||||
|
||||
2.5 /proc/sys/dev - Device specific parameters
|
||||
----------------------------------------------
|
||||
|
||||
|
|
|
@ -143,12 +143,26 @@ as the following example:
|
|||
dir /mnt 755 0 0
|
||||
file /init initramfs/init.sh 755 0 0
|
||||
|
||||
Run "usr/gen_init_cpio" (after the kernel build) to get a usage message
|
||||
documenting the above file format.
|
||||
|
||||
One advantage of the text file is that root access is not required to
|
||||
set permissions or create device nodes in the new archive. (Note that those
|
||||
two example "file" entries expect to find files named "init.sh" and "busybox" in
|
||||
a directory called "initramfs", under the linux-2.6.* directory. See
|
||||
Documentation/early-userspace/README for more details.)
|
||||
|
||||
The kernel does not depend on external cpio tools, gen_init_cpio is created
|
||||
from usr/gen_init_cpio.c which is entirely self-contained, and the kernel's
|
||||
boot-time extractor is also (obviously) self-contained. However, if you _do_
|
||||
happen to have cpio installed, the following command line can extract the
|
||||
generated cpio image back into its component files:
|
||||
|
||||
cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames
|
||||
|
||||
Contents of initramfs:
|
||||
----------------------
|
||||
|
||||
If you don't already understand what shared libraries, devices, and paths
|
||||
you need to get a minimal root filesystem up and running, here are some
|
||||
references:
|
||||
|
@ -161,13 +175,69 @@ designed to be a tiny C library to statically link early userspace
|
|||
code against, along with some related utilities. It is BSD licensed.
|
||||
|
||||
I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
|
||||
myself. These are LGPL and GPL, respectively.
|
||||
myself. These are LGPL and GPL, respectively. (A self-contained initramfs
|
||||
package is planned for the busybox 1.2 release.)
|
||||
|
||||
In theory you could use glibc, but that's not well suited for small embedded
|
||||
uses like this. (A "hello world" program statically linked against glibc is
|
||||
over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do
|
||||
name lookups, even when otherwise statically linked.)
|
||||
|
||||
Why cpio rather than tar?
|
||||
-------------------------
|
||||
|
||||
This decision was made back in December, 2001. The discussion started here:
|
||||
|
||||
http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1538.html
|
||||
|
||||
And spawned a second thread (specifically on tar vs cpio), starting here:
|
||||
|
||||
http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1587.html
|
||||
|
||||
The quick and dirty summary version (which is no substitute for reading
|
||||
the above threads) is:
|
||||
|
||||
1) cpio is a standard. It's decades old (from the AT&T days), and already
|
||||
widely used on Linux (inside RPM, Red Hat's device driver disks). Here's
|
||||
a Linux Journal article about it from 1996:
|
||||
|
||||
http://www.linuxjournal.com/article/1213
|
||||
|
||||
It's not as popular as tar because the traditional cpio command line tools
|
||||
require _truly_hideous_ command line arguments. But that says nothing
|
||||
either way about the archive format, and there are alternative tools,
|
||||
such as:
|
||||
|
||||
http://freshmeat.net/projects/afio/
|
||||
|
||||
2) The cpio archive format chosen by the kernel is simpler and cleaner (and
|
||||
thus easier to create and parse) than any of the (literally dozens of)
|
||||
various tar archive formats. The complete initramfs archive format is
|
||||
explained in buffer-format.txt, created in usr/gen_init_cpio.c, and
|
||||
extracted in init/initramfs.c. All three together come to less than 26k
|
||||
total of human-readable text.
|
||||
|
||||
3) The GNU project standardizing on tar is approximately as relevant as
|
||||
Windows standardizing on zip. Linux is not part of either, and is free
|
||||
to make its own technical decisions.
|
||||
|
||||
4) Since this is a kernel internal format, it could easily have been
|
||||
something brand new. The kernel provides its own tools to create and
|
||||
extract this format anyway. Using an existing standard was preferable,
|
||||
but not essential.
|
||||
|
||||
5) Al Viro made the decision (quote: "tar is ugly as hell and not going to be
|
||||
supported on the kernel side"):
|
||||
|
||||
http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1540.html
|
||||
|
||||
explained his reasoning:
|
||||
|
||||
http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html
|
||||
http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html
|
||||
|
||||
and, most importantly, designed and implemented the initramfs code.
|
||||
|
||||
Future directions:
|
||||
------------------
|
||||
|
||||
|
|
|
@ -44,30 +44,41 @@ relayfs can operate in a mode where it will overwrite data not yet
|
|||
collected by userspace, and not wait for it to consume it.
|
||||
|
||||
relayfs itself does not provide for communication of such data between
|
||||
userspace and kernel, allowing the kernel side to remain simple and not
|
||||
impose a single interface on userspace. It does provide a separate
|
||||
helper though, described below.
|
||||
userspace and kernel, allowing the kernel side to remain simple and
|
||||
not impose a single interface on userspace. It does provide a set of
|
||||
examples and a separate helper though, described below.
|
||||
|
||||
klog, relay-app & librelay
|
||||
==========================
|
||||
klog and relay-apps example code
|
||||
================================
|
||||
|
||||
relayfs itself is ready to use, but to make things easier, two
|
||||
additional systems are provided. klog is a simple wrapper to make
|
||||
writing formatted text or raw data to a channel simpler, regardless of
|
||||
whether a channel to write into exists or not, or whether relayfs is
|
||||
compiled into the kernel or is configured as a module. relay-app is
|
||||
the kernel counterpart of userspace librelay.c, combined these two
|
||||
files provide glue to easily stream data to disk, without having to
|
||||
bother with housekeeping. klog and relay-app can be used together,
|
||||
with klog providing high-level logging functions to the kernel and
|
||||
relay-app taking care of kernel-user control and disk-logging chores.
|
||||
relayfs itself is ready to use, but to make things easier, a couple
|
||||
simple utility functions and a set of examples are provided.
|
||||
|
||||
It is possible to use relayfs without relay-app & librelay, but you'll
|
||||
have to implement communication between userspace and kernel, allowing
|
||||
both to convey the state of buffers (full, empty, amount of padding).
|
||||
The relay-apps example tarball, available on the relayfs sourceforge
|
||||
site, contains a set of self-contained examples, each consisting of a
|
||||
pair of .c files containing boilerplate code for each of the user and
|
||||
kernel sides of a relayfs application; combined these two sets of
|
||||
boilerplate code provide glue to easily stream data to disk, without
|
||||
having to bother with mundane housekeeping chores.
|
||||
|
||||
The 'klog debugging functions' patch (klog.patch in the relay-apps
|
||||
tarball) provides a couple of high-level logging functions to the
|
||||
kernel which allow writing formatted text or raw data to a channel,
|
||||
regardless of whether a channel to write into exists or not, or
|
||||
whether relayfs is compiled into the kernel or is configured as a
|
||||
module. These functions allow you to put unconditional 'trace'
|
||||
statements anywhere in the kernel or kernel modules; only when there
|
||||
is a 'klog handler' registered will data actually be logged (see the
|
||||
klog and kleak examples for details).
|
||||
|
||||
It is of course possible to use relayfs from scratch i.e. without
|
||||
using any of the relay-apps example code or klog, but you'll have to
|
||||
implement communication between userspace and kernel, allowing both to
|
||||
convey the state of buffers (full, empty, amount of padding).
|
||||
|
||||
klog and the relay-apps examples can be found in the relay-apps
|
||||
tarball on http://relayfs.sourceforge.net
|
||||
|
||||
klog, relay-app and librelay can be found in the relay-apps tarball on
|
||||
http://relayfs.sourceforge.net
|
||||
|
||||
The relayfs user space API
|
||||
==========================
|
||||
|
@ -125,6 +136,8 @@ Here's a summary of the API relayfs provides to in-kernel clients:
|
|||
relay_reset(chan)
|
||||
relayfs_create_dir(name, parent)
|
||||
relayfs_remove_dir(dentry)
|
||||
relayfs_create_file(name, parent, mode, fops, data)
|
||||
relayfs_remove_file(dentry)
|
||||
|
||||
channel management typically called on instigation of userspace:
|
||||
|
||||
|
@ -141,6 +154,8 @@ Here's a summary of the API relayfs provides to in-kernel clients:
|
|||
subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
|
||||
buf_mapped(buf, filp)
|
||||
buf_unmapped(buf, filp)
|
||||
create_buf_file(filename, parent, mode, buf, is_global)
|
||||
remove_buf_file(dentry)
|
||||
|
||||
helper functions:
|
||||
|
||||
|
@ -320,6 +335,71 @@ forces a sub-buffer switch on all the channel buffers, and can be used
|
|||
to finalize and process the last sub-buffers before the channel is
|
||||
closed.
|
||||
|
||||
Creating non-relay files
|
||||
------------------------
|
||||
|
||||
relay_open() automatically creates files in the relayfs filesystem to
|
||||
represent the per-cpu kernel buffers; it's often useful for
|
||||
applications to be able to create their own files alongside the relay
|
||||
files in the relayfs filesystem as well e.g. 'control' files much like
|
||||
those created in /proc or debugfs for similar purposes, used to
|
||||
communicate control information between the kernel and user sides of a
|
||||
relayfs application. For this purpose the relayfs_create_file() and
|
||||
relayfs_remove_file() API functions exist. For relayfs_create_file(),
|
||||
the caller passes in a set of user-defined file operations to be used
|
||||
for the file and an optional void * to a user-specified data item,
|
||||
which will be accessible via inode->u.generic_ip (see the relay-apps
|
||||
tarball for examples). The file_operations are a required parameter
|
||||
to relayfs_create_file() and thus the semantics of these files are
|
||||
completely defined by the caller.
|
||||
|
||||
See the relay-apps tarball at http://relayfs.sourceforge.net for
|
||||
examples of how these non-relay files are meant to be used.
|
||||
|
||||
Creating relay files in other filesystems
|
||||
-----------------------------------------
|
||||
|
||||
By default of course, relay_open() creates relay files in the relayfs
|
||||
filesystem. Because relay_file_operations is exported, however, it's
|
||||
also possible to create and use relay files in other pseudo-filesystems
|
||||
such as debugfs.
|
||||
|
||||
For this purpose, two callback functions are provided,
|
||||
create_buf_file() and remove_buf_file(). create_buf_file() is called
|
||||
once for each per-cpu buffer from relay_open() to allow the client to
|
||||
create a file to be used to represent the corresponding buffer; if
|
||||
this callback is not defined, the default implementation will create
|
||||
and return a file in the relayfs filesystem to represent the buffer.
|
||||
The callback should return the dentry of the file created to represent
|
||||
the relay buffer. Note that the parent directory passed to
|
||||
relay_open() (and passed along to the callback), if specified, must
|
||||
exist in the same filesystem the new relay file is created in. If
|
||||
create_buf_file() is defined, remove_buf_file() must also be defined;
|
||||
it's responsible for deleting the file(s) created in create_buf_file()
|
||||
and is called during relay_close().
|
||||
|
||||
The create_buf_file() implementation can also be defined in such a way
|
||||
as to allow the creation of a single 'global' buffer instead of the
|
||||
default per-cpu set. This can be useful for applications interested
|
||||
mainly in seeing the relative ordering of system-wide events without
|
||||
the need to bother with saving explicit timestamps for the purpose of
|
||||
merging/sorting per-cpu files in a postprocessing step.
|
||||
|
||||
To have relay_open() create a global buffer, the create_buf_file()
|
||||
implementation should set the value of the is_global outparam to a
|
||||
non-zero value in addition to creating the file that will be used to
|
||||
represent the single buffer. In the case of a global buffer,
|
||||
create_buf_file() and remove_buf_file() will be called only once. The
|
||||
normal channel-writing functions e.g. relay_write() can still be used
|
||||
- writes from any cpu will transparently end up in the global buffer -
|
||||
but since it is a global buffer, callers should make sure they use the
|
||||
proper locking for such a buffer, either by wrapping writes in a
|
||||
spinlock, or by copying a write function from relayfs_fs.h and
|
||||
creating a local version that internally does the proper locking.
|
||||
|
||||
See the 'exported-relayfile' examples in the relay-apps tarball for
|
||||
examples of creating and using relay files in debugfs.
|
||||
|
||||
Misc
|
||||
----
|
||||
|
||||
|
|
|
@ -0,0 +1,521 @@
|
|||
SPUFS(2) Linux Programmer's Manual SPUFS(2)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
spufs - the SPU file system
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
The SPU file system is used on PowerPC machines that implement the Cell
|
||||
Broadband Engine Architecture in order to access Synergistic Processor
|
||||
Units (SPUs).
|
||||
|
||||
The file system provides a name space similar to posix shared memory or
|
||||
message queues. Users that have write permissions on the file system
|
||||
can use spu_create(2) to establish SPU contexts in the spufs root.
|
||||
|
||||
Every SPU context is represented by a directory containing a predefined
|
||||
set of files. These files can be used for manipulating the state of the
|
||||
logical SPU. Users can change permissions on those files, but not actu-
|
||||
ally add or remove files.
|
||||
|
||||
|
||||
MOUNT OPTIONS
|
||||
uid=<uid>
|
||||
set the user owning the mount point, the default is 0 (root).
|
||||
|
||||
gid=<gid>
|
||||
set the group owning the mount point, the default is 0 (root).
|
||||
|
||||
|
||||
FILES
|
||||
The files in spufs mostly follow the standard behavior for regular sys-
|
||||
tem calls like read(2) or write(2), but often support only a subset of
|
||||
the operations supported on regular file systems. This list details the
|
||||
supported operations and the deviations from the behaviour in the
|
||||
respective man pages.
|
||||
|
||||
All files that support the read(2) operation also support readv(2) and
|
||||
all files that support the write(2) operation also support writev(2).
|
||||
All files support the access(2) and stat(2) family of operations, but
|
||||
only the st_mode, st_nlink, st_uid and st_gid fields of struct stat
|
||||
contain reliable information.
|
||||
|
||||
All files support the chmod(2)/fchmod(2) and chown(2)/fchown(2) opera-
|
||||
tions, but will not be able to grant permissions that contradict the
|
||||
possible operations, e.g. read access on the wbox file.
|
||||
|
||||
The current set of files is:
|
||||
|
||||
|
||||
/mem
|
||||
the contents of the local storage memory of the SPU. This can be
|
||||
accessed like a regular shared memory file and contains both code and
|
||||
data in the address space of the SPU. The possible operations on an
|
||||
open mem file are:
|
||||
|
||||
read(2), pread(2), write(2), pwrite(2), lseek(2)
|
||||
These operate as documented, with the exception that lseek(2),
|
||||
write(2) and pwrite(2) are not supported beyond the end of the
|
||||
file. The file size is the size of the local storage of the SPU,
|
||||
which normally is 256 kilobytes.
|
||||
|
||||
mmap(2)
|
||||
Mapping mem into the process address space gives access to the
|
||||
SPU local storage within the process address space. Only
|
||||
MAP_SHARED mappings are allowed.
|
||||
|
||||
|
||||
/mbox
|
||||
The first SPU to CPU communication mailbox. This file is read-only and
|
||||
can be read in units of 32 bits. The file can only be used in non-
|
||||
blocking mode and even poll() will not block on it. The possible
|
||||
operations on an open mbox file are:
|
||||
|
||||
read(2)
|
||||
If a count smaller than four is requested, read returns -1 and
|
||||
sets errno to EINVAL. If there is no data available in the mail
|
||||
box, the return value is set to -1 and errno becomes EAGAIN.
|
||||
When data has been read successfully, four bytes are placed in
|
||||
the data buffer and the value four is returned.
|
||||
|
||||
|
||||
/ibox
|
||||
The second SPU to CPU communication mailbox. This file is similar to
|
||||
the first mailbox file, but can be read in blocking I/O mode, and the
|
||||
poll family of system calls can be used to wait for it. The possible
|
||||
operations on an open ibox file are:
|
||||
|
||||
read(2)
|
||||
If a count smaller than four is requested, read returns -1 and
|
||||
sets errno to EINVAL. If there is no data available in the mail
|
||||
box and the file descriptor has been opened with O_NONBLOCK, the
|
||||
return value is set to -1 and errno becomes EAGAIN.
|
||||
|
||||
If there is no data available in the mail box and the file
|
||||
descriptor has been opened without O_NONBLOCK, the call will
|
||||
block until the SPU writes to its interrupt mailbox channel.
|
||||
When data has been read successfully, four bytes are placed in
|
||||
the data buffer and the value four is returned.
|
||||
|
||||
poll(2)
|
||||
Poll on the ibox file returns (POLLIN | POLLRDNORM) whenever
|
||||
data is available for reading.
|
||||
|
||||
|
||||
/wbox
|
||||
The CPU to SPU communication mailbox. It is write-only and can be written
|
||||
in units of 32 bits. If the mailbox is full, write() will block and
|
||||
poll can be used to wait for it becoming empty again. The possible
|
||||
operations on an open wbox file are: write(2) If a count smaller than
|
||||
four is requested, write returns -1 and sets errno to EINVAL. If there
|
||||
is no space available in the mail box and the file descriptor has been
|
||||
opened with O_NONBLOCK, the return value is set to -1 and errno becomes
|
||||
EAGAIN.
|
||||
|
||||
If there is no space available in the mail box and the file descriptor
|
||||
has been opened without O_NONBLOCK, the call will block until the SPU
|
||||
reads from its PPE mailbox channel. When data has been written success-
|
||||
fully, four bytes are taken from the data buffer and the value four is
|
||||
returned.
|
||||
|
||||
poll(2)
|
||||
Poll on the wbox file returns (POLLOUT | POLLWRNORM) whenever
|
||||
space is available for writing.
|
||||
|
||||
|
||||
/mbox_stat
|
||||
/ibox_stat
|
||||
/wbox_stat
|
||||
Read-only files that contain the length of the current queue, i.e. how
|
||||
many words can be read from mbox or ibox or how many words can be
|
||||
written to wbox without blocking. The files can be read only in 4-byte
|
||||
units and return a big-endian binary integer number. The possible
|
||||
operations on an open *box_stat file are:
|
||||
|
||||
read(2)
|
||||
If a count smaller than four is requested, read returns -1 and
|
||||
sets errno to EINVAL. Otherwise, a four byte value is placed in
|
||||
the data buffer, containing the number of elements that can be
|
||||
read from (for mbox_stat and ibox_stat) or written to (for
|
||||
wbox_stat) the respective mail box without blocking or resulting
|
||||
in EAGAIN.
|
||||
|
||||
|
||||
/npc
|
||||
/decr
|
||||
/decr_status
|
||||
/spu_tag_mask
|
||||
/event_mask
|
||||
/srr0
|
||||
Internal registers of the SPU. The representation is an ASCII string
|
||||
with the numeric value of the next instruction to be executed. These
|
||||
can be used in read/write mode for debugging, but normal operation of
|
||||
programs should not rely on them because access to any of them except
|
||||
npc requires an SPU context save and is therefore very inefficient.
|
||||
|
||||
The contents of these files are:
|
||||
|
||||
npc Next Program Counter
|
||||
|
||||
decr SPU Decrementer
|
||||
|
||||
decr_status Decrementer Status
|
||||
|
||||
spu_tag_mask MFC tag mask for SPU DMA
|
||||
|
||||
event_mask Event mask for SPU interrupts
|
||||
|
||||
srr0 Interrupt Return address register
|
||||
|
||||
|
||||
The possible operations on an open npc, decr, decr_status,
|
||||
spu_tag_mask, event_mask or srr0 file are:
|
||||
|
||||
read(2)
|
||||
When the count supplied to the read call is shorter than the
|
||||
required length for the pointer value plus a newline character,
|
||||
subsequent reads from the same file descriptor will result in
|
||||
completing the string, regardless of changes to the register by
|
||||
a running SPU task. When a complete string has been read, all
|
||||
subsequent read operations will return zero bytes and a new file
|
||||
descriptor needs to be opened to read the value again.
|
||||
|
||||
write(2)
|
||||
A write operation on the file results in setting the register to
|
||||
the value given in the string. The string is parsed from the
|
||||
beginning to the first non-numeric character or the end of the
|
||||
buffer. Subsequent writes to the same file descriptor overwrite
|
||||
the previous setting.
|
||||
|
||||
|
||||
/fpcr
|
||||
This file gives access to the Floating Point Status and Control Regis-
|
||||
ter as a four byte long file. The operations on the fpcr file are:
|
||||
|
||||
read(2)
|
||||
If a count smaller than four is requested, read returns -1 and
|
||||
sets errno to EINVAL. Otherwise, a four byte value is placed in
|
||||
the data buffer, containing the current value of the fpcr regis-
|
||||
ter.
|
||||
|
||||
write(2)
|
||||
If a count smaller than four is requested, write returns -1 and
|
||||
sets errno to EINVAL. Otherwise, a four byte value is copied
|
||||
from the data buffer, updating the value of the fpcr register.
|
||||
|
||||
|
||||
/signal1
|
||||
/signal2
|
||||
The two signal notification channels of an SPU. These are read-write
|
||||
files that operate on a 32 bit word. Writing to one of these files
|
||||
triggers an interrupt on the SPU. The value written to the signal
|
||||
files can be read from the SPU through a channel read or from host user
|
||||
space through the file. After the value has been read by the SPU, it
|
||||
is reset to zero. The possible operations on an open signal1 or sig-
|
||||
nal2 file are:
|
||||
|
||||
read(2)
|
||||
If a count smaller than four is requested, read returns -1 and
|
||||
sets errno to EINVAL. Otherwise, a four byte value is placed in
|
||||
the data buffer, containing the current value of the specified
|
||||
signal notification register.
|
||||
|
||||
write(2)
|
||||
If a count smaller than four is requested, write returns -1 and
|
||||
sets errno to EINVAL. Otherwise, a four byte value is copied
|
||||
from the data buffer, updating the value of the specified signal
|
||||
notification register. The signal notification register will
|
||||
either be replaced with the input data or will be updated to the
|
||||
bitwise OR of the old value and the input data, depending on the
|
||||
contents of the signal1_type, or signal2_type respectively,
|
||||
file.
|
||||
|
||||
|
||||
/signal1_type
|
||||
/signal2_type
|
||||
These two files change the behavior of the signal1 and signal2 notifi-
|
||||
cation files. They contain a numerical ASCII string which is read as
|
||||
either "1" or "0". In mode 0 (overwrite), the hardware replaces the
|
||||
contents of the signal channel with the data that is written to it. In
|
||||
mode 1 (logical OR), the hardware accumulates the bits that are subse-
|
||||
quently written to it. The possible operations on an open signal1_type
|
||||
or signal2_type file are:
|
||||
|
||||
read(2)
|
||||
When the count supplied to the read call is shorter than the
|
||||
required length for the digit plus a newline character, subse-
|
||||
quent reads from the same file descriptor will result in com-
|
||||
pleting the string. When a complete string has been read, all
|
||||
subsequent read operations will return zero bytes and a new file
|
||||
descriptor needs to be opened to read the value again.
|
||||
|
||||
write(2)
|
||||
A write operation on the file results in setting the register to
|
||||
the value given in the string. The string is parsed from the
|
||||
beginning to the first non-numeric character or the end of the
|
||||
buffer. Subsequent writes to the same file descriptor overwrite
|
||||
the previous setting.
|
||||
|
||||
|
||||
EXAMPLES
|
||||
/etc/fstab entry
|
||||
none /spu spufs gid=spu 0 0
|
||||
|
||||
|
||||
AUTHORS
|
||||
Arnd Bergmann <arndb@de.ibm.com>, Mark Nutter <mnutter@us.ibm.com>,
|
||||
Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
|
||||
|
||||
SEE ALSO
|
||||
capabilities(7), close(2), spu_create(2), spu_run(2), spufs(7)
|
||||
|
||||
|
||||
|
||||
Linux 2005-09-28 SPUFS(2)
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
SPU_RUN(2) Linux Programmer's Manual SPU_RUN(2)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
spu_run - execute an spu context
|
||||
|
||||
|
||||
SYNOPSIS
|
||||
#include <sys/spu.h>
|
||||
|
||||
int spu_run(int fd, unsigned int *npc, unsigned int *event);
|
||||
|
||||
DESCRIPTION
|
||||
The spu_run system call is used on PowerPC machines that implement the
|
||||
Cell Broadband Engine Architecture in order to access Synergistic Pro-
|
||||
cessor Units (SPUs). It uses the fd that was returned from spu_cre-
|
||||
ate(2) to address a specific SPU context. When the context gets sched-
|
||||
uled to a physical SPU, it starts execution at the instruction pointer
|
||||
passed in npc.
|
||||
|
||||
Execution of SPU code happens synchronously, meaning that spu_run does
|
||||
not return while the SPU is still running. If there is a need to exe-
|
||||
cute SPU code in parallel with other code on either the main CPU or
|
||||
other SPUs, you need to create a new thread of execution first, e.g.
|
||||
using the pthread_create(3) call.
|
||||
|
||||
When spu_run returns, the current value of the SPU instruction pointer
|
||||
is written back to npc, so you can call spu_run again without updating
|
||||
the pointers.
|
||||
|
||||
event can be a NULL pointer or point to an extended status code that
|
||||
gets filled when spu_run returns. It can be one of the following con-
|
||||
stants:
|
||||
|
||||
SPE_EVENT_DMA_ALIGNMENT
|
||||
A DMA alignment error
|
||||
|
||||
SPE_EVENT_SPE_DATA_SEGMENT
|
||||
A DMA segmentation error
|
||||
|
||||
SPE_EVENT_SPE_DATA_STORAGE
|
||||
A DMA storage error
|
||||
|
||||
If NULL is passed as the event argument, these errors will result in a
|
||||
signal delivered to the calling process.
|
||||
|
||||
RETURN VALUE
|
||||
spu_run returns the value of the spu_status register or -1 to indicate
|
||||
an error and set errno to one of the error codes listed below. The
|
||||
spu_status register value contains a bit mask of status codes and
|
||||
optionally a 14 bit code returned from the stop-and-signal instruction
|
||||
on the SPU. The bit masks for the status codes are:
|
||||
|
||||
0x02 SPU was stopped by stop-and-signal.
|
||||
|
||||
0x04 SPU was stopped by halt.
|
||||
|
||||
0x08 SPU is waiting for a channel.
|
||||
|
||||
0x10 SPU is in single-step mode.
|
||||
|
||||
0x20 SPU has tried to execute an invalid instruction.
|
||||
|
||||
0x40 SPU has tried to access an invalid channel.
|
||||
|
||||
0x3fff0000
|
||||
The bits masked with this value contain the code returned from
|
||||
stop-and-signal.
|
||||
|
||||
There are always one or more of the lower eight bits set or an error
|
||||
code is returned from spu_run.
|
||||
|
||||
ERRORS
|
||||
EAGAIN or EWOULDBLOCK
|
||||
fd is in non-blocking mode and spu_run would block.
|
||||
|
||||
EBADF fd is not a valid file descriptor.
|
||||
|
||||
EFAULT npc is not a valid pointer or event is neither NULL nor a valid
|
||||
pointer.
|
||||
|
||||
EINTR A signal occurred while spu_run was in progress. The npc value
|
||||
has been updated to the new program counter value if necessary.
|
||||
|
||||
EINVAL fd is not a file descriptor returned from spu_create(2).
|
||||
|
||||
ENOMEM Insufficient memory was available to handle a page fault result-
|
||||
ing from an MFC direct memory access.
|
||||
|
||||
ENOSYS the functionality is not provided by the current system, because
|
||||
either the hardware does not provide SPUs or the spufs module is
|
||||
not loaded.
|
||||
|
||||
|
||||
NOTES
|
||||
spu_run is meant to be used from libraries that implement a more
|
||||
abstract interface to SPUs, not to be used from regular applications.
|
||||
See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec-
|
||||
ommended libraries.
|
||||
|
||||
|
||||
CONFORMING TO
|
||||
This call is Linux specific and only implemented by the ppc64 architec-
|
||||
ture. Programs using this system call are not portable.
|
||||
|
||||
|
||||
BUGS
|
||||
The code does not yet fully implement all features outlined here.
|
||||
|
||||
|
||||
AUTHOR
|
||||
Arnd Bergmann <arndb@de.ibm.com>
|
||||
|
||||
SEE ALSO
|
||||
capabilities(7), close(2), spu_create(2), spufs(7)
|
||||
|
||||
|
||||
|
||||
Linux 2005-09-28 SPU_RUN(2)
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
SPU_CREATE(2) Linux Programmer's Manual SPU_CREATE(2)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
spu_create - create a new spu context
|
||||
|
||||
|
||||
SYNOPSIS
|
||||
#include <sys/types.h>
|
||||
#include <sys/spu.h>
|
||||
|
||||
int spu_create(const char *pathname, int flags, mode_t mode);
|
||||
|
||||
DESCRIPTION
|
||||
The spu_create system call is used on PowerPC machines that implement
|
||||
the Cell Broadband Engine Architecture in order to access Synergistic
|
||||
Processor Units (SPUs). It creates a new logical context for an SPU in
|
||||
pathname and returns a handle associated with it. pathname must
|
||||
point to a non-existing directory in the mount point of the SPU file
|
||||
system (spufs). When spu_create is successful, a directory gets cre-
|
||||
ated on pathname and it is populated with files.
|
||||
|
||||
The returned file handle can only be passed to spu_run(2) or closed,
|
||||
other operations are not defined on it. When it is closed, all associ-
|
||||
ated directory entries in spufs are removed. When the last file handle
|
||||
pointing either inside of the context directory or to this file
|
||||
descriptor is closed, the logical SPU context is destroyed.
|
||||
|
||||
The parameter flags can be zero or any bitwise or'd combination of the
|
||||
following constants:
|
||||
|
||||
SPU_RAWIO
|
||||
Allow mapping of some of the hardware registers of the SPU into
|
||||
user space. This flag requires the CAP_SYS_RAWIO capability, see
|
||||
capabilities(7).
|
||||
|
||||
The mode parameter specifies the permissions used for creating the new
|
||||
directory in spufs. mode is modified with the user's umask(2) value
|
||||
and then used for both the directory and the files contained in it. The
|
||||
file permissions mask out some more bits of mode because they typically
|
||||
support only read or write access. See stat(2) for a full list of the
|
||||
possible mode values.
|
||||
|
||||
|
||||
RETURN VALUE
|
||||
spu_create returns a new file descriptor. It may return -1 to indicate
|
||||
an error condition and set errno to one of the error codes listed
|
||||
below.
|
||||
|
||||
|
||||
ERRORS
|
||||
EACCES
|
||||
The current user does not have write access on the spufs mount
|
||||
point.
|
||||
|
||||
EEXIST An SPU context already exists at the given path name.
|
||||
|
||||
EFAULT pathname is not a valid string pointer in the current address
|
||||
space.
|
||||
|
||||
EINVAL pathname is not a directory in the spufs mount point.
|
||||
|
||||
ELOOP Too many symlinks were found while resolving pathname.
|
||||
|
||||
EMFILE The process has reached its maximum open file limit.
|
||||
|
||||
ENAMETOOLONG
|
||||
pathname was too long.
|
||||
|
||||
ENFILE The system has reached the global open file limit.
|
||||
|
||||
ENOENT Part of pathname could not be resolved.
|
||||
|
||||
ENOMEM The kernel could not allocate all resources required.
|
||||
|
||||
ENOSPC There are not enough SPU resources available to create a new
|
||||
context or the user specific limit for the number of SPU con-
|
||||
texts has been reached.
|
||||
|
||||
ENOSYS the functionality is not provided by the current system, because
|
||||
either the hardware does not provide SPUs or the spufs module is
|
||||
not loaded.
|
||||
|
||||
ENOTDIR
|
||||
A part of pathname is not a directory.
|
||||
|
||||
|
||||
|
||||
NOTES
|
||||
spu_create is meant to be used from libraries that implement a more
|
||||
abstract interface to SPUs, not to be used from regular applications.
|
||||
See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec-
|
||||
ommended libraries.
|
||||
|
||||
|
||||
FILES
|
||||
pathname must point to a location beneath the mount point of spufs. By
|
||||
convention, it gets mounted in /spu.
|
||||
|
||||
|
||||
CONFORMING TO
|
||||
This call is Linux specific and only implemented by the ppc64 architec-
|
||||
ture. Programs using this system call are not portable.
|
||||
|
||||
|
||||
BUGS
|
||||
The code does not yet fully implement all features outlined here.
|
||||
|
||||
|
||||
AUTHOR
|
||||
Arnd Bergmann <arndb@de.ibm.com>
|
||||
|
||||
SEE ALSO
|
||||
capabilities(7), close(2), spu_run(2), spufs(7)
|
||||
|
||||
|
||||
|
||||
Linux 2005-09-28 SPU_CREATE(2)
|
|
@ -1,4 +1,5 @@
|
|||
Accessing PCI device resources through sysfs
|
||||
--------------------------------------------
|
||||
|
||||
sysfs, usually mounted at /sys, provides access to PCI resources on platforms
|
||||
that support it. For example, a given bus might look like this:
|
||||
|
@ -47,14 +48,21 @@ files, each with their own function.
|
|||
binary - file contains binary data
|
||||
cpumask - file contains a cpumask type
|
||||
|
||||
The read only files are informational, writes to them will be ignored.
|
||||
Writable files can be used to perform actions on the device (e.g. changing
|
||||
config space, detaching a device). mmapable files are available via an
|
||||
mmap of the file at offset 0 and can be used to do actual device programming
|
||||
from userspace. Note that some platforms don't support mmapping of certain
|
||||
resources, so be sure to check the return value from any attempted mmap.
|
||||
The read only files are informational, writes to them will be ignored, with
|
||||
the exception of the 'rom' file. Writable files can be used to perform
|
||||
actions on the device (e.g. changing config space, detaching a device).
|
||||
mmapable files are available via an mmap of the file at offset 0 and can be
|
||||
used to do actual device programming from userspace. Note that some platforms
|
||||
don't support mmapping of certain resources, so be sure to check the return
|
||||
value from any attempted mmap.
|
||||
|
||||
The 'rom' file is special in that it provides read-only access to the device's
|
||||
ROM file, if available. It's disabled by default, however, so applications
|
||||
should write the string "1" to the file to enable it before attempting a read
|
||||
call, and disable it following the access by writing "0" to the file.
|
||||
|
||||
Accessing legacy resources through sysfs
|
||||
----------------------------------------
|
||||
|
||||
Legacy I/O port and ISA memory resources are also provided in sysfs if the
|
||||
underlying platform supports them. They're located in the PCI class hierarchy,
|
||||
|
@ -75,6 +83,7 @@ simply dereference the returned pointer (after checking for errors of course)
|
|||
to access legacy memory space.
|
||||
|
||||
Supporting PCI access on new platforms
|
||||
--------------------------------------
|
||||
|
||||
In order to support PCI resource mapping as described above, Linux platform
|
||||
code must define HAVE_PCI_MMAP and provide a pci_mmap_page_range function.
|
||||
|
|
|
@ -78,6 +78,18 @@ use up all the memory on the machine; but enhances the scalability of
|
|||
that instance in a system with many cpus making intensive use of it.
|
||||
|
||||
|
||||
tmpfs has a mount option to set the NUMA memory allocation policy for
|
||||
all files in that instance:
|
||||
mpol=interleave prefers to allocate memory from each node in turn
|
||||
mpol=default prefers to allocate memory from the local node
|
||||
mpol=bind prefers to allocate from mpol_nodelist
|
||||
mpol=preferred prefers to allocate from first node in mpol_nodelist
|
||||
|
||||
The following mount option is used in conjunction with mpol=interleave,
|
||||
mpol=bind or mpol=preferred:
|
||||
mpol_nodelist: nodelist suitable for parsing with nodelist_parse.
|
||||
|
||||
|
||||
To specify the initial root directory you can use the following mount
|
||||
options:
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
The High Precision Event Timer (HPET) hardware is the future replacement
|
||||
for the 8254 and Real Time Clock (RTC) periodic timer functionality.
|
||||
Each HPET can have up two 32 timers. It is possible to configure the
|
||||
Each HPET can have up to 32 timers. It is possible to configure the
|
||||
first two timers as legacy replacements for 8254 and RTC periodic timers.
|
||||
A specification done by Intel and Microsoft can be found at
|
||||
<http://www.intel.com/hardwaredesign/hpetspec.htm>.
|
||||
|
|
|
@ -0,0 +1,178 @@
|
|||
|
||||
hrtimers - subsystem for high-resolution kernel timers
|
||||
----------------------------------------------------
|
||||
|
||||
This patch introduces a new subsystem for high-resolution kernel timers.
|
||||
|
||||
One might ask the question: we already have a timer subsystem
|
||||
(kernel/timers.c), why do we need two timer subsystems? After a lot of
|
||||
back and forth trying to integrate high-resolution and high-precision
|
||||
features into the existing timer framework, and after testing various
|
||||
such high-resolution timer implementations in practice, we came to the
|
||||
conclusion that the timer wheel code is fundamentally not suitable for
|
||||
such an approach. We initially didn't believe this ('there must be a way
|
||||
to solve this'), and spent a considerable effort trying to integrate
|
||||
things into the timer wheel, but we failed. In hindsight, there are
|
||||
several reasons why such integration is hard/impossible:
|
||||
|
||||
- the forced handling of low-resolution and high-resolution timers in
|
||||
the same way leads to a lot of compromises, macro magic and #ifdef
|
||||
mess. The timers.c code is very "tightly coded" around jiffies and
|
||||
32-bitness assumptions, and has been honed and micro-optimized for a
|
||||
relatively narrow use case (jiffies in a relatively narrow HZ range)
|
||||
for many years - and thus even small extensions to it easily break
|
||||
the wheel concept, leading to even worse compromises. The timer wheel
|
||||
code is very good and tight code, there's zero problems with it in its
|
||||
current usage - but it is simply not suitable to be extended for
|
||||
high-res timers.
|
||||
|
||||
- the unpredictable [O(N)] overhead of cascading leads to delays which
|
||||
necessitate a more complex handling of high resolution timers, which
|
||||
in turn decreases robustness. Such a design still led to rather large
|
||||
timing inaccuracies. Cascading is a fundamental property of the timer
|
||||
wheel concept, it cannot be 'designed out' without inevitably
|
||||
degrading other portions of the timers.c code in an unacceptable way.
|
||||
|
||||
- the implementation of the current posix-timer subsystem on top of
|
||||
the timer wheel has already introduced a quite complex handling of
|
||||
the required readjusting of absolute CLOCK_REALTIME timers at
|
||||
settimeofday or NTP time - further underlining our experience by
|
||||
example: that the timer wheel data structure is too rigid for high-res
|
||||
timers.
|
||||
|
||||
- the timer wheel code is most optimal for use cases which can be
|
||||
identified as "timeouts". Such timeouts are usually set up to cover
|
||||
error conditions in various I/O paths, such as networking and block
|
||||
I/O. The vast majority of those timers never expire and are rarely
|
||||
recascaded because the expected correct event arrives in time so they
|
||||
can be removed from the timer wheel before any further processing of
|
||||
them becomes necessary. Thus the users of these timeouts can accept
|
||||
the granularity and precision tradeoffs of the timer wheel, and
|
||||
largely expect the timer subsystem to have near-zero overhead.
|
||||
Accurate timing for them is not a core purpose - in fact most of the
|
||||
timeout values used are ad-hoc. For them it is at most a necessary
|
||||
evil to guarantee the processing of actual timeout completions
|
||||
(because most of the timeouts are deleted before completion), which
|
||||
should thus be as cheap and unintrusive as possible.
|
||||
|
||||
The primary users of precision timers are user-space applications that
|
||||
utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
|
||||
users like drivers and subsystems which require precise timed events
|
||||
(e.g. multimedia) can benefit from the availability of a separate
|
||||
high-resolution timer subsystem as well.
|
||||
|
||||
While this subsystem does not offer high-resolution clock sources just
|
||||
yet, the hrtimer subsystem can be easily extended with high-resolution
|
||||
clock capabilities, and patches for that exist and are maturing quickly.
|
||||
The increasing demand for realtime and multimedia applications along
|
||||
with other potential users for precise timers gives another reason to
|
||||
separate the "timeout" and "precise timer" subsystems.
|
||||
|
||||
Another potential benefit is that such a separation allows even more
|
||||
special-purpose optimization of the existing timer wheel for the low
|
||||
resolution and low precision use cases - once the precision-sensitive
|
||||
APIs are separated from the timer wheel and are migrated over to
|
||||
hrtimers. E.g. we could decrease the frequency of the timeout subsystem
|
||||
from 250 Hz to 100 Hz (or even smaller).
|
||||
|
||||
hrtimer subsystem implementation details
|
||||
----------------------------------------
|
||||
|
||||
the basic design considerations were:
|
||||
|
||||
- simplicity
|
||||
|
||||
- data structure not bound to jiffies or any other granularity. All the
|
||||
kernel logic works at 64-bit nanoseconds resolution - no compromises.
|
||||
|
||||
- simplification of existing, timing related kernel code
|
||||
|
||||
another basic requirement was the immediate enqueueing and ordering of
|
||||
timers at activation time. After looking at several possible solutions
|
||||
such as radix trees and hashes, we chose the red black tree as the basic
|
||||
data structure. Rbtrees are available as a library in the kernel and are
|
||||
used in various performance-critical areas of e.g. memory management and
|
||||
file systems. The rbtree is solely used for time sorted ordering, while
|
||||
a separate list is used to give the expiry code fast access to the
|
||||
queued timers, without having to walk the rbtree.
|
||||
|
||||
(This separate list is also useful for later when we'll introduce
|
||||
high-resolution clocks, where we need separate pending and expired
|
||||
queues while keeping the time-order intact.)
|
||||
|
||||
Time-ordered enqueueing is not purely for the purposes of
|
||||
high-resolution clocks though, it also simplifies the handling of
|
||||
absolute timers based on a low-resolution CLOCK_REALTIME. The existing
|
||||
implementation needed to keep an extra list of all armed absolute
|
||||
CLOCK_REALTIME timers along with complex locking. In case of
|
||||
settimeofday and NTP, all the timers (!) had to be dequeued, the
|
||||
time-changing code had to fix them up one by one, and all of them had to
|
||||
be enqueued again. The time-ordered enqueueing and the storage of the
|
||||
expiry time in absolute time units removes all this complex and poorly
|
||||
scaling code from the posix-timer implementation - the clock can simply
|
||||
be set without having to touch the rbtree. This also makes the handling
|
||||
of posix-timers simpler in general.
|
||||
|
||||
The locking and per-CPU behavior of hrtimers was mostly taken from the
|
||||
existing timer wheel code, as it is mature and well suited. Sharing code
|
||||
was not really a win, due to the different data structures. Also, the
|
||||
hrtimer functions now have clearer behavior and clearer names - such as
|
||||
hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly
|
||||
equivalent to del_timer() and del_timer_sync()] - so there's no direct
|
||||
1:1 mapping between them on the algorithmic level, and thus no real
|
||||
potential for code sharing either.
|
||||
|
||||
Basic data types: every time value, absolute or relative, is in a
|
||||
special nanosecond-resolution type: ktime_t. The kernel-internal
|
||||
representation of ktime_t values and operations is implemented via
|
||||
macros and inline functions, and can be switched between a "hybrid
|
||||
union" type and a plain "scalar" 64bit nanoseconds representation (at
|
||||
compile time). The hybrid union type optimizes time conversions on 32bit
|
||||
CPUs. This build-time-selectable ktime_t storage format was implemented
|
||||
to avoid the performance impact of 64-bit multiplications and divisions
|
||||
on 32bit CPUs. Such operations are frequently necessary to convert
|
||||
between the storage formats provided by kernel and userspace interfaces
|
||||
and the internal time format. (See include/linux/ktime.h for further
|
||||
details.)
|
||||
|
||||
hrtimers - rounding of timer values
|
||||
-----------------------------------
|
||||
|
||||
the hrtimer code will round timer events to lower-resolution clocks
|
||||
because it has to. Otherwise it will do no artificial rounding at all.
|
||||
|
||||
one question is, what resolution value should be returned to the user by
|
||||
the clock_getres() interface. This will return whatever real resolution
|
||||
a given clock has - be it low-res, high-res, or artificially-low-res.
|
||||
|
||||
hrtimers - testing and verification
|
||||
----------------------------------
|
||||
|
||||
We used the high-resolution clock subsystem on top of hrtimers to verify
|
||||
the hrtimer implementation details in practice, and we also ran the posix
|
||||
timer tests in order to ensure specification compliance. We also ran
|
||||
tests on low-resolution clocks.
|
||||
|
||||
The hrtimer patch converts the following kernel functionality to use
|
||||
hrtimers:
|
||||
|
||||
- nanosleep
|
||||
- itimers
|
||||
- posix-timers
|
||||
|
||||
The conversion of nanosleep and posix-timers enabled the unification of
|
||||
nanosleep and clock_nanosleep.
|
||||
|
||||
The code was successfully compiled for the following platforms:
|
||||
|
||||
i386, x86_64, ARM, PPC, PPC64, IA64
|
||||
|
||||
The code was run-tested on the following platforms:
|
||||
|
||||
i386(UP/SMP), x86_64(UP/SMP), ARM, PPC
|
||||
|
||||
hrtimers were also integrated into the -rt tree, along with a
|
||||
hrtimers-based high-resolution clock implementation, so the hrtimers
|
||||
code got a healthy amount of testing and use in practice.
|
||||
|
||||
Thomas Gleixner, Ingo Molnar
|
|
@ -185,7 +185,7 @@ VII. Getting Parameters
|
|||
ENOMEM Kernel memory allocation error
|
||||
|
||||
A return value of 0 does not mean that the value was actually
|
||||
properly retreived. The user should check the result list
|
||||
properly retrieved. The user should check the result list
|
||||
to determine the specific status of the transaction.
|
||||
|
||||
VIII. Downloading Software
|
||||
|
|
|
@ -3,7 +3,7 @@ Apple Touchpad Driver (appletouch)
|
|||
Copyright (C) 2005 Stelian Pop <stelian@popies.net>
|
||||
|
||||
appletouch is a Linux kernel driver for the USB touchpad found on post
|
||||
February 2005 Apple Alu Powerbooks.
|
||||
February 2005 and October 2005 Apple Aluminium Powerbooks.
|
||||
|
||||
This driver is derived from Johannes Berg's appletrackpad driver[1], but it has
|
||||
been improved in some areas:
|
||||
|
@ -13,7 +13,8 @@ been improved in some areas:
|
|||
|
||||
Credits go to Johannes Berg for reverse-engineering the touchpad protocol,
|
||||
Frank Arnold for further improvements, and Alex Harper for some additional
|
||||
information about the inner workings of the touchpad sensors.
|
||||
information about the inner workings of the touchpad sensors. Michael
|
||||
Hanselmann added support for the October 2005 models.
|
||||
|
||||
Usage:
|
||||
------
|
||||
|
|
|
@ -120,7 +120,7 @@ to the unique id assigned by the driver. This data is required for performing
|
|||
some operations (removing an effect, controlling the playback).
|
||||
This id field must be set to -1 by the user in order to tell the driver to
|
||||
allocate a new effect.
|
||||
See <linux/input.h> for a description of the ff_effect stuct. You should also
|
||||
See <linux/input.h> for a description of the ff_effect struct. You should also
|
||||
find help in a few sketches, contained in files shape.fig and interactive.fig.
|
||||
You need xfig to visualize these files.
|
||||
|
||||
|
|
|
@ -946,7 +946,7 @@ HDIO_SCAN_HWIF register and (re)scan interface
|
|||
|
||||
This ioctl initializes the addresses and irq for a disk
|
||||
controller, probes for drives, and creates /proc/ide
|
||||
interfaces as appropiate.
|
||||
interfaces as appropriate.
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1033,9 +1033,9 @@ When kbuild executes the following steps are followed (roughly):
|
|||
|
||||
Example:
|
||||
#arch/i386/Makefile
|
||||
GCC_VERSION := $(call cc-version)
|
||||
cflags-y += $(shell \
|
||||
if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
|
||||
if [ $(call cc-version) -ge 0300 ] ; then \
|
||||
echo "-mregparm=3"; fi ;)
|
||||
|
||||
In the above example -mregparm=3 is only used for gcc version greater
|
||||
than or equal to gcc 3.0.
|
||||
|
|
|
@ -177,3 +177,25 @@ document trapinfo
|
|||
'trapinfo <pid>' will tell you by which trap & possibly
|
||||
address the kernel panicked.
|
||||
end
|
||||
|
||||
|
||||
define dmesg
|
||||
set $i = 0
|
||||
set $end_idx = (log_end - 1) & (log_buf_len - 1)
|
||||
|
||||
while ($i < logged_chars)
|
||||
set $idx = (log_end - 1 - logged_chars + $i) & (log_buf_len - 1)
|
||||
|
||||
if ($idx + 100 <= $end_idx) || \
|
||||
($end_idx <= $idx && $idx + 100 < log_buf_len)
|
||||
printf "%.100s", &log_buf[$idx]
|
||||
set $i = $i + 100
|
||||
else
|
||||
printf "%c", log_buf[$idx]
|
||||
set $i = $i + 1
|
||||
end
|
||||
end
|
||||
end
|
||||
document dmesg
|
||||
print the kernel ring buffer
|
||||
end
|
||||
|
|
|
@ -4,10 +4,10 @@ Documentation for kdump - the kexec-based crash dumping solution
|
|||
DESIGN
|
||||
======
|
||||
|
||||
Kdump uses kexec to reboot to a second kernel whenever a dump needs to be taken.
|
||||
This second kernel is booted with very little memory. The first kernel reserves
|
||||
the section of memory that the second kernel uses. This ensures that on-going
|
||||
DMA from the first kernel does not corrupt the second kernel.
|
||||
Kdump uses kexec to reboot to a second kernel whenever a dump needs to be
|
||||
taken. This second kernel is booted with very little memory. The first kernel
|
||||
reserves the section of memory that the second kernel uses. This ensures that
|
||||
on-going DMA from the first kernel does not corrupt the second kernel.
|
||||
|
||||
All the necessary information about Core image is encoded in ELF format and
|
||||
stored in reserved area of memory before crash. Physical address of start of
|
||||
|
@ -35,77 +35,82 @@ In the second kernel, "old memory" can be accessed in two ways.
|
|||
SETUP
|
||||
=====
|
||||
|
||||
1) Download http://www.xmission.com/~ebiederm/files/kexec/kexec-tools-1.101.tar.gz
|
||||
and apply http://lse.sourceforge.net/kdump/patches/kexec-tools-1.101-kdump.patch
|
||||
and after that build the source.
|
||||
1) Download the upstream kexec-tools userspace package from
|
||||
http://www.xmission.com/~ebiederm/files/kexec/kexec-tools-1.101.tar.gz.
|
||||
|
||||
2) Download and build the appropriate (2.6.13-rc1 onwards) vanilla kernel.
|
||||
Apply the latest consolidated kdump patch on top of kexec-tools-1.101
|
||||
from http://lse.sourceforge.net/kdump/. This arrangement has been made
|
||||
till all the userspace patches supporting kdump are integrated with
|
||||
upstream kexec-tools userspace.
|
||||
|
||||
2) Download and build the appropriate (2.6.13-rc1 onwards) vanilla kernels.
|
||||
Two kernels need to be built in order to get this feature working.
|
||||
Following are the steps to properly configure the two kernels specific
|
||||
to kexec and kdump features:
|
||||
|
||||
A) First kernel:
|
||||
A) First kernel or regular kernel:
|
||||
----------------------------------
|
||||
a) Enable "kexec system call" feature (in Processor type and features).
|
||||
CONFIG_KEXEC=y
|
||||
b) This kernel's physical load address should be the default value of
|
||||
0x100000 (0x100000, 1 MB) (in Processor type and features).
|
||||
CONFIG_PHYSICAL_START=0x100000
|
||||
c) Enable "sysfs file system support" (in Pseudo filesystems).
|
||||
CONFIG_SYSFS=y
|
||||
CONFIG_KEXEC=y
|
||||
b) Enable "sysfs file system support" (in Pseudo filesystems).
|
||||
CONFIG_SYSFS=y
|
||||
c) make
|
||||
d) Boot into first kernel with the command line parameter "crashkernel=Y@X".
|
||||
Use appropriate values for X and Y. Y denotes how much memory to reserve
|
||||
for the second kernel, and X denotes at what physical address the reserved
|
||||
memory section starts. For example: "crashkernel=64M@16M".
|
||||
for the second kernel, and X denotes at what physical address the
|
||||
reserved memory section starts. For example: "crashkernel=64M@16M".
|
||||
|
||||
B) Second kernel:
|
||||
a) Enable "kernel crash dumps" feature (in Processor type and features).
|
||||
CONFIG_CRASH_DUMP=y
|
||||
b) Specify a suitable value for "Physical address where the kernel is
|
||||
loaded" (in Processor type and features). Typically this value
|
||||
should be same as X (See option d) above, e.g., 16 MB or 0x1000000.
|
||||
CONFIG_PHYSICAL_START=0x1000000
|
||||
c) Enable "/proc/vmcore support" (Optional, in Pseudo filesystems).
|
||||
CONFIG_PROC_VMCORE=y
|
||||
d) Disable SMP support and build a UP kernel (Until it is fixed).
|
||||
CONFIG_SMP=n
|
||||
e) Enable "Local APIC support on uniprocessors".
|
||||
CONFIG_X86_UP_APIC=y
|
||||
f) Enable "IO-APIC support on uniprocessors"
|
||||
CONFIG_X86_UP_IOAPIC=y
|
||||
|
||||
Note: i) Options a) and b) depend upon "Configure standard kernel features
|
||||
(for small systems)" (under General setup).
|
||||
ii) Option a) also depends on CONFIG_HIGHMEM (under Processor
|
||||
type and features).
|
||||
iii) Both option a) and b) are under "Processor type and features".
|
||||
B) Second kernel or dump capture kernel:
|
||||
---------------------------------------
|
||||
a) For i386 architecture enable Highmem support
|
||||
CONFIG_HIGHMEM=y
|
||||
b) Enable "kernel crash dumps" feature (under "Processor type and features")
|
||||
CONFIG_CRASH_DUMP=y
|
||||
c) Make sure a suitable value is set for "Physical address where the kernel is
|
||||
loaded" (under "Processor type and features"). By default this value
|
||||
is 0x1000000 (16MB) and it should be same as X (See option d above),
|
||||
e.g., 16 MB or 0x1000000.
|
||||
CONFIG_PHYSICAL_START=0x1000000
|
||||
d) Enable "/proc/vmcore support" (Optional, under "Pseudo filesystems").
|
||||
CONFIG_PROC_VMCORE=y
|
||||
|
||||
3) Boot into the first kernel. You are now ready to try out kexec-based crash
|
||||
dumps.
|
||||
|
||||
4) Load the second kernel to be booted using:
|
||||
3) After booting to regular kernel or first kernel, load the second kernel
|
||||
using the following command:
|
||||
|
||||
kexec -p <second-kernel> --args-linux --elf32-core-headers
|
||||
--append="root=<root-dev> init 1 irqpoll"
|
||||
--append="root=<root-dev> init 1 irqpoll maxcpus=1"
|
||||
|
||||
Note: i) <second-kernel> has to be a vmlinux image. bzImage will not work,
|
||||
as of now.
|
||||
ii) By default ELF headers are stored in ELF64 format. Option
|
||||
--elf32-core-headers forces generation of ELF32 headers. gdb can
|
||||
not open ELF64 headers on 32 bit systems. So creating ELF32
|
||||
headers can come handy for users who have got non-PAE systems and
|
||||
hence have memory less than 4GB.
|
||||
iii) Specify "irqpoll" as command line parameter. This reduces driver
|
||||
initialization failures in second kernel due to shared interrupts.
|
||||
iv) <root-dev> needs to be specified in a format corresponding to
|
||||
the root device name in the output of mount command.
|
||||
v) If you have built the drivers required to mount root file
|
||||
system as modules in <second-kernel>, then, specify
|
||||
--initrd=<initrd-for-second-kernel>.
|
||||
Notes:
|
||||
======
|
||||
i) <second-kernel> has to be a vmlinux image ie uncompressed elf image.
|
||||
bzImage will not work, as of now.
|
||||
ii) --args-linux has to be specified as if kexec is loading an elf image,
|
||||
it needs to know that the arguments supplied are of linux type.
|
||||
iii) By default ELF headers are stored in ELF64 format to support systems
|
||||
with more than 4GB memory. Option --elf32-core-headers forces generation
|
||||
of ELF32 headers. The reason for this option being, as of now gdb can
|
||||
not open vmcore file with ELF64 headers on 32 bit systems. So ELF32
|
||||
headers can be used if one has non-PAE systems and hence memory less
|
||||
than 4GB.
|
||||
iv) Specify "irqpoll" as command line parameter. This reduces driver
|
||||
initialization failures in second kernel due to shared interrupts.
|
||||
v) <root-dev> needs to be specified in a format corresponding to the root
|
||||
device name in the output of mount command.
|
||||
vi) If you have built the drivers required to mount root file system as
|
||||
modules in <second-kernel>, then, specify
|
||||
--initrd=<initrd-for-second-kernel>.
|
||||
vii) Specify maxcpus=1 as, if during first kernel run, if panic happens on
|
||||
non-boot cpus, second kernel doesn't seem to boot up all the cpus.
|
||||
The other option is to always build the second kernel without SMP
|
||||
support ie CONFIG_SMP=n
|
||||
|
||||
5) System reboots into the second kernel when a panic occurs. A module can be
|
||||
written to force the panic or "ALT-SysRq-c" can be used initiate a crash
|
||||
dump for testing purposes.
|
||||
4) After successfully loading the second kernel as above, if a panic occurs
|
||||
system reboots into the second kernel. A module can be written to force
|
||||
the panic or "ALT-SysRq-c" can be used to initiate a crash dump for testing
|
||||
purposes.
|
||||
|
||||
6) Write out the dump file using
|
||||
5) Once the second kernel has booted, write out the dump file using
|
||||
|
||||
cp /proc/vmcore <dump-file>
|
||||
|
||||
|
@ -119,9 +124,9 @@ SETUP
|
|||
|
||||
Entire memory: dd if=/dev/oldmem of=oldmem.001
|
||||
|
||||
|
||||
ANALYSIS
|
||||
========
|
||||
|
||||
Limited analysis can be done using gdb on the dump file copied out of
|
||||
/proc/vmcore. Use vmlinux built with -g and run
|
||||
|
||||
|
@ -132,15 +137,19 @@ work fine.
|
|||
|
||||
Note: gdb cannot analyse core files generated in ELF64 format for i386.
|
||||
|
||||
Latest "crash" (crash-4.0-2.18) as available on Dave Anderson's site
|
||||
http://people.redhat.com/~anderson/ works well with kdump format.
|
||||
|
||||
|
||||
TODO
|
||||
====
|
||||
|
||||
1) Provide a kernel pages filtering mechanism so that core file size is not
|
||||
insane on systems having huge memory banks.
|
||||
2) Modify "crash" tool to make it recognize this dump.
|
||||
2) Relocatable kernel can help in maintaining multiple kernels for crashdump
|
||||
and same kernel as the first kernel can be used to capture the dump.
|
||||
|
||||
|
||||
CONTACT
|
||||
=======
|
||||
|
||||
Vivek Goyal (vgoyal@in.ibm.com)
|
||||
Maneesh Soni (maneesh@in.ibm.com)
|
||||
|
|
|
@ -452,6 +452,11 @@ running once the system is up.
|
|||
|
||||
eata= [HW,SCSI]
|
||||
|
||||
ec_intr= [HW,ACPI] ACPI Embedded Controller interrupt mode
|
||||
Format: <int>
|
||||
0: polling mode
|
||||
non-0: interrupt mode (default)
|
||||
|
||||
eda= [HW,PS2]
|
||||
|
||||
edb= [HW,PS2]
|
||||
|
@ -471,14 +476,15 @@ running once the system is up.
|
|||
arch/i386/kernel/cpu/cpufreq/elanfreq.c.
|
||||
|
||||
elevator= [IOSCHED]
|
||||
Format: {"as" | "cfq" | "deadline" | "noop"}
|
||||
Format: {"anticipatory" | "cfq" | "deadline" | "noop"}
|
||||
See Documentation/block/as-iosched.txt and
|
||||
Documentation/block/deadline-iosched.txt for details.
|
||||
|
||||
elfcorehdr= [IA-32]
|
||||
elfcorehdr= [IA-32, X86_64]
|
||||
Specifies physical address of start of kernel core
|
||||
image elf header.
|
||||
See Documentation/kdump.txt for details.
|
||||
image elf header. Generally kexec loader will
|
||||
pass this option to capture kernel.
|
||||
See Documentation/kdump/kdump.txt for details.
|
||||
|
||||
enforcing [SELINUX] Set initial enforcing status.
|
||||
Format: {"0" | "1"}
|
||||
|
@ -711,9 +717,17 @@ running once the system is up.
|
|||
load_ramdisk= [RAM] List of ramdisks to load from floppy
|
||||
See Documentation/ramdisk.txt.
|
||||
|
||||
lockd.udpport= [NFS]
|
||||
lockd.nlm_grace_period=P [NFS] Assign grace period.
|
||||
Format: <integer>
|
||||
|
||||
lockd.tcpport= [NFS]
|
||||
lockd.nlm_tcpport=N [NFS] Assign TCP port.
|
||||
Format: <integer>
|
||||
|
||||
lockd.nlm_timeout=T [NFS] Assign timeout value.
|
||||
Format: <integer>
|
||||
|
||||
lockd.nlm_udpport=M [NFS] Assign UDP port.
|
||||
Format: <integer>
|
||||
|
||||
logibm.irq= [HW,MOUSE] Logitech Bus Mouse Driver
|
||||
Format: <irq>
|
||||
|
@ -832,7 +846,7 @@ running once the system is up.
|
|||
mem=nopentium [BUGS=IA-32] Disable usage of 4MB pages for kernel
|
||||
memory.
|
||||
|
||||
memmap=exactmap [KNL,IA-32] Enable setting of an exact
|
||||
memmap=exactmap [KNL,IA-32,X86_64] Enable setting of an exact
|
||||
E820 memory map, as specified by the user.
|
||||
Such memmap=exactmap lines can be constructed based on
|
||||
BIOS output or other requirements. See the memmap=nn@ss
|
||||
|
@ -855,6 +869,49 @@ running once the system is up.
|
|||
|
||||
mga= [HW,DRM]
|
||||
|
||||
migration_cost=
|
||||
[KNL,SMP] debug: override scheduler migration costs
|
||||
Format: <level-1-usecs>,<level-2-usecs>,...
|
||||
This debugging option can be used to override the
|
||||
default scheduler migration cost matrix. The numbers
|
||||
are indexed by 'CPU domain distance'.
|
||||
E.g. migration_cost=1000,2000,3000 on an SMT NUMA
|
||||
box will set up an intra-core migration cost of
|
||||
1 msec, an inter-core migration cost of 2 msecs,
|
||||
and an inter-node migration cost of 3 msecs.
|
||||
|
||||
WARNING: using the wrong values here can break
|
||||
scheduler performance, so it's only for scheduler
|
||||
development purposes, not production environments.
|
||||
|
||||
migration_debug=
|
||||
[KNL,SMP] migration cost auto-detect verbosity
|
||||
Format=<0|1|2>
|
||||
If a system's migration matrix reported at bootup
|
||||
seems erroneous then this option can be used to
|
||||
increase verbosity of the detection process.
|
||||
We default to 0 (no extra messages), 1 will print
|
||||
some more information, and 2 will be really
|
||||
verbose (probably only useful if you also have a
|
||||
serial console attached to the system).
|
||||
|
||||
migration_factor=
|
||||
[KNL,SMP] multiply/divide migration costs by a factor
|
||||
Format=<percent>
|
||||
This debug option can be used to proportionally
|
||||
increase or decrease the auto-detected migration
|
||||
costs for all entries of the migration matrix.
|
||||
E.g. migration_factor=150 will increase migration
|
||||
costs by 50%. (and thus the scheduler will be less
|
||||
eager migrating cache-hot tasks)
|
||||
migration_factor=80 will decrease migration costs
|
||||
by 20%. (thus the scheduler will be more eager to
|
||||
migrate tasks)
|
||||
|
||||
WARNING: using the wrong values here can break
|
||||
scheduler performance, so it's only for scheduler
|
||||
development purposes, not production environments.
|
||||
|
||||
mousedev.tap_time=
|
||||
[MOUSE] Maximum time between finger touching and
|
||||
leaving touchpad surface for touch to be considered
|
||||
|
@ -998,6 +1055,8 @@ running once the system is up.
|
|||
|
||||
nowb [ARM]
|
||||
|
||||
nr_uarts= [SERIAL] maximum number of UARTs to be registered.
|
||||
|
||||
opl3= [HW,OSS]
|
||||
Format: <io>
|
||||
|
||||
|
@ -1176,6 +1235,10 @@ running once the system is up.
|
|||
Limit processor to maximum C-state
|
||||
max_cstate=9 overrides any DMI blacklist limit.
|
||||
|
||||
processor.nocst [HW,ACPI]
|
||||
Ignore the _CST method to determine C-states,
|
||||
instead using the legacy FADT method
|
||||
|
||||
prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk
|
||||
before loading.
|
||||
See Documentation/ramdisk.txt.
|
||||
|
|
|
@ -56,10 +56,12 @@ A request proceeds in the following manner:
|
|||
(4) request_key() then forks and executes /sbin/request-key with a new session
|
||||
keyring that contains a link to auth key V.
|
||||
|
||||
(5) /sbin/request-key execs an appropriate program to perform the actual
|
||||
(5) /sbin/request-key assumes the authority associated with key U.
|
||||
|
||||
(6) /sbin/request-key execs an appropriate program to perform the actual
|
||||
instantiation.
|
||||
|
||||
(6) The program may want to access another key from A's context (say a
|
||||
(7) The program may want to access another key from A's context (say a
|
||||
Kerberos TGT key). It just requests the appropriate key, and the keyring
|
||||
search notes that the session keyring has auth key V in its bottom level.
|
||||
|
||||
|
@ -67,19 +69,19 @@ A request proceeds in the following manner:
|
|||
UID, GID, groups and security info of process A as if it was process A,
|
||||
and come up with key W.
|
||||
|
||||
(7) The program then does what it must to get the data with which to
|
||||
(8) The program then does what it must to get the data with which to
|
||||
instantiate key U, using key W as a reference (perhaps it contacts a
|
||||
Kerberos server using the TGT) and then instantiates key U.
|
||||
|
||||
(8) Upon instantiating key U, auth key V is automatically revoked so that it
|
||||
(9) Upon instantiating key U, auth key V is automatically revoked so that it
|
||||
may not be used again.
|
||||
|
||||
(9) The program then exits 0 and request_key() deletes key V and returns key
|
||||
(10) The program then exits 0 and request_key() deletes key V and returns key
|
||||
U to the caller.
|
||||
|
||||
This also extends further. If key W (step 5 above) didn't exist, key W would be
|
||||
created uninstantiated, another auth key (X) would be created [as per step 3]
|
||||
and another copy of /sbin/request-key spawned [as per step 4]; but the context
|
||||
This also extends further. If key W (step 7 above) didn't exist, key W would be
|
||||
created uninstantiated, another auth key (X) would be created (as per step 3)
|
||||
and another copy of /sbin/request-key spawned (as per step 4); but the context
|
||||
specified by auth key X will still be process A, as it was in auth key V.
|
||||
|
||||
This is because process A's keyrings can't simply be attached to
|
||||
|
@ -138,8 +140,8 @@ until one succeeds:
|
|||
|
||||
(3) The process's session keyring is searched.
|
||||
|
||||
(4) If the process has a request_key() authorisation key in its session
|
||||
keyring then:
|
||||
(4) If the process has assumed the authority associated with a request_key()
|
||||
authorisation key then:
|
||||
|
||||
(a) If extant, the calling process's thread keyring is searched.
|
||||
|
||||
|
|
|
@ -308,6 +308,8 @@ process making the call:
|
|||
KEY_SPEC_USER_KEYRING -4 UID-specific keyring
|
||||
KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring
|
||||
KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring
|
||||
KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key()
|
||||
authorisation key
|
||||
|
||||
|
||||
The main syscalls are:
|
||||
|
@ -498,7 +500,11 @@ The keyctl syscall functions are:
|
|||
keyring is full, error ENFILE will result.
|
||||
|
||||
The link procedure checks the nesting of the keyrings, returning ELOOP if
|
||||
it appears to deep or EDEADLK if the link would introduce a cycle.
|
||||
it appears too deep or EDEADLK if the link would introduce a cycle.
|
||||
|
||||
Any links within the keyring to keys that match the new key in terms of
|
||||
type and description will be discarded from the keyring as the new one is
|
||||
added.
|
||||
|
||||
|
||||
(*) Unlink a key or keyring from another keyring:
|
||||
|
@ -628,6 +634,41 @@ The keyctl syscall functions are:
|
|||
there is one, otherwise the user default session keyring.
|
||||
|
||||
|
||||
(*) Set the timeout on a key.
|
||||
|
||||
long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout);
|
||||
|
||||
This sets or clears the timeout on a key. The timeout can be 0 to clear
|
||||
the timeout or a number of seconds to set the expiry time that far into
|
||||
the future.
|
||||
|
||||
The process must have attribute modification access on a key to set its
|
||||
timeout. Timeouts may not be set with this function on negative, revoked
|
||||
or expired keys.
|
||||
|
||||
|
||||
(*) Assume the authority granted to instantiate a key
|
||||
|
||||
long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key);
|
||||
|
||||
This assumes or divests the authority required to instantiate the
|
||||
specified key. Authority can only be assumed if the thread has the
|
||||
authorisation key associated with the specified key in its keyrings
|
||||
somewhere.
|
||||
|
||||
Once authority is assumed, searches for keys will also search the
|
||||
requester's keyrings using the requester's security label, UID, GID and
|
||||
groups.
|
||||
|
||||
If the requested authority is unavailable, error EPERM will be returned,
|
||||
likewise if the authority has been revoked because the target key is
|
||||
already instantiated.
|
||||
|
||||
If the specified key is 0, then any assumed authority will be divested.
|
||||
|
||||
The assumed authoritative key is inherited across fork and exec.
|
||||
|
||||
|
||||
===============
|
||||
KERNEL SERVICES
|
||||
===============
|
||||
|
|
|
@ -411,7 +411,8 @@ int init_module(void)
|
|||
printk("Couldn't find %s to plant kprobe\n", "do_fork");
|
||||
return -1;
|
||||
}
|
||||
if ((ret = register_kprobe(&kp) < 0)) {
|
||||
ret = register_kprobe(&kp);
|
||||
if (ret < 0) {
|
||||
printk("register_kprobe failed, returned %d\n", ret);
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@ How to conserve battery power using laptop-mode
|
|||
|
||||
Document Author: Bart Samwel (bart@samwel.tk)
|
||||
Date created: January 2, 2004
|
||||
Last modified: July 10, 2004
|
||||
Last modified: December 06, 2004
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
@ -33,7 +33,7 @@ or anything. Simply install all the files included in this document, and
|
|||
laptop mode will automatically be started when you're on battery. For
|
||||
your convenience, a tarball containing an installer can be downloaded at:
|
||||
|
||||
http://www.xs4all.nl/~bsamwel/laptop_mode/tools
|
||||
http://www.xs4all.nl/~bsamwel/laptop_mode/tools/
|
||||
|
||||
To configure laptop mode, you need to edit the configuration file, which is
|
||||
located in /etc/default/laptop-mode on Debian-based systems, or in
|
||||
|
@ -357,7 +357,7 @@ MAX_AGE=${MAX_AGE:-'600'}
|
|||
# Read-ahead, in kilobytes
|
||||
READAHEAD=${READAHEAD:-'4096'}
|
||||
|
||||
# Shall we remount journaled fs. with appropiate commit interval? (1=yes)
|
||||
# Shall we remount journaled fs. with appropriate commit interval? (1=yes)
|
||||
DO_REMOUNTS=${DO_REMOUNTS:-'1'}
|
||||
|
||||
# And shall we add the "noatime" option to that as well? (1=yes)
|
||||
|
@ -912,7 +912,7 @@ void usage()
|
|||
exit(0);
|
||||
}
|
||||
|
||||
int main(int ac, char **av)
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int fd;
|
||||
char *disk = 0;
|
||||
|
|
|
@ -65,20 +65,3 @@ The default is to disallow mandatory locking. The intention is that
|
|||
mandatory locking only be enabled on a local filesystem as the specific need
|
||||
arises.
|
||||
|
||||
Until an updated version of mount(8) becomes available you may have to apply
|
||||
this patch to the mount sources (based on the version distributed with Rick
|
||||
Faith's util-linux-2.5 package):
|
||||
|
||||
*** mount.c.orig Sat Jun 8 09:14:31 1996
|
||||
--- mount.c Sat Jun 8 09:13:02 1996
|
||||
***************
|
||||
*** 100,105 ****
|
||||
--- 100,107 ----
|
||||
{ "noauto", 0, MS_NOAUTO }, /* Can only be mounted explicitly */
|
||||
{ "user", 0, MS_USER }, /* Allow ordinary user to mount */
|
||||
{ "nouser", 1, MS_USER }, /* Forbid ordinary user to mount */
|
||||
+ { "mand", 0, MS_MANDLOCK }, /* Allow mandatory locks on this FS */
|
||||
+ { "nomand", 1, MS_MANDLOCK }, /* Forbid mandatory locks on this FS */
|
||||
/* add new options here */
|
||||
#ifdef MS_NOSUB
|
||||
{ "sub", 1, MS_NOSUB }, /* allow submounts */
|
||||
|
|
|
@ -0,0 +1,135 @@
|
|||
Generic Mutex Subsystem
|
||||
|
||||
started by Ingo Molnar <mingo@redhat.com>
|
||||
|
||||
"Why on earth do we need a new mutex subsystem, and what's wrong
|
||||
with semaphores?"
|
||||
|
||||
firstly, there's nothing wrong with semaphores. But if the simpler
|
||||
mutex semantics are sufficient for your code, then there are a couple
|
||||
of advantages of mutexes:
|
||||
|
||||
- 'struct mutex' is smaller on most architectures: e.g. on x86,
|
||||
'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes.
|
||||
A smaller structure size means less RAM footprint, and better
|
||||
CPU-cache utilization.
|
||||
|
||||
- tighter code. On x86 i get the following .text sizes when
|
||||
switching all mutex-alike semaphores in the kernel to the mutex
|
||||
subsystem:
|
||||
|
||||
text data bss dec hex filename
|
||||
3280380 868188 396860 4545428 455b94 vmlinux-semaphore
|
||||
3255329 865296 396732 4517357 44eded vmlinux-mutex
|
||||
|
||||
that's 25051 bytes of code saved, or a 0.76% win - off the hottest
|
||||
codepaths of the kernel. (The .data savings are 2892 bytes, or 0.33%)
|
||||
Smaller code means better icache footprint, which is one of the
|
||||
major optimization goals in the Linux kernel currently.
|
||||
|
||||
- the mutex subsystem is slightly faster and has better scalability for
|
||||
contended workloads. On an 8-way x86 system, running a mutex-based
|
||||
kernel and testing creat+unlink+close (of separate, per-task files)
|
||||
in /tmp with 16 parallel tasks, the average number of ops/sec is:
|
||||
|
||||
Semaphores: Mutexes:
|
||||
|
||||
$ ./test-mutex V 16 10 $ ./test-mutex V 16 10
|
||||
8 CPUs, running 16 tasks. 8 CPUs, running 16 tasks.
|
||||
checking VFS performance. checking VFS performance.
|
||||
avg loops/sec: 34713 avg loops/sec: 84153
|
||||
CPU utilization: 63% CPU utilization: 22%
|
||||
|
||||
i.e. in this workload, the mutex based kernel was 2.4 times faster
|
||||
than the semaphore based kernel, _and_ it also had 2.8 times less CPU
|
||||
utilization. (In terms of 'ops per CPU cycle', the semaphore kernel
|
||||
performed 551 ops/sec per 1% of CPU time used, while the mutex kernel
|
||||
performed 3825 ops/sec per 1% of CPU time used - it was 6.9 times
|
||||
more efficient.)
|
||||
|
||||
the scalability difference is visible even on a 2-way P4 HT box:
|
||||
|
||||
Semaphores: Mutexes:
|
||||
|
||||
$ ./test-mutex V 16 10 $ ./test-mutex V 16 10
|
||||
4 CPUs, running 16 tasks. 8 CPUs, running 16 tasks.
|
||||
checking VFS performance. checking VFS performance.
|
||||
avg loops/sec: 127659 avg loops/sec: 181082
|
||||
CPU utilization: 100% CPU utilization: 34%
|
||||
|
||||
(the straight performance advantage of mutexes is 41%, the per-cycle
|
||||
efficiency of mutexes is 4.1 times better.)
|
||||
|
||||
- there are no fastpath tradeoffs, the mutex fastpath is just as tight
|
||||
as the semaphore fastpath. On x86, the locking fastpath is 2
|
||||
instructions:
|
||||
|
||||
c0377ccb <mutex_lock>:
|
||||
c0377ccb: f0 ff 08 lock decl (%eax)
|
||||
c0377cce: 78 0e js c0377cde <.text.lock.mutex>
|
||||
c0377cd0: c3 ret
|
||||
|
||||
the unlocking fastpath is equally tight:
|
||||
|
||||
c0377cd1 <mutex_unlock>:
|
||||
c0377cd1: f0 ff 00 lock incl (%eax)
|
||||
c0377cd4: 7e 0f jle c0377ce5 <.text.lock.mutex+0x7>
|
||||
c0377cd6: c3 ret
|
||||
|
||||
- 'struct mutex' semantics are well-defined and are enforced if
|
||||
CONFIG_DEBUG_MUTEXES is turned on. Semaphores on the other hand have
|
||||
virtually no debugging code or instrumentation. The mutex subsystem
|
||||
checks and enforces the following rules:
|
||||
|
||||
* - only one task can hold the mutex at a time
|
||||
* - only the owner can unlock the mutex
|
||||
* - multiple unlocks are not permitted
|
||||
* - recursive locking is not permitted
|
||||
* - a mutex object must be initialized via the API
|
||||
* - a mutex object must not be initialized via memset or copying
|
||||
* - task may not exit with mutex held
|
||||
* - memory areas where held locks reside must not be freed
|
||||
* - held mutexes must not be reinitialized
|
||||
* - mutexes may not be used in irq contexts
|
||||
|
||||
furthermore, there are also convenience features in the debugging
|
||||
code:
|
||||
|
||||
* - uses symbolic names of mutexes, whenever they are printed in debug output
|
||||
* - point-of-acquire tracking, symbolic lookup of function names
|
||||
* - list of all locks held in the system, printout of them
|
||||
* - owner tracking
|
||||
* - detects self-recursing locks and prints out all relevant info
|
||||
* - detects multi-task circular deadlocks and prints out all affected
|
||||
* locks and tasks (and only those tasks)
|
||||
|
||||
Disadvantages
|
||||
-------------
|
||||
|
||||
The stricter mutex API means you cannot use mutexes the same way you
|
||||
can use semaphores: e.g. they cannot be used from an interrupt context,
|
||||
nor can they be unlocked from a different context than that which acquired
|
||||
it. [ I'm not aware of any other (e.g. performance) disadvantages from
|
||||
using mutexes at the moment, please let me know if you find any. ]
|
||||
|
||||
Implementation of mutexes
|
||||
-------------------------
|
||||
|
||||
'struct mutex' is the new mutex type, defined in include/linux/mutex.h
|
||||
and implemented in kernel/mutex.c. It is a counter-based mutex with a
|
||||
spinlock and a wait-list. The counter has 3 states: 1 for "unlocked",
|
||||
0 for "locked" and negative numbers (usually -1) for "locked, potential
|
||||
waiters queued".
|
||||
|
||||
the APIs of 'struct mutex' have been streamlined:
|
||||
|
||||
DEFINE_MUTEX(name);
|
||||
|
||||
mutex_init(mutex);
|
||||
|
||||
void mutex_lock(struct mutex *lock);
|
||||
int mutex_lock_interruptible(struct mutex *lock);
|
||||
int mutex_trylock(struct mutex *lock);
|
||||
void mutex_unlock(struct mutex *lock);
|
||||
int mutex_is_locked(struct mutex *lock);
|
||||
|
|
@ -945,7 +945,6 @@ bond0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
|
|||
collisions:0 txqueuelen:0
|
||||
|
||||
eth0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
|
||||
inet addr:XXX.XXX.XXX.YYY Bcast:XXX.XXX.XXX.255 Mask:255.255.252.0
|
||||
UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1
|
||||
RX packets:3573025 errors:0 dropped:0 overruns:0 frame:0
|
||||
TX packets:1643167 errors:1 dropped:0 overruns:1 carrier:0
|
||||
|
@ -953,7 +952,6 @@ eth0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
|
|||
Interrupt:10 Base address:0x1080
|
||||
|
||||
eth1 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
|
||||
inet addr:XXX.XXX.XXX.YYY Bcast:XXX.XXX.XXX.255 Mask:255.255.252.0
|
||||
UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1
|
||||
RX packets:3651769 errors:0 dropped:0 overruns:0 frame:0
|
||||
TX packets:1643480 errors:0 dropped:0 overruns:0 carrier:0
|
||||
|
|
|
@ -91,7 +91,7 @@ To use the driver as a module, proceed as follows:
|
|||
with (M)
|
||||
5. Execute the command "make modules".
|
||||
6. Execute the command "make modules_install".
|
||||
The appropiate modules will be installed.
|
||||
The appropriate modules will be installed.
|
||||
7. Reboot your system.
|
||||
|
||||
|
||||
|
@ -245,7 +245,7 @@ Default: Both
|
|||
This parameter is only relevant if auto-negotiation for this port is
|
||||
not set to "Sense". If auto-negotiation is set to "On", all three values
|
||||
are possible. If it is set to "Off", only "Full" and "Half" are allowed.
|
||||
This parameter is usefull if your link partner does not support all
|
||||
This parameter is useful if your link partner does not support all
|
||||
possible combinations.
|
||||
|
||||
Flow Control
|
||||
|
|
|
@ -41,11 +41,9 @@ the disk is not available then you have three options :-
|
|||
run a null modem to a second machine and capture the output there
|
||||
using your favourite communication program. Minicom works well.
|
||||
|
||||
(3) Patch the kernel with one of the crash dump patches. These save
|
||||
data to a floppy disk or video rom or a swap partition. None of
|
||||
these are standard kernel patches so you have to find and apply
|
||||
them yourself. Search kernel archives for kmsgdump, lkcd and
|
||||
oops+smram.
|
||||
(3) Use Kdump (see Documentation/kdump/kdump.txt),
|
||||
extract the kernel ring buffer from old memory using the dmesg
|
||||
gdbmacro in Documentation/kdump/gdbmacros.txt.
|
||||
|
||||
|
||||
Full Information
|
||||
|
|
|
@ -0,0 +1,246 @@
|
|||
|
||||
PCI Error Recovery
|
||||
------------------
|
||||
May 31, 2005
|
||||
|
||||
Current document maintainer:
|
||||
Linas Vepstas <linas@austin.ibm.com>
|
||||
|
||||
|
||||
Some PCI bus controllers are able to detect certain "hard" PCI errors
|
||||
on the bus, such as parity errors on the data and address busses, as
|
||||
well as SERR and PERR errors. These chipsets are then able to disable
|
||||
I/O to/from the affected device, so that, for example, a bad DMA
|
||||
address doesn't end up corrupting system memory. These same chipsets
|
||||
are also able to reset the affected PCI device, and return it to
|
||||
working condition. This document describes a generic API for
|
||||
performing error recovery.
|
||||
|
||||
The core idea is that after a PCI error has been detected, there must
|
||||
be a way for the kernel to coordinate with all affected device drivers
|
||||
so that the pci card can be made operational again, possibly after
|
||||
performing a full electrical #RST of the PCI card. The API below
|
||||
provides a generic API for device drivers to be notified of PCI
|
||||
errors, and to be notified of, and respond to, a reset sequence.
|
||||
|
||||
Preliminary sketch of API, cut-n-pasted-n-modified email from
|
||||
Ben Herrenschmidt, circa 5 april 2005
|
||||
|
||||
The error recovery API support is exposed to the driver in the form of
|
||||
a structure of function pointers pointed to by a new field in struct
|
||||
pci_driver. The absence of this pointer in pci_driver denotes an
|
||||
"non-aware" driver, behaviour on these is platform dependent.
|
||||
Platforms like ppc64 can try to simulate pci hotplug remove/add.
|
||||
|
||||
The definition of "pci_error_token" is not covered here. It is based on
|
||||
Seto's work on the synchronous error detection. We still need to define
|
||||
functions for extracting information out of an opaque error token. This is
|
||||
separate from this API.
|
||||
|
||||
This structure has the form:
|
||||
|
||||
struct pci_error_handlers
|
||||
{
|
||||
int (*error_detected)(struct pci_dev *dev, pci_error_token error);
|
||||
int (*mmio_enabled)(struct pci_dev *dev);
|
||||
int (*resume)(struct pci_dev *dev);
|
||||
int (*link_reset)(struct pci_dev *dev);
|
||||
int (*slot_reset)(struct pci_dev *dev);
|
||||
};
|
||||
|
||||
A driver doesn't have to implement all of these callbacks. The
|
||||
only mandatory one is error_detected(). If a callback is not
|
||||
implemented, the corresponding feature is considered unsupported.
|
||||
For example, if mmio_enabled() and resume() aren't there, then the
|
||||
driver is assumed as not doing any direct recovery and requires
|
||||
a reset. If link_reset() is not implemented, the card is assumed as
|
||||
not caring about link resets, in which case, if recover is supported,
|
||||
the core can try recover (but not slot_reset() unless it really did
|
||||
reset the slot). If slot_reset() is not supported, link_reset() can
|
||||
be called instead on a slot reset.
|
||||
|
||||
At first, the call will always be :
|
||||
|
||||
1) error_detected()
|
||||
|
||||
Error detected. This is sent once after an error has been detected. At
|
||||
this point, the device might not be accessible anymore depending on the
|
||||
platform (the slot will be isolated on ppc64). The driver may already
|
||||
have "noticed" the error because of a failing IO, but this is the proper
|
||||
"synchronisation point", that is, it gives a chance to the driver to
|
||||
cleanup, waiting for pending stuff (timers, whatever, etc...) to
|
||||
complete; it can take semaphores, schedule, etc... everything but touch
|
||||
the device. Within this function and after it returns, the driver
|
||||
shouldn't do any new IOs. Called in task context. This is sort of a
|
||||
"quiesce" point. See note about interrupts at the end of this doc.
|
||||
|
||||
Result codes:
|
||||
- PCIERR_RESULT_CAN_RECOVER:
|
||||
Driver returns this if it thinks it might be able to recover
|
||||
the HW by just banging IOs or if it wants to be given
|
||||
a chance to extract some diagnostic information (see
|
||||
below).
|
||||
- PCIERR_RESULT_NEED_RESET:
|
||||
Driver returns this if it thinks it can't recover unless the
|
||||
slot is reset.
|
||||
- PCIERR_RESULT_DISCONNECT:
|
||||
Return this if driver thinks it won't recover at all,
|
||||
(this will detach the driver ? or just leave it
|
||||
dangling ? to be decided)
|
||||
|
||||
So at this point, we have called error_detected() for all drivers
|
||||
on the segment that had the error. On ppc64, the slot is isolated. What
|
||||
happens now typically depends on the result from the drivers. If all
|
||||
drivers on the segment/slot return PCIERR_RESULT_CAN_RECOVER, we would
|
||||
re-enable IOs on the slot (or do nothing special if the platform doesn't
|
||||
isolate slots) and call 2). If not and we can reset slots, we go to 4),
|
||||
if neither, we have a dead slot. If it's a hotplug slot, we might
|
||||
"simulate" reset by triggering HW unplug/replug though.
|
||||
|
||||
>>> Current ppc64 implementation assumes that a device driver will
|
||||
>>> *not* schedule or semaphore in this routine; the current ppc64
|
||||
>>> implementation uses one kernel thread to notify all devices;
|
||||
>>> thus, if one device sleeps/schedules, all devices are affected.
|
||||
>>> Doing better requires complex multi-threaded logic in the error
|
||||
>>> recovery implementation (e.g. waiting for all notification threads
|
||||
>>> to "join" before proceeding with recovery.) This seems excessively
|
||||
>>> complex and not worth implementing.
|
||||
|
||||
>>> The current ppc64 implementation doesn't much care if the device
|
||||
>>> attempts i/o at this point, or not. I/O's will fail, returning
|
||||
>>> a value of 0xff on read, and writes will be dropped. If the device
|
||||
>>> driver attempts more than 10K I/O's to a frozen adapter, it will
|
||||
>>> assume that the device driver has gone into an infinite loop, and
|
||||
>>> it will panic the kernel.
|
||||
|
||||
2) mmio_enabled()
|
||||
|
||||
This is the "early recovery" call. IOs are allowed again, but DMA is
|
||||
not (hrm... to be discussed, I prefer not), with some restrictions. This
|
||||
is NOT a callback for the driver to start operations again, only to
|
||||
peek/poke at the device, extract diagnostic information, if any, and
|
||||
eventually do things like trigger a device local reset or some such,
|
||||
but not restart operations. This is sent if all drivers on a segment
|
||||
agree that they can try to recover and no automatic link reset was
|
||||
performed by the HW. If the platform can't just re-enable IOs without
|
||||
a slot reset or a link reset, it doesn't call this callback and goes
|
||||
directly to 3) or 4). All IOs should be done _synchronously_ from
|
||||
within this callback, errors triggered by them will be returned via
|
||||
the normal pci_check_whatever() api, no new error_detected() callback
|
||||
will be issued due to an error happening here. However, such an error
|
||||
might cause IOs to be re-blocked for the whole segment, and thus
|
||||
invalidate the recovery that other devices on the same segment might
|
||||
have done, forcing the whole segment into one of the next states,
|
||||
that is link reset or slot reset.
|
||||
|
||||
Result codes:
|
||||
- PCIERR_RESULT_RECOVERED
|
||||
Driver returns this if it thinks the device is fully
|
||||
functional and thinks it is ready to start
|
||||
normal driver operations again. There is no
|
||||
guarantee that the driver will actually be
|
||||
allowed to proceed, as another driver on the
|
||||
same segment might have failed and thus triggered a
|
||||
slot reset on platforms that support it.
|
||||
|
||||
- PCIERR_RESULT_NEED_RESET
|
||||
Driver returns this if it thinks the device is not
|
||||
recoverable in its current state and it needs a slot
|
||||
reset to proceed.
|
||||
|
||||
- PCIERR_RESULT_DISCONNECT
|
||||
Same as above. Total failure, no recovery even after
|
||||
reset, driver dead. (To be defined more precisely)
|
||||
|
||||
>>> The current ppc64 implementation does not implement this callback.
|
||||
|
||||
3) link_reset()
|
||||
|
||||
This is called after the link has been reset. This is typically
|
||||
a PCI Express specific state at this point and is done whenever a
|
||||
non-fatal error has been detected that can be "solved" by resetting
|
||||
the link. This call informs the driver of the reset and the driver
|
||||
should check if the device appears to be in working condition.
|
||||
This function acts a bit like 2) mmio_enabled(), in that the driver
|
||||
is not supposed to restart normal driver I/O operations right away.
|
||||
Instead, it should just "probe" the device to check its recoverability
|
||||
status. If all is right, then the core will call resume() once all
|
||||
drivers have ack'd link_reset().
|
||||
|
||||
Result codes:
|
||||
(identical to mmio_enabled)
|
||||
|
||||
>>> The current ppc64 implementation does not implement this callback.
|
||||
|
||||
4) slot_reset()
|
||||
|
||||
This is called after the slot has been soft or hard reset by the
|
||||
platform. A soft reset consists of asserting the adapter #RST line
|
||||
and then restoring the PCI BARs and PCI configuration header. If the
|
||||
platform supports PCI hotplug, then it might instead perform a hard
|
||||
reset by toggling power on the slot off/on. This call gives drivers
|
||||
the chance to re-initialize the hardware (re-download firmware, etc.),
|
||||
but drivers shouldn't restart normal I/O processing operations at
|
||||
this point. (See note about interrupts; interrupts aren't guaranteed
|
||||
to be delivered until the resume() callback has been called). If all
|
||||
device drivers report success on this callback, the platform will call
|
||||
resume() to complete the error handling and let the driver restart
|
||||
normal I/O processing.
|
||||
|
||||
A driver can still return a critical failure for this function if
|
||||
it can't get the device operational after reset. If the platform
|
||||
previously tried a soft reset, it might now try a hard reset (power
|
||||
cycle) and then call slot_reset() again. If the device still can't
|
||||
be recovered, there is nothing more that can be done; the platform
|
||||
will typically report a "permanent failure" in such a case. The
|
||||
device will be considered "dead" in this case.
|
||||
|
||||
Result codes:
|
||||
- PCIERR_RESULT_DISCONNECT
|
||||
Same as above.
|
||||
|
||||
>>> The current ppc64 implementation does not try a power-cycle reset
|
||||
>>> if the driver returned PCIERR_RESULT_DISCONNECT. However, it should.
|
||||
|
||||
5) resume()
|
||||
|
||||
This is called if all drivers on the segment have returned
|
||||
PCIERR_RESULT_RECOVERED from one of the 3 previous callbacks.
|
||||
That basically tells the driver to restart activity, that everything
|
||||
is back and running. No result code is taken into account here. If
|
||||
a new error happens, it will restart a new error handling process.
|
||||
|
||||
That's it. I think this covers all the possibilities. The way those
|
||||
callbacks are called is platform policy. A platform with no slot reset
|
||||
capability for example may want to just "ignore" drivers that can't
|
||||
recover (disconnect them) and try to let other cards on the same segment
|
||||
recover. Keep in mind that in most real life cases, though, there will
|
||||
be only one driver per segment.
|
||||
|
||||
Now, there is a note about interrupts. If you get an interrupt and your
|
||||
device is dead or has been isolated, there is a problem :)
|
||||
|
||||
After much thinking, I decided to leave that to the platform. That is,
|
||||
the recovery API only requires that:
|
||||
|
||||
- There is no guarantee that interrupt delivery can proceed from any
|
||||
device on the segment starting from the error detection and until the
|
||||
restart callback is sent, at which point interrupts are expected to be
|
||||
fully operational.
|
||||
|
||||
- There is no guarantee that interrupt delivery is stopped, that is, a
|
||||
driver that gets an interrupt after detecting an error, or that detects
|
||||
an error within the interrupt handler such that it prevents proper
|
||||
ack'ing of the interrupt (and thus removal of the source) should just
|
||||
return IRQ_NOTHANDLED. It's up to the platform to deal with that
|
||||
condition, typically by masking the irq source during the duration of
|
||||
the error handling. It is expected that the platform "knows" which
|
||||
interrupts are routed to error-management capable slots and can deal
|
||||
with temporarily disabling that irq number during error processing (this
|
||||
isn't terribly complex). That means some IRQ latency for other devices
|
||||
sharing the interrupt, but there is simply no other way. High end
|
||||
platforms aren't supposed to share interrupts between many devices
|
||||
anyway :)
|
||||
|
||||
|
||||
Revised: 31 May 2005 Linas Vepstas <linas@austin.ibm.com>
|
|
@ -218,7 +218,7 @@ proceed in the opposite direction.
|
|||
Q: Who do I contact for additional information about
|
||||
enabling power management for my specific driver/device?
|
||||
|
||||
ACPI Development mailing list: acpi-devel@lists.sourceforge.net
|
||||
ACPI Development mailing list: linux-acpi@vger.kernel.org
|
||||
|
||||
System Interface -- OBSOLETE, DO NOT USE!
|
||||
----------------*************************
|
||||
|
|
|
@ -212,7 +212,7 @@ A: Try running
|
|||
|
||||
cat `cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u` > /dev/null
|
||||
|
||||
after resume. swapoff -a; swapon -a may also be usefull.
|
||||
after resume. swapoff -a; swapon -a may also be useful.
|
||||
|
||||
Q: What happens to devices during swsusp? They seem to be resumed
|
||||
during system suspend?
|
||||
|
@ -323,7 +323,7 @@ to be useless to try to suspend to disk while that app is running?
|
|||
A: No, it should work okay, as long as your app does not mlock()
|
||||
it. Just prepare big enough swap partition.
|
||||
|
||||
Q: What information is usefull for debugging suspend-to-disk problems?
|
||||
Q: What information is useful for debugging suspend-to-disk problems?
|
||||
|
||||
A: Well, last messages on the screen are always useful. If something
|
||||
is broken, it is usually some kernel driver, therefore trying with as
|
||||
|
|
|
@ -8,12 +8,18 @@ please mail me.
|
|||
cpu_features.txt
|
||||
- info on how we support a variety of CPUs with minimal compile-time
|
||||
options.
|
||||
eeh-pci-error-recovery.txt
|
||||
- info on PCI Bus EEH Error Recovery
|
||||
hvcs.txt
|
||||
- IBM "Hypervisor Virtual Console Server" Installation Guide
|
||||
mpc52xx.txt
|
||||
- Linux 2.6.x on MPC52xx family
|
||||
ppc_htab.txt
|
||||
- info about the Linux/PPC /proc/ppc_htab entry
|
||||
smp.txt
|
||||
- use and state info about Linux/PPC on MP machines
|
||||
SBC8260_memory_mapping.txt
|
||||
- EST SBC8260 board info
|
||||
smp.txt
|
||||
- use and state info about Linux/PPC on MP machines
|
||||
sound.txt
|
||||
- info on sound support under Linux/PPC
|
||||
zImage_layout.txt
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
AACRAID Driver for Linux (take two)
|
||||
|
||||
Introduction
|
||||
-------------------------
|
||||
The aacraid driver adds support for Adaptec (http://www.adaptec.com)
|
||||
RAID controllers. This is a major rewrite from the original
|
||||
Adaptec supplied driver. It has significantly cleaned up both the code
|
||||
and the running binary size (the module is less than half the size of
|
||||
the original).
|
||||
|
||||
Supported Cards/Chipsets
|
||||
-------------------------
|
||||
PCI ID (pci.ids) OEM Product
|
||||
9005:0285:9005:028a Adaptec 2020ZCR (Skyhawk)
|
||||
9005:0285:9005:028e Adaptec 2020SA (Skyhawk)
|
||||
9005:0285:9005:028b Adaptec 2025ZCR (Terminator)
|
||||
9005:0285:9005:028f Adaptec 2025SA (Terminator)
|
||||
9005:0285:9005:0286 Adaptec 2120S (Crusader)
|
||||
9005:0286:9005:028d Adaptec 2130S (Lancer)
|
||||
9005:0285:9005:0285 Adaptec 2200S (Vulcan)
|
||||
9005:0285:9005:0287 Adaptec 2200S (Vulcan-2m)
|
||||
9005:0286:9005:028c Adaptec 2230S (Lancer)
|
||||
9005:0286:9005:028c Adaptec 2230SLP (Lancer)
|
||||
9005:0285:9005:0296 Adaptec 2240S (SabreExpress)
|
||||
9005:0285:9005:0290 Adaptec 2410SA (Jaguar)
|
||||
9005:0285:9005:0293 Adaptec 21610SA (Corsair-16)
|
||||
9005:0285:103c:3227 Adaptec 2610SA (Bearcat)
|
||||
9005:0285:9005:0292 Adaptec 2810SA (Corsair-8)
|
||||
9005:0285:9005:0294 Adaptec Prowler
|
||||
9005:0286:9005:029d Adaptec 2420SA (Intruder)
|
||||
9005:0286:9005:029c Adaptec 2620SA (Intruder)
|
||||
9005:0286:9005:029b Adaptec 2820SA (Intruder)
|
||||
9005:0286:9005:02a7 Adaptec 2830SA (Skyray)
|
||||
9005:0286:9005:02a8 Adaptec 2430SA (Skyray)
|
||||
9005:0285:9005:0288 Adaptec 3230S (Harrier)
|
||||
9005:0285:9005:0289 Adaptec 3240S (Tornado)
|
||||
9005:0285:9005:0298 Adaptec 4000SAS (BlackBird)
|
||||
9005:0285:9005:0297 Adaptec 4005SAS (AvonPark)
|
||||
9005:0285:9005:0299 Adaptec 4800SAS (Marauder-X)
|
||||
9005:0285:9005:029a Adaptec 4805SAS (Marauder-E)
|
||||
9005:0286:9005:02a2 Adaptec 4810SAS (Hurricane)
|
||||
1011:0046:9005:0364 Adaptec 5400S (Mustang)
|
||||
1011:0046:9005:0365 Adaptec 5400S (Mustang)
|
||||
9005:0283:9005:0283 Adaptec Catapult (3210S with arc firmware)
|
||||
9005:0284:9005:0284 Adaptec Tomcat (3410S with arc firmware)
|
||||
9005:0287:9005:0800 Adaptec Themisto (Jupiter)
|
||||
9005:0200:9005:0200 Adaptec Themisto (Jupiter)
|
||||
9005:0286:9005:0800 Adaptec Callisto (Jupiter)
|
||||
1011:0046:9005:1364 Dell PERC 2/QC (Quad Channel, Mustang)
|
||||
1028:0001:1028:0001 Dell PERC 2/Si (Iguana)
|
||||
1028:0003:1028:0003 Dell PERC 3/Si (SlimFast)
|
||||
1028:0002:1028:0002 Dell PERC 3/Di (Opal)
|
||||
1028:0004:1028:0004 Dell PERC 3/DiF (Iguana)
|
||||
1028:0002:1028:00d1 Dell PERC 3/DiV (Viper)
|
||||
1028:0002:1028:00d9 Dell PERC 3/DiL (Lexus)
|
||||
1028:000a:1028:0106 Dell PERC 3/DiJ (Jaguar)
|
||||
1028:000a:1028:011b Dell PERC 3/DiD (Dagger)
|
||||
1028:000a:1028:0121 Dell PERC 3/DiB (Boxster)
|
||||
9005:0285:1028:0287 Dell PERC 320/DC (Vulcan)
|
||||
9005:0285:1028:0291 Dell CERC 2 (DellCorsair)
|
||||
1011:0046:103c:10c2 HP NetRAID-4M (Mustang)
|
||||
9005:0285:17aa:0286 Legend S220 (Crusader)
|
||||
9005:0285:17aa:0287 Legend S230 (Vulcan)
|
||||
9005:0285:9005:0290 IBM ServeRAID 7t (Jaguar)
|
||||
9005:0285:1014:02F2 IBM ServeRAID 8i (AvonPark)
|
||||
9005:0285:1014:0312 IBM ServeRAID 8i (AvonParkLite)
|
||||
9005:0286:1014:9580 IBM ServeRAID 8k/8k-l8 (Aurora)
|
||||
9005:0286:1014:9540 IBM ServeRAID 8k/8k-l4 (AuroraLite)
|
||||
9005:0286:9005:029f ICP ICP9014R0 (Lancer)
|
||||
9005:0286:9005:029e ICP ICP9024R0 (Lancer)
|
||||
9005:0286:9005:02a0 ICP ICP9047MA (Lancer)
|
||||
9005:0286:9005:02a1 ICP ICP9087MA (Lancer)
|
||||
9005:0286:9005:02a4 ICP ICP9085LI (Marauder-X)
|
||||
9005:0286:9005:02a5 ICP ICP5085BR (Marauder-E)
|
||||
9005:0286:9005:02a3 ICP ICP5085AU (Hurricane)
|
||||
9005:0286:9005:02a6 ICP ICP9067MA (Intruder-6)
|
||||
9005:0286:9005:02a9 ICP ICP5087AU (Skyray)
|
||||
9005:0286:9005:02aa ICP ICP5047AU (Skyray)
|
||||
|
||||
People
|
||||
-------------------------
|
||||
Alan Cox <alan@redhat.com>
|
||||
Christoph Hellwig <hch@infradead.org> (updates for new-style PCI probing and SCSI host registration,
|
||||
small cleanups/fixes)
|
||||
Matt Domsch <matt_domsch@dell.com> (revision ioctl, adapter messages)
|
||||
Deanna Bonds (non-DASD support, PAE fibs and 64 bit, added new adaptec controllers
|
||||
added new ioctls, changed scsi interface to use new error handler,
|
||||
increased the number of fibs and outstanding commands to a container)
|
||||
|
||||
(fixed 64bit and 64G memory model, changed confusing naming convention
|
||||
where fibs that go to the hardware are consistently called hw_fibs and
|
||||
not just fibs like the name of the driver tracking structure)
|
||||
Mark Salyzyn <Mark_Salyzyn@adaptec.com> Fixed panic issues and added some new product ids for upcoming hbas. Performance tuning, card failover and bug mitigations.
|
||||
|
||||
Original Driver
|
||||
-------------------------
|
||||
Adaptec Unix OEM Product Group
|
||||
|
||||
Mailing List
|
||||
-------------------------
|
||||
linux-scsi@vger.kernel.org (Interested parties troll here)
|
||||
Also note this is very different to Brian's original driver
|
||||
so don't expect him to support it.
|
||||
Adaptec does support this driver. Contact Adaptec tech support or
|
||||
aacraid@adaptec.com
|
||||
|
||||
Original by Brian Boerner February 2001
|
||||
Rewritten by Alan Cox, November 2001
|
|
@ -5577,7 +5577,7 @@ struct _snd_pcm_runtime {
|
|||
<informalexample>
|
||||
<programlisting>
|
||||
<![CDATA[
|
||||
static int mychip_suspend(strut pci_dev *pci, pm_message_t state)
|
||||
static int mychip_suspend(struct pci_dev *pci, pm_message_t state)
|
||||
{
|
||||
/* (1) */
|
||||
struct snd_card *card = pci_get_drvdata(pci);
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
spi_butterfly - parport-to-butterfly adapter driver
|
||||
===================================================
|
||||
|
||||
This is a hardware and software project that includes building and using
|
||||
a parallel port adapter cable, together with an "AVR Butterfly" to run
|
||||
firmware for user interfacing and/or sensors. A Butterfly is a $US20
|
||||
battery powered card with an AVR microcontroller and lots of goodies:
|
||||
sensors, LCD, flash, toggle stick, and more. You can use AVR-GCC to
|
||||
develop firmware for this, and flash it using this adapter cable.
|
||||
|
||||
You can make this adapter from an old printer cable and solder things
|
||||
directly to the Butterfly. Or (if you have the parts and skills) you
|
||||
can come up with something fancier, providing circuit protection to the
|
||||
Butterfly and the printer port, or with a better power supply than two
|
||||
signal pins from the printer port.
|
||||
|
||||
|
||||
The first cable connections will hook Linux up to one SPI bus, with the
|
||||
AVR and a DataFlash chip; and to the AVR reset line. This is all you
|
||||
need to reflash the firmware, and the pins are the standard Atmel "ISP"
|
||||
connector pins (used also on non-Butterfly AVR boards).
|
||||
|
||||
Signal Butterfly Parport (DB-25)
|
||||
------ --------- ---------------
|
||||
SCK = J403.PB1/SCK = pin 2/D0
|
||||
RESET = J403.nRST = pin 3/D1
|
||||
VCC = J403.VCC_EXT = pin 8/D6
|
||||
MOSI = J403.PB2/MOSI = pin 9/D7
|
||||
MISO = J403.PB3/MISO = pin 11/S7,nBUSY
|
||||
GND = J403.GND = pin 23/GND
|
||||
|
||||
Then to let Linux master that bus to talk to the DataFlash chip, you must
|
||||
(a) flash new firmware that disables SPI (set PRR.2, and disable pullups
|
||||
by clearing PORTB.[0-3]); (b) configure the mtd_dataflash driver; and
|
||||
(c) cable in the chipselect.
|
||||
|
||||
Signal Butterfly Parport (DB-25)
|
||||
------ --------- ---------------
|
||||
VCC = J400.VCC_EXT = pin 7/D5
|
||||
SELECT = J400.PB0/nSS = pin 17/C3,nSELECT
|
||||
GND = J400.GND = pin 24/GND
|
||||
|
||||
The "USI" controller, using J405, can be used for a second SPI bus. That
|
||||
would let you talk to the AVR over SPI, running firmware that makes it act
|
||||
as an SPI slave, while letting either Linux or the AVR use the DataFlash.
|
||||
There are plenty of spare parport pins to wire this one up, such as:
|
||||
|
||||
Signal Butterfly Parport (DB-25)
|
||||
------ --------- ---------------
|
||||
SCK = J403.PE4/USCK = pin 5/D3
|
||||
MOSI = J403.PE5/DI = pin 6/D4
|
||||
MISO = J403.PE6/DO = pin 12/S5,nPAPEROUT
|
||||
GND = J403.GND = pin 22/GND
|
||||
|
||||
IRQ = J402.PF4 = pin 10/S6,ACK
|
||||
GND = J402.GND(P2) = pin 25/GND
|
||||
|
|
@ -0,0 +1,457 @@
|
|||
Overview of Linux kernel SPI support
|
||||
====================================
|
||||
|
||||
02-Dec-2005
|
||||
|
||||
What is SPI?
|
||||
------------
|
||||
The "Serial Peripheral Interface" (SPI) is a synchronous four wire serial
|
||||
link used to connect microcontrollers to sensors, memory, and peripherals.
|
||||
|
||||
The three signal wires hold a clock (SCLK, often on the order of 10 MHz),
|
||||
and parallel data lines with "Master Out, Slave In" (MOSI) or "Master In,
|
||||
Slave Out" (MISO) signals. (Other names are also used.) There are four
|
||||
clocking modes through which data is exchanged; mode-0 and mode-3 are most
|
||||
commonly used. Each clock cycle shifts data out and data in; the clock
|
||||
doesn't cycle except when there is data to shift.
|
||||
|
||||
SPI masters may use a "chip select" line to activate a given SPI slave
|
||||
device, so those three signal wires may be connected to several chips
|
||||
in parallel. All SPI slaves support chipselects. Some devices have
|
||||
other signals, often including an interrupt to the master.
|
||||
|
||||
Unlike serial busses like USB or SMBUS, even low level protocols for
|
||||
SPI slave functions are usually not interoperable between vendors
|
||||
(except for cases like SPI memory chips).
|
||||
|
||||
- SPI may be used for request/response style device protocols, as with
|
||||
touchscreen sensors and memory chips.
|
||||
|
||||
- It may also be used to stream data in either direction (half duplex),
|
||||
or both of them at the same time (full duplex).
|
||||
|
||||
- Some devices may use eight bit words. Others may use different word
|
||||
lengths, such as streams of 12-bit or 20-bit digital samples.
|
||||
|
||||
In the same way, SPI slaves will only rarely support any kind of automatic
|
||||
discovery/enumeration protocol. The tree of slave devices accessible from
|
||||
a given SPI master will normally be set up manually, with configuration
|
||||
tables.
|
||||
|
||||
SPI is only one of the names used by such four-wire protocols, and
|
||||
most controllers have no problem handling "MicroWire" (think of it as
|
||||
half-duplex SPI, for request/response protocols), SSP ("Synchronous
|
||||
Serial Protocol"), PSP ("Programmable Serial Protocol"), and other
|
||||
related protocols.
|
||||
|
||||
Microcontrollers often support both master and slave sides of the SPI
|
||||
protocol. This document (and Linux) currently only supports the master
|
||||
side of SPI interactions.
|
||||
|
||||
|
||||
Who uses it? On what kinds of systems?
|
||||
---------------------------------------
|
||||
Linux developers using SPI are probably writing device drivers for embedded
|
||||
systems boards. SPI is used to control external chips, and it is also a
|
||||
protocol supported by every MMC or SD memory card. (The older "DataFlash"
|
||||
cards, predating MMC cards but using the same connectors and card shape,
|
||||
support only SPI.) Some PC hardware uses SPI flash for BIOS code.
|
||||
|
||||
SPI slave chips range from digital/analog converters used for analog
|
||||
sensors and codecs, to memory, to peripherals like USB controllers
|
||||
or Ethernet adapters; and more.
|
||||
|
||||
Most systems using SPI will integrate a few devices on a mainboard.
|
||||
Some provide SPI links on expansion connectors; in cases where no
|
||||
dedicated SPI controller exists, GPIO pins can be used to create a
|
||||
low speed "bitbanging" adapter. Very few systems will "hotplug" an SPI
|
||||
controller; the reasons to use SPI focus on low cost and simple operation,
|
||||
and if dynamic reconfiguration is important, USB will often be a more
|
||||
appropriate low-pincount peripheral bus.
|
||||
|
||||
Many microcontrollers that can run Linux integrate one or more I/O
|
||||
interfaces with SPI modes. Given SPI support, they could use MMC or SD
|
||||
cards without needing a special purpose MMC/SD/SDIO controller.
|
||||
|
||||
|
||||
How do these driver programming interfaces work?
|
||||
------------------------------------------------
|
||||
The <linux/spi/spi.h> header file includes kerneldoc, as does the
|
||||
main source code, and you should certainly read that. This is just
|
||||
an overview, so you get the big picture before the details.
|
||||
|
||||
SPI requests always go into I/O queues. Requests for a given SPI device
|
||||
are always executed in FIFO order, and complete asynchronously through
|
||||
completion callbacks. There are also some simple synchronous wrappers
|
||||
for those calls, including ones for common transaction types like writing
|
||||
a command and then reading its response.
|
||||
|
||||
There are two types of SPI driver, here called:
|
||||
|
||||
Controller drivers ... these are often built in to System-On-Chip
|
||||
processors, and often support both Master and Slave roles.
|
||||
These drivers touch hardware registers and may use DMA.
|
||||
Or they can be PIO bitbangers, needing just GPIO pins.
|
||||
|
||||
Protocol drivers ... these pass messages through the controller
|
||||
driver to communicate with a Slave or Master device on the
|
||||
other side of an SPI link.
|
||||
|
||||
So for example one protocol driver might talk to the MTD layer to export
|
||||
data to filesystems stored on SPI flash like DataFlash; and others might
|
||||
control audio interfaces, present touchscreen sensors as input interfaces,
|
||||
or monitor temperature and voltage levels during industrial processing.
|
||||
And those might all be sharing the same controller driver.
|
||||
|
||||
A "struct spi_device" encapsulates the master-side interface between
|
||||
those two types of driver. At this writing, Linux has no slave side
|
||||
programming interface.
|
||||
|
||||
There is a minimal core of SPI programming interfaces, focussing on
|
||||
using driver model to connect controller and protocol drivers using
|
||||
device tables provided by board specific initialization code. SPI
|
||||
shows up in sysfs in several locations:
|
||||
|
||||
/sys/devices/.../CTLR/spiB.C ... spi_device on bus "B",
|
||||
chipselect C, accessed through CTLR.
|
||||
|
||||
/sys/devices/.../CTLR/spiB.C/modalias ... identifies the driver
|
||||
that should be used with this device (for hotplug/coldplug)
|
||||
|
||||
/sys/bus/spi/devices/spiB.C ... symlink to the physical
|
||||
spiB.C device
|
||||
|
||||
/sys/bus/spi/drivers/D ... driver for one or more spi*.* devices
|
||||
|
||||
/sys/class/spi_master/spiB ... class device for the controller
|
||||
managing bus "B". All the spiB.* devices share the same
|
||||
physical SPI bus segment, with SCLK, MOSI, and MISO.
|
||||
|
||||
|
||||
How does board-specific init code declare SPI devices?
|
||||
------------------------------------------------------
|
||||
Linux needs several kinds of information to properly configure SPI devices.
|
||||
That information is normally provided by board-specific code, even for
|
||||
chips that do support some form of automated discovery/enumeration.
|
||||
|
||||
DECLARE CONTROLLERS
|
||||
|
||||
The first kind of information is a list of what SPI controllers exist.
|
||||
For System-on-Chip (SOC) based boards, these will usually be platform
|
||||
devices, and the controller may need some platform_data in order to
|
||||
operate properly. The "struct platform_device" will include resources
|
||||
like the physical address of the controller's first register and its IRQ.
|
||||
|
||||
Platforms will often abstract the "register SPI controller" operation,
|
||||
maybe coupling it with code to initialize pin configurations, so that
|
||||
the arch/.../mach-*/board-*.c files for several boards can all share the
|
||||
same basic controller setup code. This is because most SOCs have several
|
||||
SPI-capable controllers, and only the ones actually usable on a given
|
||||
board should normally be set up and registered.
|
||||
|
||||
So for example arch/.../mach-*/board-*.c files might have code like:
|
||||
|
||||
#include <asm/arch/spi.h> /* for mysoc_spi_data */
|
||||
|
||||
/* if your mach-* infrastructure doesn't support kernels that can
|
||||
* run on multiple boards, pdata wouldn't benefit from "__init".
|
||||
*/
|
||||
static struct mysoc_spi_data __init pdata = { ... };
|
||||
|
||||
static __init board_init(void)
|
||||
{
|
||||
...
|
||||
/* this board only uses SPI controller #2 */
|
||||
mysoc_register_spi(2, &pdata);
|
||||
...
|
||||
}
|
||||
|
||||
And SOC-specific utility code might look something like:
|
||||
|
||||
#include <asm/arch/spi.h>
|
||||
|
||||
static struct platform_device spi2 = { ... };
|
||||
|
||||
void mysoc_register_spi(unsigned n, struct mysoc_spi_data *pdata)
|
||||
{
|
||||
struct mysoc_spi_data *pdata2;
|
||||
|
||||
pdata2 = kmalloc(sizeof *pdata2, GFP_KERNEL);
|
||||
*pdata2 = *pdata;
|
||||
...
|
||||
if (n == 2) {
|
||||
spi2->dev.platform_data = pdata2;
|
||||
register_platform_device(&spi2);
|
||||
|
||||
/* also: set up pin modes so the spi2 signals are
|
||||
* visible on the relevant pins ... bootloaders on
|
||||
* production boards may already have done this, but
|
||||
* developer boards will often need Linux to do it.
|
||||
*/
|
||||
}
|
||||
...
|
||||
}
|
||||
|
||||
Notice how the platform_data for boards may be different, even if the
|
||||
same SOC controller is used. For example, on one board SPI might use
|
||||
an external clock, where another derives the SPI clock from current
|
||||
settings of some master clock.
|
||||
|
||||
|
||||
DECLARE SLAVE DEVICES
|
||||
|
||||
The second kind of information is a list of what SPI slave devices exist
|
||||
on the target board, often with some board-specific data needed for the
|
||||
driver to work correctly.
|
||||
|
||||
Normally your arch/.../mach-*/board-*.c files would provide a small table
|
||||
listing the SPI devices on each board. (This would typically be only a
|
||||
small handful.) That might look like:
|
||||
|
||||
static struct ads7846_platform_data ads_info = {
|
||||
.vref_delay_usecs = 100,
|
||||
.x_plate_ohms = 580,
|
||||
.y_plate_ohms = 410,
|
||||
};
|
||||
|
||||
static struct spi_board_info spi_board_info[] __initdata = {
|
||||
{
|
||||
.modalias = "ads7846",
|
||||
.platform_data = &ads_info,
|
||||
.mode = SPI_MODE_0,
|
||||
.irq = GPIO_IRQ(31),
|
||||
.max_speed_hz = 120000 /* max sample rate at 3V */ * 16,
|
||||
.bus_num = 1,
|
||||
.chip_select = 0,
|
||||
},
|
||||
};
|
||||
|
||||
Again, notice how board-specific information is provided; each chip may need
|
||||
several types. This example shows generic constraints like the fastest SPI
|
||||
clock to allow (a function of board voltage in this case) or how an IRQ pin
|
||||
is wired, plus chip-specific constraints like an important delay that's
|
||||
changed by the capacitance at one pin.
|
||||
|
||||
(There's also "controller_data", information that may be useful to the
|
||||
controller driver. An example would be peripheral-specific DMA tuning
|
||||
data or chipselect callbacks. This is stored in spi_device later.)
|
||||
|
||||
The board_info should provide enough information to let the system work
|
||||
without the chip's driver being loaded. The most troublesome aspect of
|
||||
that is likely the SPI_CS_HIGH bit in the spi_device.mode field, since
|
||||
sharing a bus with a device that interprets chipselect "backwards" is
|
||||
not possible.
|
||||
|
||||
Then your board initialization code would register that table with the SPI
|
||||
infrastructure, so that it's available later when the SPI master controller
|
||||
driver is registered:
|
||||
|
||||
spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
|
||||
|
||||
Like with other static board-specific setup, you won't unregister those.
|
||||
|
||||
The widely used "card" style computers bundle memory, cpu, and little else
|
||||
onto a card that's maybe just thirty square centimeters. On such systems,
|
||||
your arch/.../mach-.../board-*.c file would primarily provide information
|
||||
about the devices on the mainboard into which such a card is plugged. That
|
||||
certainly includes SPI devices hooked up through the card connectors!
|
||||
|
||||
|
||||
NON-STATIC CONFIGURATIONS
|
||||
|
||||
Developer boards often play by different rules than product boards, and one
|
||||
example is the potential need to hotplug SPI devices and/or controllers.
|
||||
|
||||
For those cases you might need to use spi_busnum_to_master() to look
|
||||
up the spi bus master, and will likely need spi_new_device() to provide the
|
||||
board info based on the board that was hotplugged. Of course, you'd later
|
||||
call at least spi_unregister_device() when that board is removed.
|
||||
|
||||
When Linux includes support for MMC/SD/SDIO/DataFlash cards through SPI, those
|
||||
configurations will also be dynamic. Fortunately, those devices all support
|
||||
basic device identification probes, so that support should hotplug normally.
|
||||
|
||||
|
||||
How do I write an "SPI Protocol Driver"?
|
||||
----------------------------------------
|
||||
All SPI drivers are currently kernel drivers. A userspace driver API
|
||||
would just be another kernel driver, probably offering some lowlevel
|
||||
access through aio_read(), aio_write(), and ioctl() calls and using the
|
||||
standard userspace sysfs mechanisms to bind to a given SPI device.
|
||||
|
||||
SPI protocol drivers somewhat resemble platform device drivers:
|
||||
|
||||
static struct spi_driver CHIP_driver = {
|
||||
.driver = {
|
||||
.name = "CHIP",
|
||||
.bus = &spi_bus_type,
|
||||
.owner = THIS_MODULE,
|
||||
},
|
||||
|
||||
.probe = CHIP_probe,
|
||||
.remove = __devexit_p(CHIP_remove),
|
||||
.suspend = CHIP_suspend,
|
||||
.resume = CHIP_resume,
|
||||
};
|
||||
|
||||
The driver core will automatically attempt to bind this driver to any SPI
|
||||
device whose board_info gave a modalias of "CHIP". Your probe() code
|
||||
might look like this unless you're creating a class_device:
|
||||
|
||||
static int __devinit CHIP_probe(struct spi_device *spi)
|
||||
{
|
||||
struct CHIP *chip;
|
||||
struct CHIP_platform_data *pdata;
|
||||
|
||||
/* assuming the driver requires board-specific data: */
|
||||
pdata = spi->dev.platform_data;
|
||||
if (!pdata)
|
||||
return -ENODEV;
|
||||
|
||||
/* get memory for driver's per-chip state */
|
||||
chip = kzalloc(sizeof *chip, GFP_KERNEL);
|
||||
if (!chip)
|
||||
return -ENOMEM;
|
||||
dev_set_drvdata(&spi->dev, chip);
|
||||
|
||||
... etc
|
||||
return 0;
|
||||
}
|
||||
|
||||
As soon as it enters probe(), the driver may issue I/O requests to
|
||||
the SPI device using "struct spi_message". When remove() returns,
|
||||
the driver guarantees that it won't submit any more such messages.
|
||||
|
||||
- An spi_message is a sequence of protocol operations, executed
|
||||
as one atomic sequence. SPI driver controls include:
|
||||
|
||||
+ when bidirectional reads and writes start ... by how its
|
||||
sequence of spi_transfer requests is arranged;
|
||||
|
||||
+ optionally defining short delays after transfers ... using
|
||||
the spi_transfer.delay_usecs setting;
|
||||
|
||||
+ whether the chipselect becomes inactive after a transfer and
|
||||
any delay ... by using the spi_transfer.cs_change flag;
|
||||
|
||||
+ hinting whether the next message is likely to go to this same
|
||||
device ... using the spi_transfer.cs_change flag on the last
|
||||
transfer in that atomic group, and potentially saving costs
|
||||
for chip deselect and select operations.
|
||||
|
||||
- Follow standard kernel rules, and provide DMA-safe buffers in
|
||||
your messages. That way controller drivers using DMA aren't forced
|
||||
to make extra copies unless the hardware requires it (e.g. working
|
||||
around hardware errata that force the use of bounce buffering).
|
||||
|
||||
If standard dma_map_single() handling of these buffers is inappropriate,
|
||||
you can use spi_message.is_dma_mapped to tell the controller driver
|
||||
that you've already provided the relevant DMA addresses.
|
||||
|
||||
- The basic I/O primitive is spi_async(). Async requests may be
|
||||
issued in any context (irq handler, task, etc) and completion
|
||||
is reported using a callback provided with the message.
|
||||
After any detected error, the chip is deselected and processing
|
||||
of that spi_message is aborted.
|
||||
|
||||
- There are also synchronous wrappers like spi_sync(), and wrappers
|
||||
like spi_read(), spi_write(), and spi_write_then_read(). These
|
||||
may be issued only in contexts that may sleep, and they're all
|
||||
clean (and small, and "optional") layers over spi_async().
|
||||
|
||||
- The spi_write_then_read() call, and convenience wrappers around
|
||||
it, should only be used with small amounts of data where the
|
||||
cost of an extra copy may be ignored. It's designed to support
|
||||
common RPC-style requests, such as writing an eight bit command
|
||||
and reading a sixteen bit response -- spi_w8r16() being one of its
|
||||
wrappers, doing exactly that.
|
||||
|
||||
Some drivers may need to modify spi_device characteristics like the
|
||||
transfer mode, wordsize, or clock rate. This is done with spi_setup(),
|
||||
which would normally be called from probe() before the first I/O is
|
||||
done to the device.
|
||||
|
||||
While "spi_device" would be the bottom boundary of the driver, the
|
||||
upper boundaries might include sysfs (especially for sensor readings),
|
||||
the input layer, ALSA, networking, MTD, the character device framework,
|
||||
or other Linux subsystems.
|
||||
|
||||
Note that there are two types of memory your driver must manage as part
|
||||
of interacting with SPI devices.
|
||||
|
||||
- I/O buffers use the usual Linux rules, and must be DMA-safe.
|
||||
You'd normally allocate them from the heap or free page pool.
|
||||
Don't use the stack, or anything that's declared "static".
|
||||
|
||||
- The spi_message and spi_transfer metadata used to glue those
|
||||
I/O buffers into a group of protocol transactions. These can
|
||||
be allocated anywhere it's convenient, including as part of
|
||||
other allocate-once driver data structures. Zero-init these.
|
||||
|
||||
If you like, spi_message_alloc() and spi_message_free() convenience
|
||||
routines are available to allocate and zero-initialize an spi_message
|
||||
with several transfers.
|
||||
|
||||
|
||||
How do I write an "SPI Master Controller Driver"?
|
||||
-------------------------------------------------
|
||||
An SPI controller will probably be registered on the platform_bus; write
|
||||
a driver to bind to the device, whichever bus is involved.
|
||||
|
||||
The main task of this type of driver is to provide an "spi_master".
|
||||
Use spi_alloc_master() to allocate the master, and class_get_devdata()
|
||||
to get the driver-private data allocated for that device.
|
||||
|
||||
struct spi_master *master;
|
||||
struct CONTROLLER *c;
|
||||
|
||||
master = spi_alloc_master(dev, sizeof *c);
|
||||
if (!master)
|
||||
return -ENODEV;
|
||||
|
||||
c = class_get_devdata(&master->cdev);
|
||||
|
||||
The driver will initialize the fields of that spi_master, including the
|
||||
bus number (maybe the same as the platform device ID) and three methods
|
||||
used to interact with the SPI core and SPI protocol drivers. It will
|
||||
also initialize its own internal state.
|
||||
|
||||
master->setup(struct spi_device *spi)
|
||||
This sets up the device clock rate, SPI mode, and word sizes.
|
||||
Drivers may change the defaults provided by board_info, and then
|
||||
call spi_setup(spi) to invoke this routine. It may sleep.
|
||||
|
||||
master->transfer(struct spi_device *spi, struct spi_message *message)
|
||||
This must not sleep. Its responsibility is to arrange that the
|
||||
transfer happens and its complete() callback is issued; the two
|
||||
will normally happen later, after other transfers complete.
|
||||
|
||||
master->cleanup(struct spi_device *spi)
|
||||
Your controller driver may use spi_device.controller_state to hold
|
||||
state it dynamically associates with that device. If you do that,
|
||||
be sure to provide the cleanup() method to free that state.
|
||||
|
||||
The bulk of the driver will be managing the I/O queue fed by transfer().
|
||||
|
||||
That queue could be purely conceptual. For example, a driver used only
|
||||
for low-frequency sensor access might be fine using synchronous PIO.
|
||||
|
||||
But the queue will probably be very real, using message->queue, PIO,
|
||||
often DMA (especially if the root filesystem is in SPI flash), and
|
||||
execution contexts like IRQ handlers, tasklets, or workqueues (such
|
||||
as keventd). Your driver can be as fancy, or as simple, as you need.
|
||||
|
||||
|
||||
THANKS TO
|
||||
---------
|
||||
Contributors to Linux-SPI discussions include (in alphabetical order,
|
||||
by last name):
|
||||
|
||||
David Brownell
|
||||
Russell King
|
||||
Dmitry Pervushin
|
||||
Stephen Street
|
||||
Mark Underwood
|
||||
Andrew Victor
|
||||
Vitaly Wool
|
||||
|
|
@ -1,58 +1,56 @@
|
|||
Everything you ever wanted to know about Linux 2.6 -stable releases.
|
||||
|
||||
Rules on what kind of patches are accepted, and what ones are not, into
|
||||
the "-stable" tree:
|
||||
Rules on what kind of patches are accepted, and which ones are not, into the
|
||||
"-stable" tree:
|
||||
|
||||
- It must be obviously correct and tested.
|
||||
- It can not bigger than 100 lines, with context.
|
||||
- It can not be bigger than 100 lines, with context.
|
||||
- It must fix only one thing.
|
||||
- It must fix a real bug that bothers people (not a, "This could be a
|
||||
problem..." type thing.)
|
||||
problem..." type thing).
|
||||
- It must fix a problem that causes a build error (but not for things
|
||||
marked CONFIG_BROKEN), an oops, a hang, data corruption, a real
|
||||
security issue, or some "oh, that's not good" issue. In short,
|
||||
something critical.
|
||||
- No "theoretical race condition" issues, unless an explanation of how
|
||||
the race can be exploited.
|
||||
security issue, or some "oh, that's not good" issue. In short, something
|
||||
critical.
|
||||
- No "theoretical race condition" issues, unless an explanation of how the
|
||||
race can be exploited is also provided.
|
||||
- It can not contain any "trivial" fixes in it (spelling changes,
|
||||
whitespace cleanups, etc.)
|
||||
whitespace cleanups, etc).
|
||||
- It must be accepted by the relevant subsystem maintainer.
|
||||
- It must follow Documentation/SubmittingPatches rules.
|
||||
- It must follow the Documentation/SubmittingPatches rules.
|
||||
|
||||
|
||||
Procedure for submitting patches to the -stable tree:
|
||||
|
||||
- Send the patch, after verifying that it follows the above rules, to
|
||||
stable@kernel.org.
|
||||
- The sender will receive an ack when the patch has been accepted into
|
||||
the queue, or a nak if the patch is rejected. This response might
|
||||
take a few days, according to the developer's schedules.
|
||||
- If accepted, the patch will be added to the -stable queue, for review
|
||||
by other developers.
|
||||
- The sender will receive an ACK when the patch has been accepted into the
|
||||
queue, or a NAK if the patch is rejected. This response might take a few
|
||||
days, according to the developers' schedules.
|
||||
- If accepted, the patch will be added to the -stable queue, for review by
|
||||
other developers.
|
||||
- Security patches should not be sent to this alias, but instead to the
|
||||
documented security@kernel.org.
|
||||
documented security@kernel.org address.
|
||||
|
||||
|
||||
Review cycle:
|
||||
|
||||
- When the -stable maintainers decide for a review cycle, the patches
|
||||
will be sent to the review committee, and the maintainer of the
|
||||
affected area of the patch (unless the submitter is the maintainer of
|
||||
the area) and CC: to the linux-kernel mailing list.
|
||||
- The review committee has 48 hours in which to ack or nak the patch.
|
||||
- When the -stable maintainers decide for a review cycle, the patches will be
|
||||
sent to the review committee, and the maintainer of the affected area of
|
||||
the patch (unless the submitter is the maintainer of the area) and CC: to
|
||||
the linux-kernel mailing list.
|
||||
- The review committee has 48 hours in which to ACK or NAK the patch.
|
||||
- If the patch is rejected by a member of the committee, or linux-kernel
|
||||
members object to the patch, bringing up issues that the maintainers
|
||||
and members did not realize, the patch will be dropped from the
|
||||
queue.
|
||||
- At the end of the review cycle, the acked patches will be added to
|
||||
the latest -stable release, and a new -stable release will happen.
|
||||
- Security patches will be accepted into the -stable tree directly from
|
||||
the security kernel team, and not go through the normal review cycle.
|
||||
members object to the patch, bringing up issues that the maintainers and
|
||||
members did not realize, the patch will be dropped from the queue.
|
||||
- At the end of the review cycle, the ACKed patches will be added to the
|
||||
latest -stable release, and a new -stable release will happen.
|
||||
- Security patches will be accepted into the -stable tree directly from the
|
||||
security kernel team, and not go through the normal review cycle.
|
||||
Contact the kernel security team for more details on this procedure.
|
||||
|
||||
|
||||
Review committee:
|
||||
|
||||
- This will be made up of a number of kernel developers who have
|
||||
volunteered for this task, and a few that haven't.
|
||||
|
||||
- This is made up of a number of kernel developers who have volunteered for
|
||||
this task, and a few that haven't.
|
||||
|
|
|
@ -26,12 +26,14 @@ Currently, these files are in /proc/sys/vm:
|
|||
- min_free_kbytes
|
||||
- laptop_mode
|
||||
- block_dump
|
||||
- drop-caches
|
||||
- zone_reclaim_mode
|
||||
|
||||
==============================================================
|
||||
|
||||
dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
|
||||
dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
|
||||
block_dump, swap_token_timeout:
|
||||
block_dump, swap_token_timeout, drop-caches:
|
||||
|
||||
See Documentation/filesystems/proc.txt
|
||||
|
||||
|
@ -102,3 +104,37 @@ This is used to force the Linux VM to keep a minimum number
|
|||
of kilobytes free. The VM uses this number to compute a pages_min
|
||||
value for each lowmem zone in the system. Each lowmem zone gets
|
||||
a number of reserved free pages based proportionally on its size.
|
||||
|
||||
==============================================================
|
||||
|
||||
percpu_pagelist_fraction
|
||||
|
||||
This is the fraction of pages at most (high mark pcp->high) in each zone that
|
||||
are allocated for each per cpu page list. The min value for this is 8. It
|
||||
means that we don't allow more than 1/8th of pages in each zone to be
|
||||
allocated in any single per_cpu_pagelist. This entry only changes the value
|
||||
of hot per cpu pagelists. User can specify a number like 100 to allocate
|
||||
1/100th of each zone to each per cpu page list.
|
||||
|
||||
The batch value of each per cpu pagelist is also updated as a result. It is
|
||||
set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8).
|
||||
|
||||
The initial value is zero. Kernel does not use this value at boot time to set
|
||||
the high water marks for each per cpu page list.
|
||||
|
||||
===============================================================
|
||||
|
||||
zone_reclaim_mode:
|
||||
|
||||
This is set during bootup to 1 if it is determined that pages from
|
||||
remote zones will cause a significant performance reduction. The
|
||||
page allocator will then reclaim easily reusable pages (those page
|
||||
cache pages that are currently not used) before going off node.
|
||||
|
||||
The user can override this setting. It may be beneficial to switch
|
||||
off zone reclaim if the system is used for a file server and all
|
||||
of memory should be used for caching files from disk.
|
||||
|
||||
It may be beneficial to switch this on if one wants to do zone
|
||||
reclaim regardless of the numa distances in the system.
|
||||
|
||||
|
|
|
@ -141,3 +141,5 @@
|
|||
140 -> Osprey 440 [0070:ff07]
|
||||
141 -> Asound Skyeye PCTV
|
||||
142 -> Sabrent TV-FM (bttv version)
|
||||
143 -> Hauppauge ImpactVCB (bt878) [0070:13eb]
|
||||
144 -> MagicTV
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
15 -> DViCO FusionHDTV DVB-T1 [18ac:db00]
|
||||
16 -> KWorld LTV883RF
|
||||
17 -> DViCO FusionHDTV 3 Gold-Q [18ac:d810]
|
||||
18 -> Hauppauge Nova-T DVB-T [0070:9002]
|
||||
18 -> Hauppauge Nova-T DVB-T [0070:9002,0070:9001]
|
||||
19 -> Conexant DVB-T reference design [14f1:0187]
|
||||
20 -> Provideo PV259 [1540:2580]
|
||||
21 -> DViCO FusionHDTV DVB-T Plus [18ac:db10]
|
||||
21 -> DViCO FusionHDTV DVB-T Plus [18ac:db10,18ac:db11]
|
||||
22 -> pcHDTV HD3000 HDTV [7063:3000]
|
||||
23 -> digitalnow DNTV Live! DVB-T [17de:a8a6]
|
||||
24 -> Hauppauge WinTV 28xxx (Roslyn) models [0070:2801]
|
||||
|
@ -35,3 +35,11 @@
|
|||
34 -> ATI HDTV Wonder [1002:a101]
|
||||
35 -> WinFast DTV1000-T [107d:665f]
|
||||
36 -> AVerTV 303 (M126) [1461:000a]
|
||||
37 -> Hauppauge Nova-S-Plus DVB-S [0070:9201,0070:9202]
|
||||
38 -> Hauppauge Nova-SE2 DVB-S [0070:9200]
|
||||
39 -> KWorld DVB-S 100 [17de:08b2]
|
||||
40 -> Hauppauge WinTV-HVR1100 DVB-T/Hybrid [0070:9400,0070:9402]
|
||||
41 -> Hauppauge WinTV-HVR1100 DVB-T/Hybrid (Low Profile) [0070:9800,0070:9802]
|
||||
42 -> digitalnow DNTV Live! DVB-T Pro [1822:0025]
|
||||
43 -> KWorld/VStream XPert DVB-T with cx22702 [17de:08a1]
|
||||
44 -> DViCO FusionHDTV DVB-T Dual Digital [18ac:db50]
|
||||
|
|
|
@ -56,7 +56,7 @@
|
|||
55 -> LifeView FlyDVB-T DUO [5168:0502,5168:0306]
|
||||
56 -> Avermedia AVerTV 307 [1461:a70a]
|
||||
57 -> Avermedia AVerTV GO 007 FM [1461:f31f]
|
||||
58 -> ADS Tech Instant TV (saa7135) [1421:0350,1421:0370,1421:1370]
|
||||
58 -> ADS Tech Instant TV (saa7135) [1421:0350,1421:0351,1421:0370,1421:1370]
|
||||
59 -> Kworld/Tevion V-Stream Xpert TV PVR7134
|
||||
60 -> Typhoon DVB-T Duo Digital/Analog Cardbus [4e42:0502]
|
||||
61 -> Philips TOUGH DVB-T reference design [1131:2004]
|
||||
|
@ -81,4 +81,5 @@
|
|||
80 -> ASUS Digimatrix TV [1043:0210]
|
||||
81 -> Philips Tiger reference design [1131:2018]
|
||||
82 -> MSI TV@Anywhere plus [1462:6231]
|
||||
|
||||
83 -> Terratec Cinergy 250 PCI TV [153b:1160]
|
||||
84 -> LifeView FlyDVB Trio [5168:0319]
|
||||
|
|
|
@ -40,7 +40,7 @@ tuner=38 - Philips PAL/SECAM multi (FM1216ME MK3)
|
|||
tuner=39 - LG NTSC (newer TAPC series)
|
||||
tuner=40 - HITACHI V7-J180AT
|
||||
tuner=41 - Philips PAL_MK (FI1216 MK)
|
||||
tuner=42 - Philips 1236D ATSC/NTSC daul in
|
||||
tuner=42 - Philips 1236D ATSC/NTSC dual in
|
||||
tuner=43 - Philips NTSC MK3 (FM1236MK3 or FM1236/F)
|
||||
tuner=44 - Philips 4 in 1 (ATI TV Wonder Pro/Conexant)
|
||||
tuner=45 - Microtune 4049 FM5
|
||||
|
@ -50,7 +50,7 @@ tuner=48 - Tenna TNF 8831 BGFF)
|
|||
tuner=49 - Microtune 4042 FI5 ATSC/NTSC dual in
|
||||
tuner=50 - TCL 2002N
|
||||
tuner=51 - Philips PAL/SECAM_D (FM 1256 I-H3)
|
||||
tuner=52 - Thomson DDT 7610 (ATSC/NTSC)
|
||||
tuner=52 - Thomson DTT 7610 (ATSC/NTSC)
|
||||
tuner=53 - Philips FQ1286
|
||||
tuner=54 - tda8290+75
|
||||
tuner=55 - TCL 2002MB
|
||||
|
@ -58,7 +58,7 @@ tuner=56 - Philips PAL/SECAM multi (FQ1216AME MK4)
|
|||
tuner=57 - Philips FQ1236A MK4
|
||||
tuner=58 - Ymec TVision TVF-8531MF/8831MF/8731MF
|
||||
tuner=59 - Ymec TVision TVF-5533MF
|
||||
tuner=60 - Thomson DDT 7611 (ATSC/NTSC)
|
||||
tuner=60 - Thomson DTT 761X (ATSC/NTSC)
|
||||
tuner=61 - Tena TNF9533-D/IF/TNF9533-B/DF
|
||||
tuner=62 - Philips TEA5767HN FM Radio
|
||||
tuner=63 - Philips FMD1216ME MK3 Hybrid Tuner
|
||||
|
@ -68,3 +68,4 @@ tuner=66 - LG NTSC (TALN mini series)
|
|||
tuner=67 - Philips TD1316 Hybrid Tuner
|
||||
tuner=68 - Philips TUV1236D ATSC/NTSC dual in
|
||||
tuner=69 - Tena TNF 5335 MF
|
||||
tuner=70 - Samsung TCPN 2121P30A
|
||||
|
|
|
@ -125,7 +125,7 @@ SMP
|
|||
cpumask=MASK only use cpus with bits set in mask
|
||||
|
||||
additional_cpus=NUM Allow NUM more CPUs for hotplug
|
||||
(defaults are specified by the BIOS or half the available CPUs)
|
||||
(defaults are specified by the BIOS, see Documentation/x86_64/cpu-hotplug-spec)
|
||||
|
||||
NUMA
|
||||
|
||||
|
@ -198,6 +198,6 @@ Debugging
|
|||
|
||||
Misc
|
||||
|
||||
noreplacement Don't replace instructions with more appropiate ones
|
||||
noreplacement Don't replace instructions with more appropriate ones
|
||||
for the CPU. This may be useful on asymmetric MP systems
|
||||
where some CPUs have fewer capabilities than the others.
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
Firmware support for CPU hotplug under Linux/x86-64
|
||||
---------------------------------------------------
|
||||
|
||||
Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to
|
||||
know in advance, at boot time, the maximum number of CPUs that could be plugged
|
||||
into the system. ACPI 3.0 currently has no official way to supply
|
||||
this information from the firmware to the operating system.
|
||||
|
||||
In ACPI each CPU needs an LAPIC object in the MADT table (5.2.11.5 in the
|
||||
ACPI 3.0 specification). ACPI already has the concept of disabled LAPIC
|
||||
objects by setting the Enabled bit in the LAPIC object to zero.
|
||||
|
||||
For CPU hotplug Linux/x86-64 expects now that any possible future hotpluggable
|
||||
CPU is already available in the MADT. If the CPU is not available yet
|
||||
it should have its LAPIC Enabled bit set to 0. Linux will use the number
|
||||
of disabled LAPICs to compute the maximum number of future CPUs.
|
||||
|
||||
In the worst case the user can overwrite this choice using a command line
|
||||
option (additional_cpus=...), but it is recommended to supply the correct
|
||||
number (or a reasonable approximation of it, with erring towards more not less)
|
||||
in the MADT to avoid manual configuration.
|
5
Kbuild
5
Kbuild
|
@ -22,8 +22,6 @@ sed-$(CONFIG_MIPS) := "/^@@@/s///p"
|
|||
|
||||
quiet_cmd_offsets = GEN $@
|
||||
define cmd_offsets
|
||||
mkdir -p $(dir $@); \
|
||||
cat $< | \
|
||||
(set -e; \
|
||||
echo "#ifndef __ASM_OFFSETS_H__"; \
|
||||
echo "#define __ASM_OFFSETS_H__"; \
|
||||
|
@ -34,7 +32,7 @@ define cmd_offsets
|
|||
echo " *"; \
|
||||
echo " */"; \
|
||||
echo ""; \
|
||||
sed -ne $(sed-y); \
|
||||
sed -ne $(sed-y) $<; \
|
||||
echo ""; \
|
||||
echo "#endif" ) > $@
|
||||
endef
|
||||
|
@ -45,5 +43,6 @@ arch/$(ARCH)/kernel/asm-offsets.s: arch/$(ARCH)/kernel/asm-offsets.c FORCE
|
|||
$(call if_changed_dep,cc_s_c)
|
||||
|
||||
$(obj)/$(offsets-file): arch/$(ARCH)/kernel/asm-offsets.s Kbuild
|
||||
$(Q)mkdir -p $(dir $@)
|
||||
$(call cmd,offsets)
|
||||
|
||||
|
|
100
MAINTAINERS
100
MAINTAINERS
|
@ -182,7 +182,7 @@ S: Supported
|
|||
ACPI
|
||||
P: Len Brown
|
||||
M: len.brown@intel.com
|
||||
L: acpi-devel@lists.sourceforge.net
|
||||
L: linux-acpi@vger.kernel.org
|
||||
W: http://acpi.sourceforge.net/
|
||||
T: git kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
|
||||
S: Maintained
|
||||
|
@ -546,16 +546,10 @@ W: http://linuxtv.org
|
|||
T: git kernel.org:/pub/scm/linux/kernel/git/mchehab/v4l-dvb.git
|
||||
S: Maintained
|
||||
|
||||
BUSLOGIC SCSI DRIVER
|
||||
P: Leonard N. Zubkoff
|
||||
M: Leonard N. Zubkoff <lnz@dandelion.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
W: http://www.dandelion.com/Linux/
|
||||
S: Maintained
|
||||
|
||||
COMMON INTERNET FILE SYSTEM (CIFS)
|
||||
P: Steve French
|
||||
M: sfrench@samba.org
|
||||
L: linux-cifs-client@lists.samba.org
|
||||
L: samba-technical@lists.samba.org
|
||||
W: http://us1.samba.org/samba/Linux_CIFS_client.html
|
||||
T: git kernel.org:/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git
|
||||
|
@ -811,6 +805,7 @@ S: Maintained
|
|||
DOCBOOK FOR DOCUMENTATION
|
||||
P: Martin Waitz
|
||||
M: tali@admingilde.org
|
||||
T: git http://tali.admingilde.org/git/linux-docbook.git
|
||||
S: Maintained
|
||||
|
||||
DOUBLETALK DRIVER
|
||||
|
@ -872,6 +867,15 @@ L: ebtables-devel@lists.sourceforge.net
|
|||
W: http://ebtables.sourceforge.net/
|
||||
S: Maintained
|
||||
|
||||
EDAC-CORE
|
||||
P: Doug Thompson
|
||||
M: norsk5@xmission.com, dthompson@linuxnetworx.com
|
||||
P: Dave Peterson
|
||||
M: dsp@llnl.gov, dave_peterson@pobox.com
|
||||
L: bluesmoke-devel@lists.sourceforge.net
|
||||
W: bluesmoke.sourceforge.net
|
||||
S: Maintained
|
||||
|
||||
EEPRO100 NETWORK DRIVER
|
||||
P: Andrey V. Savochkin
|
||||
M: saw@saw.sw.com.sg
|
||||
|
@ -927,7 +931,6 @@ S: Maintained
|
|||
FARSYNC SYNCHRONOUS DRIVER
|
||||
P: Kevin Curtis
|
||||
M: kevin.curtis@farsite.co.uk
|
||||
M: kevin.curtis@farsite.co.uk
|
||||
W: http://www.farsite.co.uk/
|
||||
S: Supported
|
||||
|
||||
|
@ -1307,6 +1310,12 @@ M: ttb@tentacle.dhs.org and rml@novell.com
|
|||
L: linux-kernel@vger.kernel.org
|
||||
S: Maintained
|
||||
|
||||
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
|
||||
P: Sylvain Meyer
|
||||
M: sylvain.meyer@worldonline.fr
|
||||
L: linux-fbdev-devel@lists.sourceforge.net
|
||||
S: Maintained
|
||||
|
||||
INTEL 810/815 FRAMEBUFFER DRIVER
|
||||
P: Antonino Daplas
|
||||
M: adaplas@pol.net
|
||||
|
@ -1398,7 +1407,7 @@ IRDA SUBSYSTEM
|
|||
P: Jean Tourrilhes
|
||||
L: irda-users@lists.sourceforge.net (subscribers-only)
|
||||
W: http://irda.sourceforge.net/
|
||||
S: Maintained
|
||||
S: Odd Fixes
|
||||
|
||||
ISAPNP
|
||||
P: Jaroslav Kysela
|
||||
|
@ -1696,12 +1705,13 @@ M: mtk-manpages@gmx.net
|
|||
W: ftp://ftp.kernel.org/pub/linux/docs/manpages
|
||||
S: Maintained
|
||||
|
||||
MARVELL MV64340 ETHERNET DRIVER
|
||||
MARVELL MV643XX ETHERNET DRIVER
|
||||
P: Dale Farnsworth
|
||||
M: dale@farnsworth.org
|
||||
P: Manish Lachwani
|
||||
M: Manish_Lachwani@pmc-sierra.com
|
||||
L: linux-mips@linux-mips.org
|
||||
M: mlachwani@mvista.com
|
||||
L: netdev@vger.kernel.org
|
||||
S: Supported
|
||||
S: Odd Fixes for 2.4; Maintained for 2.6.
|
||||
|
||||
MATROX FRAMEBUFFER DRIVER
|
||||
P: Petr Vandrovec
|
||||
|
@ -1842,7 +1852,14 @@ M: yoshfuji@linux-ipv6.org
|
|||
P: Patrick McHardy
|
||||
M: kaber@coreworks.de
|
||||
L: netdev@vger.kernel.org
|
||||
T: git kernel.org:/pub/scm/linux/kernel/davem/net-2.6.git
|
||||
T: git kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6.git
|
||||
S: Maintained
|
||||
|
||||
NETWORKING [WIRELESS]
|
||||
P: John W. Linville
|
||||
M: linville@tuxdriver.com
|
||||
L: netdev@vger.kernel.org
|
||||
T: git kernel.org:/pub/scm/linux/kernel/git/linville/wireless-2.6.git
|
||||
S: Maintained
|
||||
|
||||
IPVS
|
||||
|
@ -1897,11 +1914,11 @@ W: http://linux-ntfs.sf.net/
|
|||
T: git kernel.org:/pub/scm/linux/kernel/git/aia21/ntfs-2.6.git
|
||||
S: Maintained
|
||||
|
||||
NVIDIA (RIVA) FRAMEBUFFER DRIVER
|
||||
P: Ani Joshi
|
||||
M: ajoshi@shell.unixbox.com
|
||||
L: linux-nvidia@lists.surfsouth.com
|
||||
S: Maintained
|
||||
NVIDIA (rivafb and nvidiafb) FRAMEBUFFER DRIVER
|
||||
P: Antonino Daplas
|
||||
M: adaplas@pol.net
|
||||
L: linux-fbdev-devel@lists.sourceforge.net
|
||||
S: Maintained
|
||||
|
||||
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
|
||||
P: Mark Fasheh
|
||||
|
@ -1996,6 +2013,13 @@ M: hch@infradead.org
|
|||
L: linux-abi-devel@lists.sourceforge.net
|
||||
S: Maintained
|
||||
|
||||
PCI ERROR RECOVERY
|
||||
P: Linas Vepstas
|
||||
M: linas@austin.ibm.com
|
||||
L: linux-kernel@vger.kernel.org
|
||||
L: linux-pci@atrey.karlin.mff.cuni.cz
|
||||
S: Supported
|
||||
|
||||
PCI SOUND DRIVERS (ES1370, ES1371 and SONICVIBES)
|
||||
P: Thomas Sailer
|
||||
M: sailer@ife.ee.ethz.ch
|
||||
|
@ -2054,7 +2078,7 @@ S: Maintained
|
|||
POSIX CLOCKS and TIMERS
|
||||
P: George Anzinger
|
||||
M: george@mvista.com
|
||||
L: netdev@vger.kernel.org
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Supported
|
||||
|
||||
POWERPC 4xx EMAC DRIVER
|
||||
|
@ -2189,6 +2213,12 @@ L: rtl@rtlinux.org
|
|||
W: www.rtlinux.org
|
||||
S: Maintained
|
||||
|
||||
S3 SAVAGE FRAMEBUFFER DRIVER
|
||||
P: Antonino Daplas
|
||||
M: adaplas@pol.net
|
||||
L: linux-fbdev-devel@lists.sourceforge.net
|
||||
S: Maintained
|
||||
|
||||
S390
|
||||
P: Martin Schwidefsky
|
||||
M: schwidefsky@de.ibm.com
|
||||
|
@ -2360,13 +2390,6 @@ P: Nicolas Pitre
|
|||
M: nico@cam.org
|
||||
S: Maintained
|
||||
|
||||
SNA NETWORK LAYER
|
||||
P: Jay Schulist
|
||||
M: jschlst@samba.org
|
||||
L: linux-sna@turbolinux.com
|
||||
W: http://www.linux-sna.org
|
||||
S: Supported
|
||||
|
||||
SOFTWARE RAID (Multiple Disks) SUPPORT
|
||||
P: Ingo Molnar
|
||||
M: mingo@redhat.com
|
||||
|
@ -2488,7 +2511,7 @@ P: Paul Mundt
|
|||
M: lethal@linux-sh.org
|
||||
P: Kazumoto Kojima
|
||||
M: kkojima@rr.iij4u.or.jp
|
||||
L: linux-sh@m17n.org
|
||||
L: linuxsh-dev@lists.sourceforge.net
|
||||
W: http://www.linux-sh.org
|
||||
W: http://www.m17n.org/linux-sh/
|
||||
W: http://www.rr.iij4u.or.jp/~kkojima/linux-sh4.html
|
||||
|
@ -2527,6 +2550,19 @@ P: Romain Lievin
|
|||
M: roms@lpg.ticalc.org
|
||||
S: Maintained
|
||||
|
||||
TIPC NETWORK LAYER
|
||||
P: Per Liden
|
||||
M: per.liden@ericsson.com
|
||||
P: Jon Maloy
|
||||
M: jon.maloy@ericsson.com
|
||||
P: Allan Stephens
|
||||
M: allan.stephens@windriver.com
|
||||
L: tipc-discussion@lists.sourceforge.net
|
||||
W: http://tipc.sourceforge.net/
|
||||
W: http://tipc.cslab.ericsson.net/
|
||||
T: git tipc.cslab.ericsson.net:/pub/git/tipc.git
|
||||
S: Maintained
|
||||
|
||||
TLAN NETWORK DRIVER
|
||||
P: Samuel Chessman
|
||||
M: chessman@tux.org
|
||||
|
@ -2948,6 +2984,12 @@ M: dm@sangoma.com
|
|||
W: http://www.sangoma.com
|
||||
S: Supported
|
||||
|
||||
WATCHDOG DEVICE DRIVERS
|
||||
P: Wim Van Sebroeck
|
||||
M: wim@iguana.be
|
||||
T: git kernel.org:/pub/scm/linux/kernel/git/wim/linux-2.6-watchdog.git
|
||||
S: Maintained
|
||||
|
||||
WAVELAN NETWORK DRIVER & WIRELESS EXTENSIONS
|
||||
P: Jean Tourrilhes
|
||||
M: jt@hpl.hp.com
|
||||
|
|
126
Makefile
126
Makefile
|
@ -1,7 +1,7 @@
|
|||
VERSION = 2
|
||||
PATCHLEVEL = 6
|
||||
SUBLEVEL = 15
|
||||
EXTRAVERSION =
|
||||
SUBLEVEL = 16
|
||||
EXTRAVERSION =-rc1
|
||||
NAME=Sliding Snow Leopard
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
@ -106,12 +106,13 @@ KBUILD_OUTPUT := $(shell cd $(KBUILD_OUTPUT) && /bin/pwd)
|
|||
$(if $(KBUILD_OUTPUT),, \
|
||||
$(error output directory "$(saved-output)" does not exist))
|
||||
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
.PHONY: $(MAKECMDGOALS) cdbuilddir
|
||||
$(MAKECMDGOALS) _all: cdbuilddir
|
||||
|
||||
$(filter-out _all,$(MAKECMDGOALS)) _all:
|
||||
cdbuilddir:
|
||||
$(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \
|
||||
KBUILD_SRC=$(CURDIR) \
|
||||
KBUILD_EXTMOD="$(KBUILD_EXTMOD)" -f $(CURDIR)/Makefile $@
|
||||
KBUILD_EXTMOD="$(KBUILD_EXTMOD)" -f $(CURDIR)/Makefile $(MAKECMDGOALS)
|
||||
|
||||
# Leave processing to above invocation of make
|
||||
skip-makefile := 1
|
||||
|
@ -141,24 +142,6 @@ VPATH := $(srctree)
|
|||
|
||||
export srctree objtree VPATH TOPDIR
|
||||
|
||||
nullstring :=
|
||||
space := $(nullstring) # end of line
|
||||
|
||||
# Take the contents of any files called localversion* and the config
|
||||
# variable CONFIG_LOCALVERSION and append them to KERNELRELEASE. Be
|
||||
# careful not to include files twice if building in the source
|
||||
# directory. LOCALVERSION from the command line override all of this
|
||||
|
||||
localver := $(objtree)/localversion* $(srctree)/localversion*
|
||||
localver := $(sort $(wildcard $(localver)))
|
||||
# skip backup files (containing '~')
|
||||
localver := $(foreach f, $(localver), $(if $(findstring ~, $(f)),,$(f)))
|
||||
|
||||
LOCALVERSION = $(subst $(space),, \
|
||||
$(shell cat /dev/null $(localver)) \
|
||||
$(patsubst "%",%,$(CONFIG_LOCALVERSION)))
|
||||
|
||||
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)$(LOCALVERSION)
|
||||
|
||||
# SUBARCH tells the usermode build what the underlying arch is. That is set
|
||||
# first, and if a usermode build is happening, the "ARCH=um" on the command
|
||||
|
@ -169,7 +152,7 @@ KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)$(LOCALVERSION)
|
|||
SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
|
||||
-e s/arm.*/arm/ -e s/sa110/arm/ \
|
||||
-e s/s390x/s390/ -e s/parisc64/parisc/ \
|
||||
-e s/ppc64/powerpc/ )
|
||||
-e s/ppc.*/powerpc/ )
|
||||
|
||||
# Cross compiling and selecting different set of gcc/bin-utils
|
||||
# ---------------------------------------------------------------------------
|
||||
|
@ -251,7 +234,7 @@ export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD
|
|||
# If it is set to "silent_", nothing will be printed at all, since
|
||||
# the variable $(silent_cmd_cc_o_c) doesn't exist.
|
||||
#
|
||||
# A simple variant is to prefix commands with $(Q) - that's usefull
|
||||
# A simple variant is to prefix commands with $(Q) - that's useful
|
||||
# for commands that shall be hidden in non-verbose mode.
|
||||
#
|
||||
# $(Q)ln $@ :<
|
||||
|
@ -280,6 +263,13 @@ export quiet Q KBUILD_VERBOSE
|
|||
# cc support functions to be used (only) in arch/$(ARCH)/Makefile
|
||||
# See documentation in Documentation/kbuild/makefiles.txt
|
||||
|
||||
# as-option
|
||||
# Usage: cflags-y += $(call as-option, -Wa$(comma)-isa=foo,)
|
||||
|
||||
as-option = $(shell if $(CC) $(CFLAGS) $(1) -Wa,-Z -c -o /dev/null \
|
||||
-xassembler /dev/null > /dev/null 2>&1; then echo "$(1)"; \
|
||||
else echo "$(2)"; fi ;)
|
||||
|
||||
# cc-option
|
||||
# Usage: cflags-y += $(call cc-option, -march=winchip-c6, -march=i586)
|
||||
|
||||
|
@ -353,7 +343,11 @@ CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
|
|||
-ffreestanding
|
||||
AFLAGS := -D__ASSEMBLY__
|
||||
|
||||
export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION LOCALVERSION KERNELRELEASE \
|
||||
# Read KERNELRELEASE from .kernelrelease (if it exists)
|
||||
KERNELRELEASE = $(shell cat .kernelrelease 2> /dev/null)
|
||||
KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
|
||||
|
||||
export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION \
|
||||
ARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \
|
||||
CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE \
|
||||
HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
|
||||
|
@ -448,6 +442,7 @@ export KBUILD_DEFCONFIG
|
|||
config %config: scripts_basic outputmakefile FORCE
|
||||
$(Q)mkdir -p include/linux
|
||||
$(Q)$(MAKE) $(build)=scripts/kconfig $@
|
||||
$(Q)$(MAKE) .kernelrelease
|
||||
|
||||
else
|
||||
# ===========================================================================
|
||||
|
@ -551,33 +546,13 @@ export KBUILD_IMAGE ?= vmlinux
|
|||
# images. Default is /boot, but you can set it to other values
|
||||
export INSTALL_PATH ?= /boot
|
||||
|
||||
# If CONFIG_LOCALVERSION_AUTO is set, we automatically perform some tests
|
||||
# and try to determine if the current source tree is a release tree, of any sort,
|
||||
# or if is a pure development tree.
|
||||
#
|
||||
# A 'release tree' is any tree with a git TAG associated
|
||||
# with it. The primary goal of this is to make it safe for a native
|
||||
# git/CVS/SVN user to build a release tree (i.e, 2.6.9) and also to
|
||||
# continue developing against the current Linus tree, without having the Linus
|
||||
# tree overwrite the 2.6.9 tree when installed.
|
||||
#
|
||||
# Currently, only git is supported.
|
||||
# Other SCMs can edit scripts/setlocalversion and add the appropriate
|
||||
# checks as needed.
|
||||
|
||||
|
||||
ifdef CONFIG_LOCALVERSION_AUTO
|
||||
localversion-auto := $(shell $(PERL) $(srctree)/scripts/setlocalversion $(srctree))
|
||||
LOCALVERSION := $(LOCALVERSION)$(localversion-auto)
|
||||
endif
|
||||
|
||||
#
|
||||
# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
|
||||
# relocations required by build roots. This is not defined in the
|
||||
# makefile but the argument can be passed to make if needed.
|
||||
#
|
||||
|
||||
MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
|
||||
MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
|
||||
export MODLIB
|
||||
|
||||
|
||||
|
@ -782,6 +757,48 @@ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
|
|||
$(vmlinux-dirs): prepare scripts
|
||||
$(Q)$(MAKE) $(build)=$@
|
||||
|
||||
# Build the kernel release string
|
||||
# The KERNELRELEASE is stored in a file named .kernelrelease
|
||||
# to be used when executing for example make install or make modules_install
|
||||
#
|
||||
# Take the contents of any files called localversion* and the config
|
||||
# variable CONFIG_LOCALVERSION and append them to KERNELRELEASE.
|
||||
# LOCALVERSION from the command line overrides all of this
|
||||
|
||||
nullstring :=
|
||||
space := $(nullstring) # end of line
|
||||
|
||||
___localver = $(objtree)/localversion* $(srctree)/localversion*
|
||||
__localver = $(sort $(wildcard $(___localver)))
|
||||
# skip backup files (containing '~')
|
||||
_localver = $(foreach f, $(__localver), $(if $(findstring ~, $(f)),,$(f)))
|
||||
|
||||
localver = $(subst $(space),, \
|
||||
$(shell cat /dev/null $(_localver)) \
|
||||
$(patsubst "%",%,$(CONFIG_LOCALVERSION)))
|
||||
|
||||
# If CONFIG_LOCALVERSION_AUTO is set scripts/setlocalversion is called
|
||||
# and if the SCM is known, a tag from the SCM is appended.
|
||||
# The appended tag is determined by the SCM used.
|
||||
#
|
||||
# Currently, only git is supported.
|
||||
# Other SCMs can edit scripts/setlocalversion and add the appropriate
|
||||
# checks as needed.
|
||||
ifdef CONFIG_LOCALVERSION_AUTO
|
||||
_localver-auto = $(shell $(CONFIG_SHELL) \
|
||||
$(srctree)/scripts/setlocalversion $(srctree))
|
||||
localver-auto = $(LOCALVERSION)$(_localver-auto)
|
||||
endif
|
||||
|
||||
localver-full = $(localver)$(localver-auto)
|
||||
|
||||
# Store (new) KERNELRELEASE string in .kernelrelease
|
||||
kernelrelease = $(KERNELVERSION)$(localver-full)
|
||||
.kernelrelease: FORCE
|
||||
$(Q)rm -f $@
|
||||
$(Q)echo $(kernelrelease) > $@
|
||||
|
||||
|
||||
# Things we need to do before we recursively start building the kernel
|
||||
# or the modules are listed in "prepare".
|
||||
# A multi level approach is used. prepareN is processed before prepareN-1.
|
||||
|
@ -798,8 +815,7 @@ $(vmlinux-dirs): prepare scripts
|
|||
# and if so do:
|
||||
# 1) Check that make has not been executed in the kernel src $(srctree)
|
||||
# 2) Create the include2 directory, used for the second asm symlink
|
||||
|
||||
prepare3:
|
||||
prepare3: .kernelrelease
|
||||
ifneq ($(KBUILD_SRC),)
|
||||
@echo ' Using $(srctree) as source for kernel'
|
||||
$(Q)if [ -f $(srctree)/.config ]; then \
|
||||
|
@ -890,7 +906,7 @@ define filechk_version.h
|
|||
)
|
||||
endef
|
||||
|
||||
include/linux/version.h: $(srctree)/Makefile FORCE
|
||||
include/linux/version.h: $(srctree)/Makefile .config FORCE
|
||||
$(call filechk,version.h)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
@ -984,9 +1000,9 @@ CLEAN_FILES += vmlinux System.map \
|
|||
|
||||
# Directories & files removed with 'make mrproper'
|
||||
MRPROPER_DIRS += include/config include2
|
||||
MRPROPER_FILES += .config .config.old include/asm .version \
|
||||
MRPROPER_FILES += .config .config.old include/asm .version .old_version \
|
||||
include/linux/autoconf.h include/linux/version.h \
|
||||
Module.symvers tags TAGS cscope*
|
||||
.kernelrelease Module.symvers tags TAGS cscope*
|
||||
|
||||
# clean - Delete most, but leave enough to build external modules
|
||||
#
|
||||
|
@ -1072,6 +1088,7 @@ help:
|
|||
@echo ' tags/TAGS - Generate tags file for editors'
|
||||
@echo ' cscope - Generate cscope index'
|
||||
@echo ' kernelrelease - Output the release version string'
|
||||
@echo ' kernelversion - Output the version stored in Makefile'
|
||||
@echo ''
|
||||
@echo 'Static analysers'
|
||||
@echo ' buildcheck - List dangling references to vmlinux discarded sections'
|
||||
|
@ -1292,7 +1309,10 @@ checkstack:
|
|||
$(PERL) $(src)/scripts/checkstack.pl $(ARCH)
|
||||
|
||||
kernelrelease:
|
||||
@echo $(KERNELRELEASE)
|
||||
$(if $(wildcard .kernelrelease), $(Q)echo $(KERNELRELEASE), \
|
||||
$(error kernelrelease not valid - run 'make *config' to update it))
|
||||
kernelversion:
|
||||
@echo $(KERNELVERSION)
|
||||
|
||||
# FIXME Should go into a make.lib or something
|
||||
# ===========================================================================
|
||||
|
|
37
README
37
README
|
@ -1,4 +1,4 @@
|
|||
Linux kernel release 2.6.xx
|
||||
Linux kernel release 2.6.xx <http://kernel.org>
|
||||
|
||||
These are the release notes for Linux version 2.6. Read them carefully,
|
||||
as they tell you what this is all about, explain how to install the
|
||||
|
@ -6,23 +6,31 @@ kernel, and what to do if something goes wrong.
|
|||
|
||||
WHAT IS LINUX?
|
||||
|
||||
Linux is a Unix clone written from scratch by Linus Torvalds with
|
||||
assistance from a loosely-knit team of hackers across the Net.
|
||||
It aims towards POSIX compliance.
|
||||
Linux is a clone of the operating system Unix, written from scratch by
|
||||
Linus Torvalds with assistance from a loosely-knit team of hackers across
|
||||
the Net. It aims towards POSIX and Single UNIX Specification compliance.
|
||||
|
||||
It has all the features you would expect in a modern fully-fledged
|
||||
Unix, including true multitasking, virtual memory, shared libraries,
|
||||
demand loading, shared copy-on-write executables, proper memory
|
||||
management and TCP/IP networking.
|
||||
It has all the features you would expect in a modern fully-fledged Unix,
|
||||
including true multitasking, virtual memory, shared libraries, demand
|
||||
loading, shared copy-on-write executables, proper memory management,
|
||||
and multistack networking including IPv4 and IPv6.
|
||||
|
||||
It is distributed under the GNU General Public License - see the
|
||||
accompanying COPYING file for more details.
|
||||
|
||||
ON WHAT HARDWARE DOES IT RUN?
|
||||
|
||||
Linux was first developed for 386/486-based PCs. These days it also
|
||||
runs on ARMs, DEC Alphas, SUN Sparcs, M68000 machines (like Atari and
|
||||
Amiga), MIPS and PowerPC, and others.
|
||||
Although originally developed for 32-bit x86-based PCs (386 or higher),
|
||||
today Linux also runs on (at least) the Compaq Alpha AXP, Sun SPARC and
|
||||
UltraSPARC, Motorola 68000, PowerPC, PowerPC64, ARM, Hitachi SuperH,
|
||||
IBM S/390, MIPS, HP PA-RISC, Intel IA-64, DEC VAX, AMD x86-64, AXIS CRIS,
|
||||
and Renesas M32R architectures.
|
||||
|
||||
Linux is easily portable to most general-purpose 32- or 64-bit architectures
|
||||
as long as they have a paged memory management unit (PMMU) and a port of the
|
||||
GNU C compiler (gcc) (part of The GNU Compiler Collection, GCC). Linux has
|
||||
also been ported to a number of architectures without a PMMU, although
|
||||
functionality is then obviously somewhat limited.
|
||||
|
||||
DOCUMENTATION:
|
||||
|
||||
|
@ -183,11 +191,8 @@ CONFIGURING the kernel:
|
|||
|
||||
COMPILING the kernel:
|
||||
|
||||
- Make sure you have gcc 2.95.3 available.
|
||||
gcc 2.91.66 (egcs-1.1.2), and gcc 2.7.2.3 are known to miscompile
|
||||
some parts of the kernel, and are *no longer supported*.
|
||||
Also remember to upgrade your binutils package (for as/ld/nm and company)
|
||||
if necessary. For more information, refer to Documentation/Changes.
|
||||
- Make sure you have at least gcc 3.2 available.
|
||||
For more information, refer to Documentation/Changes.
|
||||
|
||||
Please note that you can still run a.out user programs with this kernel.
|
||||
|
||||
|
|
|
@ -18,9 +18,6 @@ config MMU
|
|||
bool
|
||||
default y
|
||||
|
||||
config UID16
|
||||
bool
|
||||
|
||||
config RWSEM_GENERIC_SPINLOCK
|
||||
bool
|
||||
|
||||
|
|
|
@ -40,7 +40,6 @@
|
|||
#include <asm/unistd.h>
|
||||
|
||||
extern struct hwrpb_struct *hwrpb;
|
||||
extern void dump_thread(struct pt_regs *, struct user *);
|
||||
extern spinlock_t rtc_lock;
|
||||
|
||||
/* these are C runtime functions with special calling conventions: */
|
||||
|
|
|
@ -960,7 +960,7 @@ osf_utimes(char __user *filename, struct timeval32 __user *tvs)
|
|||
return -EFAULT;
|
||||
}
|
||||
|
||||
return do_utimes(filename, tvs ? ktvs : NULL);
|
||||
return do_utimes(AT_FDCWD, filename, tvs ? ktvs : NULL);
|
||||
}
|
||||
|
||||
#define MAX_SELECT_SECONDS \
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <linux/pci.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/sched.h>
|
||||
|
|
|
@ -43,6 +43,11 @@
|
|||
#include "proto.h"
|
||||
#include "pci_impl.h"
|
||||
|
||||
/*
|
||||
* Power off function, if any
|
||||
*/
|
||||
void (*pm_power_off)(void) = machine_power_off;
|
||||
|
||||
void
|
||||
cpu_idle(void)
|
||||
{
|
||||
|
@ -271,7 +276,7 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
|
|||
{
|
||||
extern void ret_from_fork(void);
|
||||
|
||||
struct thread_info *childti = p->thread_info;
|
||||
struct thread_info *childti = task_thread_info(p);
|
||||
struct pt_regs * childregs;
|
||||
struct switch_stack * childstack, *stack;
|
||||
unsigned long stack_offset, settls;
|
||||
|
@ -280,7 +285,7 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
|
|||
if (!(regs->ps & 8))
|
||||
stack_offset = (PAGE_SIZE-1) & (unsigned long) regs;
|
||||
childregs = (struct pt_regs *)
|
||||
(stack_offset + PAGE_SIZE + (long) childti);
|
||||
(stack_offset + PAGE_SIZE + task_stack_page(p));
|
||||
|
||||
*childregs = *regs;
|
||||
settls = regs->r20;
|
||||
|
@ -423,30 +428,15 @@ dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti)
|
|||
int
|
||||
dump_elf_task(elf_greg_t *dest, struct task_struct *task)
|
||||
{
|
||||
struct thread_info *ti;
|
||||
struct pt_regs *pt;
|
||||
|
||||
ti = task->thread_info;
|
||||
pt = (struct pt_regs *)((unsigned long)ti + 2*PAGE_SIZE) - 1;
|
||||
|
||||
dump_elf_thread(dest, pt, ti);
|
||||
|
||||
dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task));
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task)
|
||||
{
|
||||
struct thread_info *ti;
|
||||
struct pt_regs *pt;
|
||||
struct switch_stack *sw;
|
||||
|
||||
ti = task->thread_info;
|
||||
pt = (struct pt_regs *)((unsigned long)ti + 2*PAGE_SIZE) - 1;
|
||||
sw = (struct switch_stack *)pt - 1;
|
||||
|
||||
struct switch_stack *sw = (struct switch_stack *)task_pt_regs(task) - 1;
|
||||
memcpy(dest, sw->fp, 32 * 8);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -487,8 +477,8 @@ out:
|
|||
unsigned long
|
||||
thread_saved_pc(task_t *t)
|
||||
{
|
||||
unsigned long base = (unsigned long)t->thread_info;
|
||||
unsigned long fp, sp = t->thread_info->pcb.ksp;
|
||||
unsigned long base = (unsigned long)task_stack_page(t);
|
||||
unsigned long fp, sp = task_thread_info(t)->pcb.ksp;
|
||||
|
||||
if (sp > base && sp+6*8 < base + 16*1024) {
|
||||
fp = ((unsigned long*)sp)[6];
|
||||
|
@ -518,7 +508,7 @@ get_wchan(struct task_struct *p)
|
|||
|
||||
pc = thread_saved_pc(p);
|
||||
if (in_sched_functions(pc)) {
|
||||
schedule_frame = ((unsigned long *)p->thread_info->pcb.ksp)[6];
|
||||
schedule_frame = ((unsigned long *)task_thread_info(p)->pcb.ksp)[6];
|
||||
return ((unsigned long *)schedule_frame)[12];
|
||||
}
|
||||
return pc;
|
||||
|
|
|
@ -72,6 +72,13 @@ enum {
|
|||
REG_R0 = 0, REG_F0 = 32, REG_FPCR = 63, REG_PC = 64
|
||||
};
|
||||
|
||||
#define PT_REG(reg) \
|
||||
(PAGE_SIZE*2 - sizeof(struct pt_regs) + offsetof(struct pt_regs, reg))
|
||||
|
||||
#define SW_REG(reg) \
|
||||
(PAGE_SIZE*2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \
|
||||
+ offsetof(struct switch_stack, reg))
|
||||
|
||||
static int regoff[] = {
|
||||
PT_REG( r0), PT_REG( r1), PT_REG( r2), PT_REG( r3),
|
||||
PT_REG( r4), PT_REG( r5), PT_REG( r6), PT_REG( r7),
|
||||
|
@ -103,14 +110,14 @@ get_reg_addr(struct task_struct * task, unsigned long regno)
|
|||
unsigned long *addr;
|
||||
|
||||
if (regno == 30) {
|
||||
addr = &task->thread_info->pcb.usp;
|
||||
addr = &task_thread_info(task)->pcb.usp;
|
||||
} else if (regno == 65) {
|
||||
addr = &task->thread_info->pcb.unique;
|
||||
addr = &task_thread_info(task)->pcb.unique;
|
||||
} else if (regno == 31 || regno > 65) {
|
||||
zero = 0;
|
||||
addr = &zero;
|
||||
} else {
|
||||
addr = (void *)task->thread_info + regoff[regno];
|
||||
addr = task_stack_page(task) + regoff[regno];
|
||||
}
|
||||
return addr;
|
||||
}
|
||||
|
@ -125,7 +132,7 @@ get_reg(struct task_struct * task, unsigned long regno)
|
|||
if (regno == 63) {
|
||||
unsigned long fpcr = *get_reg_addr(task, regno);
|
||||
unsigned long swcr
|
||||
= task->thread_info->ieee_state & IEEE_SW_MASK;
|
||||
= task_thread_info(task)->ieee_state & IEEE_SW_MASK;
|
||||
swcr = swcr_update_status(swcr, fpcr);
|
||||
return fpcr | swcr;
|
||||
}
|
||||
|
@ -139,8 +146,8 @@ static int
|
|||
put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
|
||||
{
|
||||
if (regno == 63) {
|
||||
task->thread_info->ieee_state
|
||||
= ((task->thread_info->ieee_state & ~IEEE_SW_MASK)
|
||||
task_thread_info(task)->ieee_state
|
||||
= ((task_thread_info(task)->ieee_state & ~IEEE_SW_MASK)
|
||||
| (data & IEEE_SW_MASK));
|
||||
data = (data & FPCR_DYN_MASK) | ieee_swcr_to_fpcr(data);
|
||||
}
|
||||
|
@ -188,35 +195,35 @@ ptrace_set_bpt(struct task_struct * child)
|
|||
* branch (emulation can be tricky for fp branches).
|
||||
*/
|
||||
displ = ((s32)(insn << 11)) >> 9;
|
||||
child->thread_info->bpt_addr[nsaved++] = pc + 4;
|
||||
task_thread_info(child)->bpt_addr[nsaved++] = pc + 4;
|
||||
if (displ) /* guard against unoptimized code */
|
||||
child->thread_info->bpt_addr[nsaved++]
|
||||
task_thread_info(child)->bpt_addr[nsaved++]
|
||||
= pc + 4 + displ;
|
||||
DBG(DBG_BPT, ("execing branch\n"));
|
||||
} else if (op_code == 0x1a) {
|
||||
reg_b = (insn >> 16) & 0x1f;
|
||||
child->thread_info->bpt_addr[nsaved++] = get_reg(child, reg_b);
|
||||
task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b);
|
||||
DBG(DBG_BPT, ("execing jump\n"));
|
||||
} else {
|
||||
child->thread_info->bpt_addr[nsaved++] = pc + 4;
|
||||
task_thread_info(child)->bpt_addr[nsaved++] = pc + 4;
|
||||
DBG(DBG_BPT, ("execing normal insn\n"));
|
||||
}
|
||||
|
||||
/* install breakpoints: */
|
||||
for (i = 0; i < nsaved; ++i) {
|
||||
res = read_int(child, child->thread_info->bpt_addr[i],
|
||||
res = read_int(child, task_thread_info(child)->bpt_addr[i],
|
||||
(int *) &insn);
|
||||
if (res < 0)
|
||||
return res;
|
||||
child->thread_info->bpt_insn[i] = insn;
|
||||
task_thread_info(child)->bpt_insn[i] = insn;
|
||||
DBG(DBG_BPT, (" -> next_pc=%lx\n",
|
||||
child->thread_info->bpt_addr[i]));
|
||||
res = write_int(child, child->thread_info->bpt_addr[i],
|
||||
task_thread_info(child)->bpt_addr[i]));
|
||||
res = write_int(child, task_thread_info(child)->bpt_addr[i],
|
||||
BREAKINST);
|
||||
if (res < 0)
|
||||
return res;
|
||||
}
|
||||
child->thread_info->bpt_nsaved = nsaved;
|
||||
task_thread_info(child)->bpt_nsaved = nsaved;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -227,9 +234,9 @@ ptrace_set_bpt(struct task_struct * child)
|
|||
int
|
||||
ptrace_cancel_bpt(struct task_struct * child)
|
||||
{
|
||||
int i, nsaved = child->thread_info->bpt_nsaved;
|
||||
int i, nsaved = task_thread_info(child)->bpt_nsaved;
|
||||
|
||||
child->thread_info->bpt_nsaved = 0;
|
||||
task_thread_info(child)->bpt_nsaved = 0;
|
||||
|
||||
if (nsaved > 2) {
|
||||
printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved);
|
||||
|
@ -237,8 +244,8 @@ ptrace_cancel_bpt(struct task_struct * child)
|
|||
}
|
||||
|
||||
for (i = 0; i < nsaved; ++i) {
|
||||
write_int(child, child->thread_info->bpt_addr[i],
|
||||
child->thread_info->bpt_insn[i]);
|
||||
write_int(child, task_thread_info(child)->bpt_addr[i],
|
||||
task_thread_info(child)->bpt_insn[i]);
|
||||
}
|
||||
return (nsaved != 0);
|
||||
}
|
||||
|
@ -265,30 +272,16 @@ do_sys_ptrace(long request, long pid, long addr, long data,
|
|||
lock_kernel();
|
||||
DBG(DBG_MEM, ("request=%ld pid=%ld addr=0x%lx data=0x%lx\n",
|
||||
request, pid, addr, data));
|
||||
ret = -EPERM;
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out_notsk;
|
||||
ret = security_ptrace(current->parent, current);
|
||||
if (ret)
|
||||
goto out_notsk;
|
||||
/* set the ptrace bit in the process ptrace flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
ret = ptrace_traceme();
|
||||
goto out_notsk;
|
||||
}
|
||||
if (pid == 1) /* you may not mess with init */
|
||||
goto out_notsk;
|
||||
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (!child)
|
||||
child = ptrace_get_task_struct(pid);
|
||||
if (IS_ERR(child)) {
|
||||
ret = PTR_ERR(child);
|
||||
goto out_notsk;
|
||||
}
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
|
@ -369,7 +362,7 @@ do_sys_ptrace(long request, long pid, long addr, long data,
|
|||
if (!valid_signal(data))
|
||||
break;
|
||||
/* Mark single stepping. */
|
||||
child->thread_info->bpt_nsaved = -1;
|
||||
task_thread_info(child)->bpt_nsaved = -1;
|
||||
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
|
||||
child->exit_code = data;
|
||||
wake_up_process(child);
|
||||
|
|
|
@ -302,7 +302,7 @@ secondary_cpu_start(int cpuid, struct task_struct *idle)
|
|||
+ hwrpb->processor_offset
|
||||
+ cpuid * hwrpb->processor_size);
|
||||
hwpcb = (struct pcb_struct *) cpu->hwpcb;
|
||||
ipcb = &idle->thread_info->pcb;
|
||||
ipcb = &task_thread_info(idle)->pcb;
|
||||
|
||||
/* Initialize the CPU's HWPCB to something just good enough for
|
||||
us to get started. Immediately after starting, we'll swpctx
|
||||
|
|
|
@ -254,7 +254,7 @@ alcor_init_pci(void)
|
|||
* motherboard, by looking for a 21040 TULIP in slot 6, which is
|
||||
* built into XLT and BRET/MAVERICK, but not available on ALCOR.
|
||||
*/
|
||||
dev = pci_find_device(PCI_VENDOR_ID_DEC,
|
||||
dev = pci_get_device(PCI_VENDOR_ID_DEC,
|
||||
PCI_DEVICE_ID_DEC_TULIP,
|
||||
NULL);
|
||||
if (dev && dev->devfn == PCI_DEVFN(6,0)) {
|
||||
|
@ -262,6 +262,7 @@ alcor_init_pci(void)
|
|||
printk(KERN_INFO "%s: Detected AS500 or XLT motherboard.\n",
|
||||
__FUNCTION__);
|
||||
}
|
||||
pci_dev_put(dev);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@ sio_collect_irq_levels(void)
|
|||
struct pci_dev *dev = NULL;
|
||||
|
||||
/* Iterate through the devices, collecting IRQ levels. */
|
||||
while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
|
||||
for_each_pci_dev(dev) {
|
||||
if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) &&
|
||||
(dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA))
|
||||
continue;
|
||||
|
@ -229,8 +229,8 @@ alphabook1_init_pci(void)
|
|||
*/
|
||||
|
||||
dev = NULL;
|
||||
while ((dev = pci_find_device(PCI_VENDOR_ID_NCR, PCI_ANY_ID, dev))) {
|
||||
if (dev->device == PCI_DEVICE_ID_NCR_53C810
|
||||
while ((dev = pci_get_device(PCI_VENDOR_ID_NCR, PCI_ANY_ID, dev))) {
|
||||
if (dev->device == PCI_DEVICE_ID_NCR_53C810
|
||||
|| dev->device == PCI_DEVICE_ID_NCR_53C815
|
||||
|| dev->device == PCI_DEVICE_ID_NCR_53C820
|
||||
|| dev->device == PCI_DEVICE_ID_NCR_53C825) {
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
/* 2.3.x zone allocator, 1999 Andrea Arcangeli <andrea@suse.de> */
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
|
|
|
@ -46,10 +46,6 @@ config MCA
|
|||
<file:Documentation/mca.txt> (and especially the web page given
|
||||
there) before attempting to build an MCA bus kernel.
|
||||
|
||||
config UID16
|
||||
bool
|
||||
default y
|
||||
|
||||
config RWSEM_GENERIC_SPINLOCK
|
||||
bool
|
||||
default y
|
||||
|
@ -103,13 +99,6 @@ config ARCH_EBSA110
|
|||
Ethernet interface, two PCMCIA sockets, two serial ports and a
|
||||
parallel port.
|
||||
|
||||
config ARCH_CAMELOT
|
||||
bool "Epxa10db"
|
||||
help
|
||||
This enables support for Altera's Excalibur XA10 development board.
|
||||
If you would like to build your kernel to run on one of these boards
|
||||
then you must say 'Y' here. Otherwise say 'N'
|
||||
|
||||
config ARCH_FOOTBRIDGE
|
||||
bool "FootBridge"
|
||||
select FOOTBRIDGE
|
||||
|
@ -154,6 +143,7 @@ config ARCH_RPC
|
|||
select FIQ
|
||||
select TIMER_ACORN
|
||||
select ARCH_MAY_HAVE_PC_FDC
|
||||
select ISA_DMA_API
|
||||
help
|
||||
On the Acorn Risc-PC, Linux can support the internal IDE disk and
|
||||
CD-ROM interface, serial and parallel port, and the floppy drive.
|
||||
|
@ -190,6 +180,7 @@ config ARCH_OMAP
|
|||
config ARCH_VERSATILE
|
||||
bool "Versatile"
|
||||
select ARM_AMBA
|
||||
select ARM_VIC
|
||||
select ICST307
|
||||
help
|
||||
This enables support for ARM Ltd Versatile board.
|
||||
|
@ -206,6 +197,7 @@ config ARCH_IMX
|
|||
|
||||
config ARCH_H720X
|
||||
bool "Hynix-HMS720x-based"
|
||||
select ISA_DMA_API
|
||||
help
|
||||
This enables support for systems based on the Hynix HMS720x
|
||||
|
||||
|
@ -215,12 +207,16 @@ config ARCH_AAEC2000
|
|||
help
|
||||
This enables support for systems based on the Agilent AAEC-2000
|
||||
|
||||
config ARCH_AT91RM9200
|
||||
bool "AT91RM9200"
|
||||
help
|
||||
Say Y here if you intend to run this kernel on an AT91RM9200-based
|
||||
board.
|
||||
|
||||
endchoice
|
||||
|
||||
source "arch/arm/mach-clps711x/Kconfig"
|
||||
|
||||
source "arch/arm/mach-epxa10db/Kconfig"
|
||||
|
||||
source "arch/arm/mach-footbridge/Kconfig"
|
||||
|
||||
source "arch/arm/mach-integrator/Kconfig"
|
||||
|
@ -255,6 +251,8 @@ source "arch/arm/mach-aaec2000/Kconfig"
|
|||
|
||||
source "arch/arm/mach-realview/Kconfig"
|
||||
|
||||
source "arch/arm/mach-at91rm9200/Kconfig"
|
||||
|
||||
# Definitions to make life easier
|
||||
config ARCH_ACORN
|
||||
bool
|
||||
|
@ -290,12 +288,14 @@ config ISA
|
|||
(MCA) or VESA. ISA is an older system, now being displaced by PCI;
|
||||
newer boards don't support it. If you have ISA, say Y, otherwise N.
|
||||
|
||||
# Select ISA DMA controller support
|
||||
config ISA_DMA
|
||||
bool
|
||||
select ISA_DMA_API
|
||||
|
||||
# Select ISA DMA interface
|
||||
config ISA_DMA_API
|
||||
bool
|
||||
default y
|
||||
|
||||
config PCI
|
||||
bool "PCI support" if ARCH_INTEGRATOR_AP || ARCH_VERSATILE_PB
|
||||
|
@ -401,6 +401,38 @@ config NO_IDLE_HZ
|
|||
Currently at least OMAP, PXA2xx and SA11x0 platforms are known
|
||||
to have accurate timekeeping with dynamic tick.
|
||||
|
||||
config AEABI
|
||||
bool "Use the ARM EABI to compile the kernel"
|
||||
help
|
||||
This option allows for the kernel to be compiled using the latest
|
||||
ARM ABI (aka EABI). This is only useful if you are using a user
|
||||
space environment that is also compiled with EABI.
|
||||
|
||||
Since there are major incompatibilities between the legacy ABI and
|
||||
EABI, especially with regard to structure member alignment, this
|
||||
option also changes the kernel syscall calling convention to
|
||||
disambiguate both ABIs and allow for backward compatibility support
|
||||
(selected with CONFIG_OABI_COMPAT).
|
||||
|
||||
To use this you need GCC version 4.0.0 or later.
|
||||
|
||||
config OABI_COMPAT
|
||||
bool "Allow old ABI binaries to run with this kernel"
|
||||
depends on AEABI
|
||||
default y
|
||||
help
|
||||
This option preserves the old syscall interface along with the
|
||||
new (ARM EABI) one. It also provides a compatibility layer to
|
||||
intercept syscalls that have structure arguments whose layout
|
||||
in memory differs between the legacy ABI and the new ARM EABI
|
||||
(only for non "thumb" binaries). This option adds a tiny
|
||||
overhead to all syscalls and produces a slightly larger kernel.
|
||||
If you know you'll be using only pure EABI user space then you
|
||||
can say N here. If this option is not selected and you attempt
|
||||
to execute a legacy ABI binary then the result will be
|
||||
UNPREDICTABLE (in fact it can be predicted that it won't work
|
||||
at all). If in doubt say Y.
|
||||
|
||||
config ARCH_DISCONTIGMEM_ENABLE
|
||||
bool
|
||||
default (ARCH_LH7A40X && !LH7A40X_CONTIGMEM)
|
||||
|
@ -418,7 +450,8 @@ config LEDS
|
|||
ARCH_EBSA285 || ARCH_IMX || ARCH_INTEGRATOR || \
|
||||
ARCH_LUBBOCK || MACH_MAINSTONE || ARCH_NETWINDER || \
|
||||
ARCH_OMAP || ARCH_P720T || ARCH_PXA_IDP || \
|
||||
ARCH_SA1100 || ARCH_SHARK || ARCH_VERSATILE
|
||||
ARCH_SA1100 || ARCH_SHARK || ARCH_VERSATILE || \
|
||||
ARCH_AT91RM9200
|
||||
help
|
||||
If you say Y here, the LEDs on your machine will be used
|
||||
to provide useful information about your current system status.
|
||||
|
@ -586,6 +619,7 @@ comment "At least one emulation must be selected"
|
|||
|
||||
config FPE_NWFPE
|
||||
bool "NWFPE math emulation"
|
||||
depends on !AEABI || OABI_COMPAT
|
||||
---help---
|
||||
Say Y to include the NWFPE floating point emulator in the kernel.
|
||||
This is necessary to run most binaries. Linux does not currently
|
||||
|
@ -609,7 +643,7 @@ config FPE_NWFPE_XP
|
|||
|
||||
config FPE_FASTFPE
|
||||
bool "FastFPE math emulation (EXPERIMENTAL)"
|
||||
depends on !CPU_32v3 && EXPERIMENTAL
|
||||
depends on (!AEABI || OABI_COMPAT) && !CPU_32v3 && EXPERIMENTAL
|
||||
---help---
|
||||
Say Y here to include the FAST floating point emulator in the kernel.
|
||||
This is an experimental much faster emulator which now also has full
|
||||
|
@ -641,6 +675,7 @@ source "fs/Kconfig.binfmt"
|
|||
|
||||
config ARTHUR
|
||||
tristate "RISC OS personality"
|
||||
depends on !AEABI
|
||||
help
|
||||
Say Y here to include the kernel code necessary if you want to run
|
||||
Acorn RISC OS/Arthur binaries under Linux. This code is still very
|
||||
|
@ -656,7 +691,6 @@ source "kernel/power/Kconfig"
|
|||
|
||||
config APM
|
||||
tristate "Advanced Power Management Emulation"
|
||||
depends on PM_LEGACY
|
||||
---help---
|
||||
APM is a BIOS specification for saving power using several different
|
||||
techniques. This is mostly useful for battery powered laptops with
|
||||
|
@ -730,6 +764,8 @@ source "drivers/char/Kconfig"
|
|||
|
||||
source "drivers/i2c/Kconfig"
|
||||
|
||||
source "drivers/spi/Kconfig"
|
||||
|
||||
source "drivers/hwmon/Kconfig"
|
||||
|
||||
#source "drivers/l3/Kconfig"
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
# Copyright (C) 1995-2001 by Russell King
|
||||
|
||||
LDFLAGS_vmlinux :=-p --no-undefined -X
|
||||
CPPFLAGS_vmlinux.lds = -DKERNEL_RAM_ADDR=$(TEXTADDR)
|
||||
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
|
||||
OBJCOPYFLAGS :=-O binary -R .note -R .comment -S
|
||||
GZFLAGS :=-9
|
||||
#CFLAGS +=-pipe
|
||||
|
@ -56,8 +56,13 @@ tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100
|
|||
tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
|
||||
tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
|
||||
|
||||
# Need -Uarm for gcc < 3.x
|
||||
ifeq ($(CONFIG_AEABI),y)
|
||||
CFLAGS_ABI :=-mabi=aapcs -mno-thumb-interwork
|
||||
else
|
||||
CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,)
|
||||
endif
|
||||
|
||||
# Need -Uarm for gcc < 3.x
|
||||
CFLAGS +=$(CFLAGS_ABI) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
|
||||
AFLAGS +=$(CFLAGS_ABI) $(arch-y) $(tune-y) -msoft-float
|
||||
|
||||
|
@ -65,7 +70,7 @@ CHECKFLAGS += -D__arm__
|
|||
|
||||
#Default value
|
||||
head-y := arch/arm/kernel/head.o arch/arm/kernel/init_task.o
|
||||
textaddr-y := 0xC0008000
|
||||
textofs-y := 0x00008000
|
||||
|
||||
machine-$(CONFIG_ARCH_RPC) := rpc
|
||||
machine-$(CONFIG_ARCH_EBSA110) := ebsa110
|
||||
|
@ -73,22 +78,19 @@ textaddr-y := 0xC0008000
|
|||
incdir-$(CONFIG_ARCH_CLPS7500) := cl7500
|
||||
machine-$(CONFIG_FOOTBRIDGE) := footbridge
|
||||
incdir-$(CONFIG_FOOTBRIDGE) := ebsa285
|
||||
textaddr-$(CONFIG_ARCH_CO285) := 0x60008000
|
||||
machine-$(CONFIG_ARCH_CO285) := footbridge
|
||||
incdir-$(CONFIG_ARCH_CO285) := ebsa285
|
||||
machine-$(CONFIG_ARCH_SHARK) := shark
|
||||
machine-$(CONFIG_ARCH_SA1100) := sa1100
|
||||
ifeq ($(CONFIG_ARCH_SA1100),y)
|
||||
# SA1111 DMA bug: we don't want the kernel to live in precious DMA-able memory
|
||||
textaddr-$(CONFIG_SA1111) := 0xc0208000
|
||||
textofs-$(CONFIG_SA1111) := 0x00208000
|
||||
endif
|
||||
machine-$(CONFIG_ARCH_PXA) := pxa
|
||||
machine-$(CONFIG_ARCH_L7200) := l7200
|
||||
machine-$(CONFIG_ARCH_INTEGRATOR) := integrator
|
||||
machine-$(CONFIG_ARCH_CAMELOT) := epxa10db
|
||||
textaddr-$(CONFIG_ARCH_CLPS711X) := 0xc0028000
|
||||
textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000
|
||||
machine-$(CONFIG_ARCH_CLPS711X) := clps711x
|
||||
textaddr-$(CONFIG_ARCH_FORTUNET) := 0xc0008000
|
||||
machine-$(CONFIG_ARCH_IOP3XX) := iop3xx
|
||||
machine-$(CONFIG_ARCH_IXP4XX) := ixp4xx
|
||||
machine-$(CONFIG_ARCH_IXP2000) := ixp2000
|
||||
|
@ -102,6 +104,7 @@ textaddr-$(CONFIG_ARCH_FORTUNET) := 0xc0008000
|
|||
machine-$(CONFIG_ARCH_H720X) := h720x
|
||||
machine-$(CONFIG_ARCH_AAEC2000) := aaec2000
|
||||
machine-$(CONFIG_ARCH_REALVIEW) := realview
|
||||
machine-$(CONFIG_ARCH_AT91RM9200) := at91rm9200
|
||||
|
||||
ifeq ($(CONFIG_ARCH_EBSA110),y)
|
||||
# This is what happens if you forget the IOCS16 line.
|
||||
|
@ -110,7 +113,8 @@ CFLAGS_3c589_cs.o :=-DISA_SIXTEEN_BIT_PERIPHERAL
|
|||
export CFLAGS_3c589_cs.o
|
||||
endif
|
||||
|
||||
TEXTADDR := $(textaddr-y)
|
||||
# The byte offset of the kernel image in RAM from the start of RAM.
|
||||
TEXT_OFFSET := $(textofs-y)
|
||||
|
||||
ifeq ($(incdir-y),)
|
||||
incdir-y := $(machine-y)
|
||||
|
@ -123,7 +127,7 @@ else
|
|||
MACHINE :=
|
||||
endif
|
||||
|
||||
export TEXTADDR GZFLAGS
|
||||
export TEXT_OFFSET GZFLAGS
|
||||
|
||||
# Do we have FASTFPE?
|
||||
FASTFPE :=arch/arm/fastfpe
|
||||
|
|
|
@ -15,7 +15,7 @@ include $(srctree)/$(MACHINE)/Makefile.boot
|
|||
endif
|
||||
|
||||
# Note: the following conditions must always be true:
|
||||
# ZRELADDR == virt_to_phys(TEXTADDR)
|
||||
# ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET)
|
||||
# PARAMS_PHYS must be within 4MB of ZRELADDR
|
||||
# INITRD_PHYS must be in RAM
|
||||
ZRELADDR := $(zreladdr-y)
|
||||
|
|
|
@ -21,10 +21,6 @@ ifeq ($(CONFIG_ARCH_SHARK),y)
|
|||
OBJS += head-shark.o ofw-shark.o
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_CAMELOT),y)
|
||||
OBJS += head-epxa10db.o
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_L7200),y)
|
||||
OBJS += head-l7200.o
|
||||
endif
|
||||
|
@ -50,6 +46,10 @@ ifeq ($(CONFIG_PXA_SHARPSL),y)
|
|||
OBJS += head-sharpsl.o
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_ARCH_AT91RM9200),y)
|
||||
OBJS += head-at91rm9200.o
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_DEBUG_ICEDCC),y)
|
||||
OBJS += ice-dcc.o
|
||||
endif
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* linux/arch/arm/boot/compressed/head-at91rm9200.S
|
||||
*
|
||||
* Copyright (C) 2003 SAN People
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
#include <asm/mach-types.h>
|
||||
|
||||
.section ".start", "ax"
|
||||
|
||||
@ Atmel AT91RM9200-DK : 262
|
||||
mov r3, #(MACH_TYPE_AT91RM9200DK & 0xff)
|
||||
orr r3, r3, #(MACH_TYPE_AT91RM9200DK & 0xff00)
|
||||
cmp r7, r3
|
||||
beq 99f
|
||||
|
||||
@ Cogent CSB337 : 399
|
||||
mov r3, #(MACH_TYPE_CSB337 & 0xff)
|
||||
orr r3, r3, #(MACH_TYPE_CSB337 & 0xff00)
|
||||
cmp r7, r3
|
||||
beq 99f
|
||||
|
||||
@ Cogent CSB637 : 648
|
||||
mov r3, #(MACH_TYPE_CSB637 & 0xff)
|
||||
orr r3, r3, #(MACH_TYPE_CSB637 & 0xff00)
|
||||
cmp r7, r3
|
||||
beq 99f
|
||||
|
||||
@ Atmel AT91RM9200-EK : 705
|
||||
mov r3, #(MACH_TYPE_AT91RM9200EK & 0xff)
|
||||
orr r3, r3, #(MACH_TYPE_AT91RM9200EK & 0xff00)
|
||||
cmp r7, r3
|
||||
beq 99f
|
||||
|
||||
@ Conitec Carmeva : 769
|
||||
mov r3, #(MACH_TYPE_CARMEVA & 0xff)
|
||||
orr r3, r3, #(MACH_TYPE_CARMEVA & 0xff00)
|
||||
cmp r7, r3
|
||||
beq 99f
|
||||
|
||||
@ KwikByte KB920x : 612
|
||||
mov r3, #(MACH_TYPE_KB9200 & 0xff)
|
||||
orr r3, r3, #(MACH_TYPE_KB9200 & 0xff00)
|
||||
cmp r7, r3
|
||||
beq 99f
|
||||
|
||||
@ Unknown board, use the AT91RM9200DK board
|
||||
@ mov r7, #MACH_TYPE_AT91RM9200
|
||||
mov r7, #(MACH_TYPE_AT91RM9200DK & 0xff)
|
||||
orr r7, r7, #(MACH_TYPE_AT91RM9200DK & 0xff00)
|
||||
|
||||
99:
|
|
@ -1,5 +0,0 @@
|
|||
#include <asm/mach-types.h>
|
||||
#include <asm/arch/excalibur.h>
|
||||
|
||||
.section ".start", "ax"
|
||||
mov r7, #MACH_TYPE_CAMELOT
|
|
@ -84,7 +84,7 @@
|
|||
kputc #'\n'
|
||||
kphex r5, 8 /* decompressed kernel start */
|
||||
kputc #'-'
|
||||
kphex r8, 8 /* decompressed kernel end */
|
||||
kphex r9, 8 /* decompressed kernel end */
|
||||
kputc #'>'
|
||||
kphex r4, 8 /* kernel execution address */
|
||||
kputc #'\n'
|
||||
|
@ -116,7 +116,7 @@ start:
|
|||
.word start @ absolute load/run zImage address
|
||||
.word _edata @ zImage end address
|
||||
1: mov r7, r1 @ save architecture ID
|
||||
mov r8, #0 @ save r0
|
||||
mov r8, r2 @ save atags pointer
|
||||
|
||||
#ifndef __ARM_ARCH_2__
|
||||
/*
|
||||
|
@ -144,7 +144,7 @@ not_angel:
|
|||
|
||||
/*
|
||||
* some architecture specific code can be inserted
|
||||
* by the linker here, but it should preserve r7 and r8.
|
||||
* by the linker here, but it should preserve r7, r8, and r9.
|
||||
*/
|
||||
|
||||
.text
|
||||
|
@ -249,16 +249,17 @@ not_relocated: mov r0, #0
|
|||
* r5 = decompressed kernel start
|
||||
* r6 = processor ID
|
||||
* r7 = architecture ID
|
||||
* r8-r14 = unused
|
||||
* r8 = atags pointer
|
||||
* r9-r14 = corrupted
|
||||
*/
|
||||
add r1, r5, r0 @ end of decompressed kernel
|
||||
adr r2, reloc_start
|
||||
ldr r3, LC1
|
||||
add r3, r2, r3
|
||||
1: ldmia r2!, {r8 - r13} @ copy relocation code
|
||||
stmia r1!, {r8 - r13}
|
||||
ldmia r2!, {r8 - r13}
|
||||
stmia r1!, {r8 - r13}
|
||||
1: ldmia r2!, {r9 - r14} @ copy relocation code
|
||||
stmia r1!, {r9 - r14}
|
||||
ldmia r2!, {r9 - r14}
|
||||
stmia r1!, {r9 - r14}
|
||||
cmp r2, r3
|
||||
blo 1b
|
||||
|
||||
|
@ -308,11 +309,12 @@ params: ldr r0, =params_phys
|
|||
* r4 = kernel execution address
|
||||
* r6 = processor ID
|
||||
* r7 = architecture number
|
||||
* r8 = run-time address of "start"
|
||||
* r8 = atags pointer
|
||||
* r9 = run-time address of "start" (???)
|
||||
* On exit,
|
||||
* r1, r2, r3, r8, r9, r12 corrupted
|
||||
* r1, r2, r3, r9, r10, r12 corrupted
|
||||
* This routine must preserve:
|
||||
* r4, r5, r6, r7
|
||||
* r4, r5, r6, r7, r8
|
||||
*/
|
||||
.align 5
|
||||
cache_on: mov r3, #8 @ cache_on function
|
||||
|
@ -326,15 +328,15 @@ __setup_mmu: sub r3, r4, #16384 @ Page directory size
|
|||
* bits for the RAM area only.
|
||||
*/
|
||||
mov r0, r3
|
||||
mov r8, r0, lsr #18
|
||||
mov r8, r8, lsl #18 @ start of RAM
|
||||
add r9, r8, #0x10000000 @ a reasonable RAM size
|
||||
mov r9, r0, lsr #18
|
||||
mov r9, r9, lsl #18 @ start of RAM
|
||||
add r10, r9, #0x10000000 @ a reasonable RAM size
|
||||
mov r1, #0x12
|
||||
orr r1, r1, #3 << 10
|
||||
add r2, r3, #16384
|
||||
1: cmp r1, r8 @ if virt > start of RAM
|
||||
1: cmp r1, r9 @ if virt > start of RAM
|
||||
orrhs r1, r1, #0x0c @ set cacheable, bufferable
|
||||
cmp r1, r9 @ if virt > end of RAM
|
||||
cmp r1, r10 @ if virt > end of RAM
|
||||
bichs r1, r1, #0x0c @ clear cacheable, bufferable
|
||||
str r1, [r0], #4 @ 1:1 mapping
|
||||
add r1, r1, #1048576
|
||||
|
@ -403,26 +405,28 @@ __common_cache_on:
|
|||
* r5 = decompressed kernel start
|
||||
* r6 = processor ID
|
||||
* r7 = architecture ID
|
||||
* r8-r14 = unused
|
||||
* r8 = atags pointer
|
||||
* r9-r14 = corrupted
|
||||
*/
|
||||
.align 5
|
||||
reloc_start: add r8, r5, r0
|
||||
reloc_start: add r9, r5, r0
|
||||
debug_reloc_start
|
||||
mov r1, r4
|
||||
1:
|
||||
.rept 4
|
||||
ldmia r5!, {r0, r2, r3, r9 - r13} @ relocate kernel
|
||||
stmia r1!, {r0, r2, r3, r9 - r13}
|
||||
ldmia r5!, {r0, r2, r3, r10 - r14} @ relocate kernel
|
||||
stmia r1!, {r0, r2, r3, r10 - r14}
|
||||
.endr
|
||||
|
||||
cmp r5, r8
|
||||
cmp r5, r9
|
||||
blo 1b
|
||||
debug_reloc_end
|
||||
|
||||
call_kernel: bl cache_clean_flush
|
||||
bl cache_off
|
||||
mov r0, #0
|
||||
mov r0, #0 @ must be zero
|
||||
mov r1, r7 @ restore architecture number
|
||||
mov r2, r8 @ restore atags pointer
|
||||
mov pc, r4 @ call kernel
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
config ICST525
|
||||
config ARM_GIC
|
||||
bool
|
||||
|
||||
config ARM_GIC
|
||||
config ARM_VIC
|
||||
bool
|
||||
|
||||
config ICST525
|
||||
bool
|
||||
|
||||
config ICST307
|
||||
|
@ -23,5 +26,8 @@ config SHARP_LOCOMO
|
|||
config SHARP_PARAM
|
||||
bool
|
||||
|
||||
config SHARPSL_PM
|
||||
bool
|
||||
|
||||
config SHARP_SCOOP
|
||||
bool
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
#
|
||||
|
||||
obj-y += rtctime.o
|
||||
obj-$(CONFIG_ARM_AMBA) += amba.o
|
||||
obj-$(CONFIG_ARM_GIC) += gic.o
|
||||
obj-$(CONFIG_ARM_VIC) += vic.o
|
||||
obj-$(CONFIG_ICST525) += icst525.o
|
||||
obj-$(CONFIG_ICST307) += icst307.o
|
||||
obj-$(CONFIG_SA1111) += sa1111.o
|
||||
|
@ -13,4 +13,5 @@ obj-$(CONFIG_DMABOUNCE) += dmabounce.o
|
|||
obj-$(CONFIG_TIMER_ACORN) += time-acorn.o
|
||||
obj-$(CONFIG_SHARP_LOCOMO) += locomo.o
|
||||
obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o
|
||||
obj-$(CONFIG_SHARPSL_PM) += sharpsl_pm.o
|
||||
obj-$(CONFIG_SHARP_SCOOP) += scoop.o
|
||||
|
|
|
@ -1103,14 +1103,14 @@ static int locomo_bus_remove(struct device *dev)
|
|||
struct bus_type locomo_bus_type = {
|
||||
.name = "locomo-bus",
|
||||
.match = locomo_match,
|
||||
.probe = locomo_bus_probe,
|
||||
.remove = locomo_bus_remove,
|
||||
.suspend = locomo_bus_suspend,
|
||||
.resume = locomo_bus_resume,
|
||||
};
|
||||
|
||||
int locomo_driver_register(struct locomo_driver *driver)
|
||||
{
|
||||
driver->drv.probe = locomo_bus_probe;
|
||||
driver->drv.remove = locomo_bus_remove;
|
||||
driver->drv.bus = &locomo_bus_type;
|
||||
return driver_register(&driver->drv);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,9 @@
|
|||
#include <linux/proc_fs.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#include <asm/rtc.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
@ -34,7 +36,7 @@ static unsigned long rtc_irq_data;
|
|||
/*
|
||||
* rtc_sem protects rtc_inuse and rtc_ops
|
||||
*/
|
||||
static DECLARE_MUTEX(rtc_sem);
|
||||
static DEFINE_MUTEX(rtc_mutex);
|
||||
static unsigned long rtc_inuse;
|
||||
static struct rtc_ops *rtc_ops;
|
||||
|
||||
|
@ -355,7 +357,7 @@ static int rtc_open(struct inode *inode, struct file *file)
|
|||
{
|
||||
int ret;
|
||||
|
||||
down(&rtc_sem);
|
||||
mutex_lock(&rtc_mutex);
|
||||
|
||||
if (rtc_inuse) {
|
||||
ret = -EBUSY;
|
||||
|
@ -373,7 +375,7 @@ static int rtc_open(struct inode *inode, struct file *file)
|
|||
rtc_inuse = 1;
|
||||
}
|
||||
}
|
||||
up(&rtc_sem);
|
||||
mutex_unlock(&rtc_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -479,7 +481,7 @@ int register_rtc(struct rtc_ops *ops)
|
|||
{
|
||||
int ret = -EBUSY;
|
||||
|
||||
down(&rtc_sem);
|
||||
mutex_lock(&rtc_mutex);
|
||||
if (rtc_ops == NULL) {
|
||||
rtc_ops = ops;
|
||||
|
||||
|
@ -488,7 +490,7 @@ int register_rtc(struct rtc_ops *ops)
|
|||
create_proc_read_entry("driver/rtc", 0, NULL,
|
||||
rtc_read_proc, ops);
|
||||
}
|
||||
up(&rtc_sem);
|
||||
mutex_unlock(&rtc_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -496,12 +498,12 @@ EXPORT_SYMBOL(register_rtc);
|
|||
|
||||
void unregister_rtc(struct rtc_ops *rtc)
|
||||
{
|
||||
down(&rtc_sem);
|
||||
mutex_lock(&rtc_mutex);
|
||||
if (rtc == rtc_ops) {
|
||||
remove_proc_entry("driver/rtc", NULL);
|
||||
misc_deregister(&rtc_miscdev);
|
||||
rtc_ops = NULL;
|
||||
}
|
||||
up(&rtc_sem);
|
||||
mutex_unlock(&rtc_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_rtc);
|
||||
|
|
|
@ -1247,14 +1247,14 @@ static int sa1111_bus_remove(struct device *dev)
|
|||
struct bus_type sa1111_bus_type = {
|
||||
.name = "sa1111-rab",
|
||||
.match = sa1111_match,
|
||||
.probe = sa1111_bus_probe,
|
||||
.remove = sa1111_bus_remove,
|
||||
.suspend = sa1111_bus_suspend,
|
||||
.resume = sa1111_bus_resume,
|
||||
};
|
||||
|
||||
int sa1111_driver_register(struct sa1111_driver *driver)
|
||||
{
|
||||
driver->drv.probe = sa1111_bus_probe;
|
||||
driver->drv.remove = sa1111_bus_remove;
|
||||
driver->drv.bus = &sa1111_bus_type;
|
||||
return driver_register(&driver->drv);
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include <linux/device.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/hardware/scoop.h>
|
||||
|
@ -33,7 +34,6 @@ void reset_scoop(struct device *dev)
|
|||
|
||||
SCOOP_REG(sdev->base,SCOOP_MCR) = 0x0100; // 00
|
||||
SCOOP_REG(sdev->base,SCOOP_CDR) = 0x0000; // 04
|
||||
SCOOP_REG(sdev->base,SCOOP_CPR) = 0x0000; // 0C
|
||||
SCOOP_REG(sdev->base,SCOOP_CCR) = 0x0000; // 10
|
||||
SCOOP_REG(sdev->base,SCOOP_IMR) = 0x0000; // 18
|
||||
SCOOP_REG(sdev->base,SCOOP_IRM) = 0x00FF; // 14
|
||||
|
@ -154,6 +154,7 @@ int __init scoop_probe(struct platform_device *pdev)
|
|||
|
||||
SCOOP_REG(devptr->base, SCOOP_MCR) = 0x0140;
|
||||
reset_scoop(&pdev->dev);
|
||||
SCOOP_REG(devptr->base, SCOOP_CPR) = 0x0000;
|
||||
SCOOP_REG(devptr->base, SCOOP_GPCR) = inf->io_dir & 0xffff;
|
||||
SCOOP_REG(devptr->base, SCOOP_GPWR) = inf->io_out & 0xffff;
|
||||
|
||||
|
|
|
@ -0,0 +1,839 @@
|
|||
/*
|
||||
* Battery and Power Management code for the Sharp SL-C7xx and SL-Cxx00
|
||||
* series of PDAs
|
||||
*
|
||||
* Copyright (c) 2004-2005 Richard Purdie
|
||||
*
|
||||
* Based on code written by Sharp for 2.4 kernels
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
*/
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/apm_bios.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/platform_device.h>
|
||||
|
||||
#include <asm/hardware.h>
|
||||
#include <asm/mach-types.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/apm.h>
|
||||
#include <asm/arch/pm.h>
|
||||
#include <asm/arch/pxa-regs.h>
|
||||
#include <asm/arch/sharpsl.h>
|
||||
#include <asm/hardware/sharpsl_pm.h>
|
||||
|
||||
/*
|
||||
* Constants
|
||||
*/
|
||||
#define SHARPSL_CHARGE_ON_TIME_INTERVAL (msecs_to_jiffies(1*60*1000)) /* 1 min */
|
||||
#define SHARPSL_CHARGE_FINISH_TIME (msecs_to_jiffies(10*60*1000)) /* 10 min */
|
||||
#define SHARPSL_BATCHK_TIME (msecs_to_jiffies(15*1000)) /* 15 sec */
|
||||
#define SHARPSL_BATCHK_TIME_SUSPEND (60*10) /* 10 min */
|
||||
#define SHARPSL_WAIT_CO_TIME 15 /* 15 sec */
|
||||
#define SHARPSL_WAIT_DISCHARGE_ON 100 /* 100 msec */
|
||||
#define SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP 10 /* 10 msec */
|
||||
#define SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT 10 /* 10 msec */
|
||||
#define SHARPSL_CHECK_BATTERY_WAIT_TIME_ACIN 10 /* 10 msec */
|
||||
#define SHARPSL_CHARGE_WAIT_TIME 15 /* 15 msec */
|
||||
#define SHARPSL_CHARGE_CO_CHECK_TIME 5 /* 5 msec */
|
||||
#define SHARPSL_CHARGE_RETRY_CNT 1 /* eqv. 10 min */
|
||||
|
||||
#define SHARPSL_CHARGE_ON_VOLT 0x99 /* 2.9V */
|
||||
#define SHARPSL_CHARGE_ON_TEMP 0xe0 /* 2.9V */
|
||||
#define SHARPSL_CHARGE_ON_ACIN_HIGH 0x9b /* 6V */
|
||||
#define SHARPSL_CHARGE_ON_ACIN_LOW 0x34 /* 2V */
|
||||
#define SHARPSL_FATAL_ACIN_VOLT 182 /* 3.45V */
|
||||
#define SHARPSL_FATAL_NOACIN_VOLT 170 /* 3.40V */
|
||||
|
||||
/*
|
||||
* Prototypes
|
||||
*/
|
||||
static int sharpsl_off_charge_battery(void);
|
||||
static int sharpsl_check_battery_temp(void);
|
||||
static int sharpsl_check_battery_voltage(void);
|
||||
static int sharpsl_ac_check(void);
|
||||
static int sharpsl_fatal_check(void);
|
||||
static int sharpsl_average_value(int ad);
|
||||
static void sharpsl_average_clear(void);
|
||||
static void sharpsl_charge_toggle(void *private_);
|
||||
static void sharpsl_battery_thread(void *private_);
|
||||
|
||||
|
||||
/*
|
||||
* Variables
|
||||
*/
|
||||
struct sharpsl_pm_status sharpsl_pm;
|
||||
DECLARE_WORK(toggle_charger, sharpsl_charge_toggle, NULL);
|
||||
DECLARE_WORK(sharpsl_bat, sharpsl_battery_thread, NULL);
|
||||
|
||||
|
||||
static int get_percentage(int voltage)
|
||||
{
|
||||
int i = sharpsl_pm.machinfo->bat_levels - 1;
|
||||
struct battery_thresh *thresh;
|
||||
|
||||
if (sharpsl_pm.charge_mode == CHRG_ON)
|
||||
thresh=sharpsl_pm.machinfo->bat_levels_acin;
|
||||
else
|
||||
thresh=sharpsl_pm.machinfo->bat_levels_noac;
|
||||
|
||||
while (i > 0 && (voltage > thresh[i].voltage))
|
||||
i--;
|
||||
|
||||
return thresh[i].percentage;
|
||||
}
|
||||
|
||||
static int get_apm_status(int voltage)
|
||||
{
|
||||
int low_thresh, high_thresh;
|
||||
|
||||
if (sharpsl_pm.charge_mode == CHRG_ON) {
|
||||
high_thresh = sharpsl_pm.machinfo->status_high_acin;
|
||||
low_thresh = sharpsl_pm.machinfo->status_low_acin;
|
||||
} else {
|
||||
high_thresh = sharpsl_pm.machinfo->status_high_noac;
|
||||
low_thresh = sharpsl_pm.machinfo->status_low_noac;
|
||||
}
|
||||
|
||||
if (voltage >= high_thresh)
|
||||
return APM_BATTERY_STATUS_HIGH;
|
||||
if (voltage >= low_thresh)
|
||||
return APM_BATTERY_STATUS_LOW;
|
||||
return APM_BATTERY_STATUS_CRITICAL;
|
||||
}
|
||||
|
||||
void sharpsl_battery_kick(void)
|
||||
{
|
||||
schedule_delayed_work(&sharpsl_bat, msecs_to_jiffies(125));
|
||||
}
|
||||
EXPORT_SYMBOL(sharpsl_battery_kick);
|
||||
|
||||
|
||||
static void sharpsl_battery_thread(void *private_)
|
||||
{
|
||||
int voltage, percent, apm_status, i = 0;
|
||||
|
||||
if (!sharpsl_pm.machinfo)
|
||||
return;
|
||||
|
||||
sharpsl_pm.battstat.ac_status = (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN) ? APM_AC_ONLINE : APM_AC_OFFLINE);
|
||||
|
||||
/* Corgi cannot confirm when battery fully charged so periodically kick! */
|
||||
if (machine_is_corgi() && (sharpsl_pm.charge_mode == CHRG_ON)
|
||||
&& time_after(jiffies, sharpsl_pm.charge_start_time + SHARPSL_CHARGE_ON_TIME_INTERVAL))
|
||||
schedule_work(&toggle_charger);
|
||||
|
||||
while(1) {
|
||||
voltage = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
|
||||
|
||||
if (voltage > 0) break;
|
||||
if (i++ > 5) {
|
||||
voltage = sharpsl_pm.machinfo->bat_levels_noac[0].voltage;
|
||||
dev_warn(sharpsl_pm.dev, "Warning: Cannot read main battery!\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
voltage = sharpsl_average_value(voltage);
|
||||
apm_status = get_apm_status(voltage);
|
||||
percent = get_percentage(voltage);
|
||||
|
||||
/* At low battery voltages, the voltage has a tendency to start
|
||||
creeping back up so we try to avoid this here */
|
||||
if ((sharpsl_pm.battstat.ac_status == APM_AC_ONLINE) || (apm_status == APM_BATTERY_STATUS_HIGH) || percent <= sharpsl_pm.battstat.mainbat_percent) {
|
||||
sharpsl_pm.battstat.mainbat_voltage = voltage;
|
||||
sharpsl_pm.battstat.mainbat_status = apm_status;
|
||||
sharpsl_pm.battstat.mainbat_percent = percent;
|
||||
}
|
||||
|
||||
dev_dbg(sharpsl_pm.dev, "Battery: voltage: %d, status: %d, percentage: %d, time: %d\n", voltage,
|
||||
sharpsl_pm.battstat.mainbat_status, sharpsl_pm.battstat.mainbat_percent, jiffies);
|
||||
|
||||
/* If battery is low. limit backlight intensity to save power. */
|
||||
if ((sharpsl_pm.battstat.ac_status != APM_AC_ONLINE)
|
||||
&& ((sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_LOW) ||
|
||||
(sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_CRITICAL))) {
|
||||
if (!(sharpsl_pm.flags & SHARPSL_BL_LIMIT)) {
|
||||
corgibl_limit_intensity(1);
|
||||
sharpsl_pm.flags |= SHARPSL_BL_LIMIT;
|
||||
}
|
||||
} else if (sharpsl_pm.flags & SHARPSL_BL_LIMIT) {
|
||||
corgibl_limit_intensity(0);
|
||||
sharpsl_pm.flags &= ~SHARPSL_BL_LIMIT;
|
||||
}
|
||||
|
||||
/* Suspend if critical battery level */
|
||||
if ((sharpsl_pm.battstat.ac_status != APM_AC_ONLINE)
|
||||
&& (sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_CRITICAL)
|
||||
&& !(sharpsl_pm.flags & SHARPSL_APM_QUEUED)) {
|
||||
sharpsl_pm.flags |= SHARPSL_APM_QUEUED;
|
||||
dev_err(sharpsl_pm.dev, "Fatal Off\n");
|
||||
apm_queue_event(APM_CRITICAL_SUSPEND);
|
||||
}
|
||||
|
||||
schedule_delayed_work(&sharpsl_bat, SHARPSL_BATCHK_TIME);
|
||||
}
|
||||
|
||||
void sharpsl_pm_led(int val)
|
||||
{
|
||||
if (val == SHARPSL_LED_ERROR) {
|
||||
dev_err(sharpsl_pm.dev, "Charging Error!\n");
|
||||
} else if (val == SHARPSL_LED_ON) {
|
||||
dev_dbg(sharpsl_pm.dev, "Charge LED On\n");
|
||||
|
||||
} else {
|
||||
dev_dbg(sharpsl_pm.dev, "Charge LED Off\n");
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void sharpsl_charge_on(void)
|
||||
{
|
||||
dev_dbg(sharpsl_pm.dev, "Turning Charger On\n");
|
||||
|
||||
sharpsl_pm.full_count = 0;
|
||||
sharpsl_pm.charge_mode = CHRG_ON;
|
||||
schedule_delayed_work(&toggle_charger, msecs_to_jiffies(250));
|
||||
schedule_delayed_work(&sharpsl_bat, msecs_to_jiffies(500));
|
||||
}
|
||||
|
||||
static void sharpsl_charge_off(void)
|
||||
{
|
||||
dev_dbg(sharpsl_pm.dev, "Turning Charger Off\n");
|
||||
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
sharpsl_pm_led(SHARPSL_LED_OFF);
|
||||
sharpsl_pm.charge_mode = CHRG_OFF;
|
||||
|
||||
schedule_work(&sharpsl_bat);
|
||||
}
|
||||
|
||||
static void sharpsl_charge_error(void)
|
||||
{
|
||||
sharpsl_pm_led(SHARPSL_LED_ERROR);
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
sharpsl_pm.charge_mode = CHRG_ERROR;
|
||||
}
|
||||
|
||||
static void sharpsl_charge_toggle(void *private_)
|
||||
{
|
||||
dev_dbg(sharpsl_pm.dev, "Toogling Charger at time: %lx\n", jiffies);
|
||||
|
||||
if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN)) {
|
||||
sharpsl_charge_off();
|
||||
return;
|
||||
} else if ((sharpsl_check_battery_temp() < 0) || (sharpsl_ac_check() < 0)) {
|
||||
sharpsl_charge_error();
|
||||
return;
|
||||
}
|
||||
|
||||
sharpsl_pm_led(SHARPSL_LED_ON);
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
mdelay(SHARPSL_CHARGE_WAIT_TIME);
|
||||
sharpsl_pm.machinfo->charge(1);
|
||||
|
||||
sharpsl_pm.charge_start_time = jiffies;
|
||||
}
|
||||
|
||||
static void sharpsl_ac_timer(unsigned long data)
|
||||
{
|
||||
int acin = sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN);
|
||||
|
||||
dev_dbg(sharpsl_pm.dev, "AC Status: %d\n",acin);
|
||||
|
||||
sharpsl_average_clear();
|
||||
if (acin && (sharpsl_pm.charge_mode != CHRG_ON))
|
||||
sharpsl_charge_on();
|
||||
else if (sharpsl_pm.charge_mode == CHRG_ON)
|
||||
sharpsl_charge_off();
|
||||
|
||||
schedule_work(&sharpsl_bat);
|
||||
}
|
||||
|
||||
|
||||
irqreturn_t sharpsl_ac_isr(int irq, void *dev_id, struct pt_regs *fp)
|
||||
{
|
||||
/* Delay the event slightly to debounce */
|
||||
/* Must be a smaller delay than the chrg_full_isr below */
|
||||
mod_timer(&sharpsl_pm.ac_timer, jiffies + msecs_to_jiffies(250));
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static void sharpsl_chrg_full_timer(unsigned long data)
|
||||
{
|
||||
dev_dbg(sharpsl_pm.dev, "Charge Full at time: %lx\n", jiffies);
|
||||
|
||||
sharpsl_pm.full_count++;
|
||||
|
||||
if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN)) {
|
||||
dev_dbg(sharpsl_pm.dev, "Charge Full: AC removed - stop charging!\n");
|
||||
if (sharpsl_pm.charge_mode == CHRG_ON)
|
||||
sharpsl_charge_off();
|
||||
} else if (sharpsl_pm.full_count < 2) {
|
||||
dev_dbg(sharpsl_pm.dev, "Charge Full: Count too low\n");
|
||||
schedule_work(&toggle_charger);
|
||||
} else if (time_after(jiffies, sharpsl_pm.charge_start_time + SHARPSL_CHARGE_FINISH_TIME)) {
|
||||
dev_dbg(sharpsl_pm.dev, "Charge Full: Interrupt generated too slowly - retry.\n");
|
||||
schedule_work(&toggle_charger);
|
||||
} else {
|
||||
sharpsl_charge_off();
|
||||
sharpsl_pm.charge_mode = CHRG_DONE;
|
||||
dev_dbg(sharpsl_pm.dev, "Charge Full: Charging Finished\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Charging Finished Interrupt (Not present on Corgi) */
|
||||
/* Can trigger at the same time as an AC status change so
|
||||
delay until after that has been processed */
|
||||
irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id, struct pt_regs *fp)
|
||||
{
|
||||
if (sharpsl_pm.flags & SHARPSL_SUSPENDED)
|
||||
return IRQ_HANDLED;
|
||||
|
||||
/* delay until after any ac interrupt */
|
||||
mod_timer(&sharpsl_pm.chrg_full_timer, jiffies + msecs_to_jiffies(500));
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id, struct pt_regs *fp)
|
||||
{
|
||||
int is_fatal = 0;
|
||||
|
||||
if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_LOCK)) {
|
||||
dev_err(sharpsl_pm.dev, "Battery now Unlocked! Suspending.\n");
|
||||
is_fatal = 1;
|
||||
}
|
||||
|
||||
if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_FATAL)) {
|
||||
dev_err(sharpsl_pm.dev, "Fatal Batt Error! Suspending.\n");
|
||||
is_fatal = 1;
|
||||
}
|
||||
|
||||
if (!(sharpsl_pm.flags & SHARPSL_APM_QUEUED) && is_fatal) {
|
||||
sharpsl_pm.flags |= SHARPSL_APM_QUEUED;
|
||||
apm_queue_event(APM_CRITICAL_SUSPEND);
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Maintain an average of the last 10 readings
|
||||
*/
|
||||
#define SHARPSL_CNV_VALUE_NUM 10
|
||||
static int sharpsl_ad_index;
|
||||
|
||||
static void sharpsl_average_clear(void)
|
||||
{
|
||||
sharpsl_ad_index = 0;
|
||||
}
|
||||
|
||||
static int sharpsl_average_value(int ad)
|
||||
{
|
||||
int i, ad_val = 0;
|
||||
static int sharpsl_ad[SHARPSL_CNV_VALUE_NUM+1];
|
||||
|
||||
if (sharpsl_pm.battstat.mainbat_status != APM_BATTERY_STATUS_HIGH) {
|
||||
sharpsl_ad_index = 0;
|
||||
return ad;
|
||||
}
|
||||
|
||||
sharpsl_ad[sharpsl_ad_index] = ad;
|
||||
sharpsl_ad_index++;
|
||||
if (sharpsl_ad_index >= SHARPSL_CNV_VALUE_NUM) {
|
||||
for (i=0; i < (SHARPSL_CNV_VALUE_NUM-1); i++)
|
||||
sharpsl_ad[i] = sharpsl_ad[i+1];
|
||||
sharpsl_ad_index = SHARPSL_CNV_VALUE_NUM - 1;
|
||||
}
|
||||
for (i=0; i < sharpsl_ad_index; i++)
|
||||
ad_val += sharpsl_ad[i];
|
||||
|
||||
return (ad_val / sharpsl_ad_index);
|
||||
}
|
||||
|
||||
/*
|
||||
* Take an array of 5 integers, remove the maximum and minimum values
|
||||
* and return the average.
|
||||
*/
|
||||
static int get_select_val(int *val)
|
||||
{
|
||||
int i, j, k, temp, sum = 0;
|
||||
|
||||
/* Find MAX val */
|
||||
temp = val[0];
|
||||
j=0;
|
||||
for (i=1; i<5; i++) {
|
||||
if (temp < val[i]) {
|
||||
temp = val[i];
|
||||
j = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find MIN val */
|
||||
temp = val[4];
|
||||
k=4;
|
||||
for (i=3; i>=0; i--) {
|
||||
if (temp > val[i]) {
|
||||
temp = val[i];
|
||||
k = i;
|
||||
}
|
||||
}
|
||||
|
||||
for (i=0; i<5; i++)
|
||||
if (i != j && i != k )
|
||||
sum += val[i];
|
||||
|
||||
dev_dbg(sharpsl_pm.dev, "Average: %d from values: %d, %d, %d, %d, %d\n", sum/3, val[0], val[1], val[2], val[3], val[4]);
|
||||
|
||||
return (sum/3);
|
||||
}
|
||||
|
||||
static int sharpsl_check_battery_temp(void)
|
||||
{
|
||||
int val, i, buff[5];
|
||||
|
||||
/* Check battery temperature */
|
||||
for (i=0; i<5; i++) {
|
||||
mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP);
|
||||
sharpsl_pm.machinfo->measure_temp(1);
|
||||
mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP);
|
||||
buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_TEMP);
|
||||
sharpsl_pm.machinfo->measure_temp(0);
|
||||
}
|
||||
|
||||
val = get_select_val(buff);
|
||||
|
||||
dev_dbg(sharpsl_pm.dev, "Temperature: %d\n", val);
|
||||
if (val > SHARPSL_CHARGE_ON_TEMP)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sharpsl_check_battery_voltage(void)
|
||||
{
|
||||
int val, i, buff[5];
|
||||
|
||||
/* disable charge, enable discharge */
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
sharpsl_pm.machinfo->discharge(1);
|
||||
mdelay(SHARPSL_WAIT_DISCHARGE_ON);
|
||||
|
||||
if (sharpsl_pm.machinfo->discharge1)
|
||||
sharpsl_pm.machinfo->discharge1(1);
|
||||
|
||||
/* Check battery voltage */
|
||||
for (i=0; i<5; i++) {
|
||||
buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
|
||||
mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT);
|
||||
}
|
||||
|
||||
if (sharpsl_pm.machinfo->discharge1)
|
||||
sharpsl_pm.machinfo->discharge1(0);
|
||||
|
||||
sharpsl_pm.machinfo->discharge(0);
|
||||
|
||||
val = get_select_val(buff);
|
||||
dev_dbg(sharpsl_pm.dev, "Battery Voltage: %d\n", val);
|
||||
|
||||
if (val < SHARPSL_CHARGE_ON_VOLT)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sharpsl_ac_check(void)
|
||||
{
|
||||
int temp, i, buff[5];
|
||||
|
||||
for (i=0; i<5; i++) {
|
||||
buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_ACIN_VOLT);
|
||||
mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_ACIN);
|
||||
}
|
||||
|
||||
temp = get_select_val(buff);
|
||||
dev_dbg(sharpsl_pm.dev, "AC Voltage: %d\n",temp);
|
||||
|
||||
if ((temp > SHARPSL_CHARGE_ON_ACIN_HIGH) || (temp < SHARPSL_CHARGE_ON_ACIN_LOW)) {
|
||||
dev_err(sharpsl_pm.dev, "Error: AC check failed.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
/*
 * Platform driver suspend hook.
 *
 * Marks the driver as suspended, waits for queued work to finish and
 * records whether charging should continue while suspended
 * (SHARPSL_DO_OFFLINE_CHRG) based on the current charge mode.
 * Always returns 0.
 */
static int sharpsl_pm_suspend(struct platform_device *pdev, pm_message_t state)
{
	sharpsl_pm.flags |= SHARPSL_SUSPENDED;
	flush_scheduled_work();

	if (sharpsl_pm.charge_mode != CHRG_ON)
		sharpsl_pm.flags &= ~SHARPSL_DO_OFFLINE_CHRG;
	else
		sharpsl_pm.flags |= SHARPSL_DO_OFFLINE_CHRG;

	return 0;
}
|
||||
|
||||
static int sharpsl_pm_resume(struct platform_device *pdev)
|
||||
{
|
||||
/* Clear the reset source indicators as they break the bootloader upon reboot */
|
||||
RCSR = 0x0f;
|
||||
sharpsl_average_clear();
|
||||
sharpsl_pm.flags &= ~SHARPSL_APM_QUEUED;
|
||||
sharpsl_pm.flags &= ~SHARPSL_SUSPENDED;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Program the wakeup alarm and put the machine to sleep.
 *
 * alarm_time / alarm_enable describe the user's RTC alarm as it was
 * when suspend started. If offline charging was requested and AC is
 * present, the offline charger is started first. While charging, the
 * RTC alarm is reprogrammed to fire after SHARPSL_BATCHK_TIME_SUSPEND
 * seconds (unless the user alarm is due sooner) and
 * SHARPSL_ALARM_ACTIVE is set so the wakeup can be recognised as a
 * charger check.
 */
static void corgi_goto_sleep(unsigned long alarm_time, unsigned int alarm_enable, suspend_state_t state)
{
	dev_dbg(sharpsl_pm.dev, "Time is: %08x\n",RCNR);

	dev_dbg(sharpsl_pm.dev, "Offline Charge Activate = %d\n",sharpsl_pm.flags & SHARPSL_DO_OFFLINE_CHRG);

	/* not charging and AC-IN! */
	if ((sharpsl_pm.flags & SHARPSL_DO_OFFLINE_CHRG)
			&& (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN))) {
		dev_dbg(sharpsl_pm.dev, "Activating Offline Charger...\n");
		/* CHRG_OFF makes sharpsl_off_charge_battery() start from step 1 */
		sharpsl_pm.charge_mode = CHRG_OFF;
		sharpsl_pm.flags &= ~SHARPSL_DO_OFFLINE_CHRG;
		sharpsl_off_charge_battery();
	}

	sharpsl_pm.machinfo->presuspend();

	PEDR = 0xffffffff; /* clear it */

	sharpsl_pm.flags &= ~SHARPSL_ALARM_ACTIVE;

	/*
	 * NOTE(review): "RTSR &= RTSR_ALE" below masks RTSR down to the
	 * ALE bit rather than setting it - confirm against the PXA RTC
	 * documentation that this is the intended operation.
	 */
	if ((sharpsl_pm.charge_mode == CHRG_ON) && ((alarm_enable && ((alarm_time - RCNR) > (SHARPSL_BATCHK_TIME_SUSPEND + 30))) || !alarm_enable)) {
		RTSR &= RTSR_ALE;
		RTAR = RCNR + SHARPSL_BATCHK_TIME_SUSPEND;
		dev_dbg(sharpsl_pm.dev, "Charging alarm at: %08x\n",RTAR);
		sharpsl_pm.flags |= SHARPSL_ALARM_ACTIVE;
	} else if (alarm_enable) {
		RTSR &= RTSR_ALE;
		RTAR = alarm_time;
		dev_dbg(sharpsl_pm.dev, "User alarm at: %08x\n",RTAR);
	} else {
		dev_dbg(sharpsl_pm.dev, "No alarms set.\n");
	}

	pxa_pm_enter(state);

	sharpsl_pm.machinfo->postsuspend();

	dev_dbg(sharpsl_pm.dev, "Corgi woken up from suspend: %08x\n",PEDR);
}
|
||||
|
||||
/*
 * Decide, after a wakeup, whether the machine should go back to sleep.
 *
 * Returns 1 if the machine was put back to sleep (the caller should
 * call again after the next wakeup), 0 if the system should resume.
 */
static int corgi_enter_suspend(unsigned long alarm_time, unsigned int alarm_enable, suspend_state_t state)
{
	if (!sharpsl_pm.machinfo->should_wakeup(!(sharpsl_pm.flags & SHARPSL_ALARM_ACTIVE) && alarm_enable)) {
		/* Not a wakeup the machine code wants to act on */
		if (!(sharpsl_pm.flags & SHARPSL_ALARM_ACTIVE)) {
			dev_dbg(sharpsl_pm.dev, "No user triggered wakeup events and not charging. Strange. Suspend.\n");
			corgi_goto_sleep(alarm_time, alarm_enable, state);
			return 1;
		}

		/* Charger alarm: service the offline charger */
		if (sharpsl_off_charge_battery()) {
			dev_dbg(sharpsl_pm.dev, "Charging. Suspend...\n");
			corgi_goto_sleep(alarm_time, alarm_enable, state);
			return 1;
		}

		dev_dbg(sharpsl_pm.dev, "User triggered wakeup in offline charger.\n");
	}

	/* Never resume with the battery unlocked or in a fatal state */
	if ((!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_LOCK)) || (sharpsl_fatal_check() < 0)) {
		dev_err(sharpsl_pm.dev, "Fatal condition. Suspend.\n");
		corgi_goto_sleep(alarm_time, alarm_enable, state);
		return 1;
	}

	return 0;
}
|
||||
|
||||
/*
 * pm_ops .enter hook.
 *
 * Captures the user's RTC alarm settings, sleeps once, then keeps
 * going back to sleep for as long as corgi_enter_suspend() says the
 * wakeup should not resume the system. Always returns 0.
 */
static int corgi_pxa_pm_enter(suspend_state_t state)
{
	/* Snapshot the user alarm before the charger logic reprograms it */
	unsigned long user_alarm = RTAR;
	unsigned int user_alarm_on = ((RTSR & RTSR_ALE) != 0);

	dev_dbg(sharpsl_pm.dev, "SharpSL suspending for first time.\n");

	corgi_goto_sleep(user_alarm, user_alarm_on, state);

	while (corgi_enter_suspend(user_alarm, user_alarm_on, state))
		;

	dev_dbg(sharpsl_pm.dev, "SharpSL resuming...\n");

	return 0;
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Check for fatal battery errors
|
||||
* Fatal returns -1
|
||||
*/
|
||||
static int sharpsl_fatal_check(void)
|
||||
{
|
||||
int buff[5], temp, i, acin;
|
||||
|
||||
dev_dbg(sharpsl_pm.dev, "sharpsl_fatal_check entered\n");
|
||||
|
||||
/* Check AC-Adapter */
|
||||
acin = sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN);
|
||||
|
||||
if (acin && (sharpsl_pm.charge_mode == CHRG_ON)) {
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
udelay(100);
|
||||
sharpsl_pm.machinfo->discharge(1); /* enable discharge */
|
||||
mdelay(SHARPSL_WAIT_DISCHARGE_ON);
|
||||
}
|
||||
|
||||
if (sharpsl_pm.machinfo->discharge1)
|
||||
sharpsl_pm.machinfo->discharge1(1);
|
||||
|
||||
/* Check battery : check inserting battery ? */
|
||||
for (i=0; i<5; i++) {
|
||||
buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
|
||||
mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT);
|
||||
}
|
||||
|
||||
if (sharpsl_pm.machinfo->discharge1)
|
||||
sharpsl_pm.machinfo->discharge1(0);
|
||||
|
||||
if (acin && (sharpsl_pm.charge_mode == CHRG_ON)) {
|
||||
udelay(100);
|
||||
sharpsl_pm.machinfo->charge(1);
|
||||
sharpsl_pm.machinfo->discharge(0);
|
||||
}
|
||||
|
||||
temp = get_select_val(buff);
|
||||
dev_dbg(sharpsl_pm.dev, "sharpsl_fatal_check: acin: %d, discharge voltage: %d, no discharge: %d\n", acin, temp, sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT));
|
||||
|
||||
if ((acin && (temp < SHARPSL_FATAL_ACIN_VOLT)) ||
|
||||
(!acin && (temp < SHARPSL_FATAL_NOACIN_VOLT)))
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sharpsl_off_charge_error(void)
|
||||
{
|
||||
dev_err(sharpsl_pm.dev, "Offline Charger: Error occured.\n");
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
sharpsl_pm_led(SHARPSL_LED_ERROR);
|
||||
sharpsl_pm.charge_mode = CHRG_ERROR;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Charging Control while suspended
|
||||
* Return 1 - go straight to sleep
|
||||
* Return 0 - sleep or wakeup depending on other factors
|
||||
*/
|
||||
static int sharpsl_off_charge_battery(void)
|
||||
{
|
||||
int time;
|
||||
|
||||
dev_dbg(sharpsl_pm.dev, "Charge Mode: %d\n", sharpsl_pm.charge_mode);
|
||||
|
||||
if (sharpsl_pm.charge_mode == CHRG_OFF) {
|
||||
dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 1\n");
|
||||
|
||||
/* AC Check */
|
||||
if ((sharpsl_ac_check() < 0) || (sharpsl_check_battery_temp() < 0))
|
||||
return sharpsl_off_charge_error();
|
||||
|
||||
/* Start Charging */
|
||||
sharpsl_pm_led(SHARPSL_LED_ON);
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
mdelay(SHARPSL_CHARGE_WAIT_TIME);
|
||||
sharpsl_pm.machinfo->charge(1);
|
||||
|
||||
sharpsl_pm.charge_mode = CHRG_ON;
|
||||
sharpsl_pm.full_count = 0;
|
||||
|
||||
return 1;
|
||||
} else if (sharpsl_pm.charge_mode != CHRG_ON) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (sharpsl_pm.full_count == 0) {
|
||||
int time;
|
||||
|
||||
dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 2\n");
|
||||
|
||||
if ((sharpsl_check_battery_temp() < 0) || (sharpsl_check_battery_voltage() < 0))
|
||||
return sharpsl_off_charge_error();
|
||||
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
mdelay(SHARPSL_CHARGE_WAIT_TIME);
|
||||
sharpsl_pm.machinfo->charge(1);
|
||||
sharpsl_pm.charge_mode = CHRG_ON;
|
||||
|
||||
mdelay(SHARPSL_CHARGE_CO_CHECK_TIME);
|
||||
|
||||
time = RCNR;
|
||||
while(1) {
|
||||
/* Check if any wakeup event had occured */
|
||||
if (sharpsl_pm.machinfo->charger_wakeup() != 0)
|
||||
return 0;
|
||||
/* Check for timeout */
|
||||
if ((RCNR - time) > SHARPSL_WAIT_CO_TIME)
|
||||
return 1;
|
||||
if (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_CHRGFULL)) {
|
||||
dev_dbg(sharpsl_pm.dev, "Offline Charger: Charge full occured. Retrying to check\n");
|
||||
sharpsl_pm.full_count++;
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
mdelay(SHARPSL_CHARGE_WAIT_TIME);
|
||||
sharpsl_pm.machinfo->charge(1);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 3\n");
|
||||
|
||||
mdelay(SHARPSL_CHARGE_CO_CHECK_TIME);
|
||||
|
||||
time = RCNR;
|
||||
while(1) {
|
||||
/* Check if any wakeup event had occured */
|
||||
if (sharpsl_pm.machinfo->charger_wakeup() != 0)
|
||||
return 0;
|
||||
/* Check for timeout */
|
||||
if ((RCNR-time) > SHARPSL_WAIT_CO_TIME) {
|
||||
if (sharpsl_pm.full_count > SHARPSL_CHARGE_RETRY_CNT) {
|
||||
dev_dbg(sharpsl_pm.dev, "Offline Charger: Not charged sufficiently. Retrying.\n");
|
||||
sharpsl_pm.full_count = 0;
|
||||
}
|
||||
sharpsl_pm.full_count++;
|
||||
return 1;
|
||||
}
|
||||
if (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_CHRGFULL)) {
|
||||
dev_dbg(sharpsl_pm.dev, "Offline Charger: Charging complete.\n");
|
||||
sharpsl_pm_led(SHARPSL_LED_OFF);
|
||||
sharpsl_pm.machinfo->charge(0);
|
||||
sharpsl_pm.charge_mode = CHRG_DONE;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static ssize_t battery_percentage_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n",sharpsl_pm.battstat.mainbat_percent);
|
||||
}
|
||||
|
||||
static ssize_t battery_voltage_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n",sharpsl_pm.battstat.mainbat_voltage);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(battery_percentage, 0444, battery_percentage_show, NULL);
|
||||
static DEVICE_ATTR(battery_voltage, 0444, battery_voltage_show, NULL);
|
||||
|
||||
extern void (*apm_get_power_status)(struct apm_power_info *);
|
||||
|
||||
static void sharpsl_apm_get_power_status(struct apm_power_info *info)
|
||||
{
|
||||
info->ac_line_status = sharpsl_pm.battstat.ac_status;
|
||||
|
||||
if (sharpsl_pm.charge_mode == CHRG_ON)
|
||||
info->battery_status = APM_BATTERY_STATUS_CHARGING;
|
||||
else
|
||||
info->battery_status = sharpsl_pm.battstat.mainbat_status;
|
||||
|
||||
info->battery_flag = (1 << info->battery_status);
|
||||
info->battery_life = sharpsl_pm.battstat.mainbat_percent;
|
||||
}
|
||||
|
||||
static struct pm_ops sharpsl_pm_ops = {
|
||||
.pm_disk_mode = PM_DISK_FIRMWARE,
|
||||
.prepare = pxa_pm_prepare,
|
||||
.enter = corgi_pxa_pm_enter,
|
||||
.finish = pxa_pm_finish,
|
||||
};
|
||||
|
||||
/*
 * Platform driver probe.
 *
 * Requires machine-specific callbacks in pdev->dev.platform_data and
 * returns -EINVAL without them. Initialises the driver state and the
 * two debounce timers, runs the machine init hook, creates the sysfs
 * attributes, installs the APM status callback and the suspend
 * operations, and arms the AC timer so the initial charger state is
 * evaluated 250ms later.
 *
 * Returns 0 on success. Failure to create the sysfs attributes is
 * reported with a warning but does not fail the probe.
 */
static int __init sharpsl_pm_probe(struct platform_device *pdev)
{
	int ret;

	if (!pdev->dev.platform_data)
		return -EINVAL;

	sharpsl_pm.dev = &pdev->dev;
	sharpsl_pm.machinfo = pdev->dev.platform_data;
	sharpsl_pm.charge_mode = CHRG_OFF;
	sharpsl_pm.flags = 0;

	init_timer(&sharpsl_pm.ac_timer);
	sharpsl_pm.ac_timer.function = sharpsl_ac_timer;

	init_timer(&sharpsl_pm.chrg_full_timer);
	sharpsl_pm.chrg_full_timer.function = sharpsl_chrg_full_timer;

	sharpsl_pm.machinfo->init();

	/* The attributes are informational only: warn and carry on if they cannot be created */
	ret = device_create_file(&pdev->dev, &dev_attr_battery_percentage);
	ret |= device_create_file(&pdev->dev, &dev_attr_battery_voltage);
	if (ret != 0)
		dev_warn(&pdev->dev, "Failed to register attributes (%d)\n", ret);

	apm_get_power_status = sharpsl_apm_get_power_status;

	pm_set_ops(&sharpsl_pm_ops);

	mod_timer(&sharpsl_pm.ac_timer, jiffies + msecs_to_jiffies(250));

	return 0;
}
|
||||
|
||||
static int sharpsl_pm_remove(struct platform_device *pdev)
|
||||
{
|
||||
pm_set_ops(NULL);
|
||||
|
||||
device_remove_file(&pdev->dev, &dev_attr_battery_percentage);
|
||||
device_remove_file(&pdev->dev, &dev_attr_battery_voltage);
|
||||
|
||||
sharpsl_pm.machinfo->exit();
|
||||
|
||||
del_timer_sync(&sharpsl_pm.chrg_full_timer);
|
||||
del_timer_sync(&sharpsl_pm.ac_timer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct platform_driver sharpsl_pm_driver = {
|
||||
.probe = sharpsl_pm_probe,
|
||||
.remove = sharpsl_pm_remove,
|
||||
.suspend = sharpsl_pm_suspend,
|
||||
.resume = sharpsl_pm_resume,
|
||||
.driver = {
|
||||
.name = "sharpsl-pm",
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * Init entry point: registers sharpsl_pm_driver and hands back the result of
 * platform_driver_register() unchanged.
 */
static int __devinit sharpsl_pm_init(void)
{
	int err = platform_driver_register(&sharpsl_pm_driver);

	return err;
}
|
||||
|
||||
static void sharpsl_pm_exit(void)
|
||||
{
|
||||
platform_driver_unregister(&sharpsl_pm_driver);
|
||||
}
|
||||
|
||||
/* sharpsl_pm_init() is hooked in as a late initcall rather than via module_init(). */
late_initcall(sharpsl_pm_init);
|
||||
/* sharpsl_pm_exit() is the matching exit handler. */
module_exit(sharpsl_pm_exit);
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* linux/arch/arm/common/vic.c
|
||||
*
|
||||
* Copyright (C) 1999 - 2003 ARM Limited
|
||||
* Copyright (C) 2000 Deep Blue Solutions Ltd
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/list.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/mach/irq.h>
|
||||
#include <asm/hardware/vic.h>
|
||||
|
||||
static void __iomem *vic_base;
|
||||
|
||||
static void vic_mask_irq(unsigned int irq)
|
||||
{
|
||||
irq -= IRQ_VIC_START;
|
||||
writel(1 << irq, vic_base + VIC_INT_ENABLE_CLEAR);
|
||||
}
|
||||
|
||||
static void vic_unmask_irq(unsigned int irq)
|
||||
{
|
||||
irq -= IRQ_VIC_START;
|
||||
writel(1 << irq, vic_base + VIC_INT_ENABLE);
|
||||
}
|
||||
|
||||
static struct irqchip vic_chip = {
|
||||
.ack = vic_mask_irq,
|
||||
.mask = vic_mask_irq,
|
||||
.unmask = vic_unmask_irq,
|
||||
};
|
||||
|
||||
/*
 * vic_init - initialise the vectored interrupt controller
 * @base:        I/O address of the VIC register block; saved in vic_base for
 *               the mask/unmask handlers
 * @vic_sources: bitmask of usable sources; when bit i is set, interrupt
 *               IRQ_VIC_START + i gets the level handler and is flagged
 *               IRQF_VALID | IRQF_PROBE
 *
 * Disables every interrupt, clears pending state, programs the 16 vector
 * control registers and the default vector address, then attaches vic_chip
 * to all 32 interrupt numbers starting at IRQ_VIC_START.
 */
void __init vic_init(void __iomem *base, u32 vic_sources)
{
	unsigned int i;

	vic_base = base;

	/* Disable all interrupts initially. */
	writel(0, vic_base + VIC_INT_SELECT);
	writel(0, vic_base + VIC_INT_ENABLE);
	writel(~0, vic_base + VIC_INT_ENABLE_CLEAR);
	writel(0, vic_base + VIC_IRQ_STATUS);
	writel(0, vic_base + VIC_ITCR);
	writel(~0, vic_base + VIC_INT_SOFT_CLEAR);

	/*
	 * Make sure we clear all existing interrupts: the vector address
	 * register is read and written back 19 times.
	 */
	writel(0, vic_base + VIC_VECT_ADDR);
	for (i = 0; i < 19; i++) {
		unsigned int value;

		value = readl(vic_base + VIC_VECT_ADDR);
		writel(value, vic_base + VIC_VECT_ADDR);
	}

	/* Vector control register i is enabled and given source number i. */
	for (i = 0; i < 16; i++) {
		void __iomem *reg = vic_base + VIC_VECT_CNTL0 + (i * 4);

		writel(VIC_VECT_CNTL_ENABLE | i, reg);
	}

	writel(32, vic_base + VIC_DEF_VECT_ADDR);

	for (i = 0; i < 32; i++) {
		unsigned int irq = IRQ_VIC_START + i;

		set_irq_chip(irq, &vic_chip);

		/* Unsigned constant: a signed 1 << 31 would be undefined. */
		if (vic_sources & (1U << i)) {
			set_irq_handler(irq, do_level_IRQ);
			set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
		}
	}
}
|
|
@ -63,7 +63,6 @@ CONFIG_OBSOLETE_MODPARM=y
|
|||
# CONFIG_ARCH_CLPS711X is not set
|
||||
# CONFIG_ARCH_CO285 is not set
|
||||
# CONFIG_ARCH_EBSA110 is not set
|
||||
# CONFIG_ARCH_CAMELOT is not set
|
||||
# CONFIG_ARCH_FOOTBRIDGE is not set
|
||||
# CONFIG_ARCH_INTEGRATOR is not set
|
||||
# CONFIG_ARCH_IOP3XX is not set
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче