Discussion:
[ORLinux] [PATCH 1/2] openrisc: Add new atomic operations to or1k_atomic
Christian Svensson
2014-01-25 16:01:13 UTC
Permalink
OpenRISC does not offer any way to do atomic operations in user space.
In order to provide atomic operations we need to implement these
operations in kernel space.

These operations disables premption by disabling interrupts while
executing their critical sections.

The following operations has been added:
SWAP: Atomically swap the values in pointers 1 and 2.
CMPXCHG: Writes new to *mem if *mem == old. Returns old *mem.
XCHG: Store NEW in *MEM and return the old value.
ADD: Add VAL to *MEM and return the old value of *MEM.
DECPOS: Decrement *MEM if it is > 0, and return the old value.
AND: Atomically *mem &= mask and return the old value of *mem.
OR: Atomically *mem |= mask and return the old value of *mem.
UMAX: If *mem < val, set *mem = max. Returns old value of *mem.
UMIN: If *mem > val, set *mem = min. Returns old value of *mem.

Signed-off-by: Christian Svensson <blue at cmd.nu>
---
arch/openrisc/kernel/entry.S | 181
++++++++++++++++++++++++++++++++++++++++--
1 file changed, 174 insertions(+), 7 deletions(-)

diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index fec8bf9..eafd2c0 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -1106,20 +1106,101 @@ ENTRY(sys_rt_sigreturn)

/* This is a catch-all syscall for atomic instructions for the OpenRISC
1000.
* The functions takes a variable number of parameters depending on which
- * particular flavour of atomic you want... parameter 1 is a flag
identifying
- * the atomic in question. Currently, this function implements the
- * following variants:
+ * particular flavour of atomic you want.
+ * Parameter 1 is a flag identifying the atomic in question.
*
- * XCHG:
+ * TODO: Flag value 0 can be used, and it is recommended that the next
+ * atomic instruction to be implemented uses it. Currently it is a no-op.
+ *
+ * Currently, this function implements the following variants:
+ *
+ * SWAP:
* @flag: 1
* @ptr1:
* @ptr2:
- * Atomically exchange the values in pointers 1 and 2.
+ * Atomically swap the values in pointers 1 and 2.
+ *
+ * CMPXCHG:
+ * @flag: 2
+ * @ptr: mem
+ * @val1: old
+ * @val2: new
+ * Writes new to *mem if *mem == old. Returns old *mem.
+ *
+ * XCHG:
+ * @flag: 3
+ * @ptr: mem
+ * @val1: new
+ * Store NEW in *MEM and return the old value.
+ *
+ * ADD:
+ * @flag: 4
+ * @ptr: mem
+ * @val1: val
+ * Add VAL to *MEM and return the old value of *MEM.
+ *
+ * DECPOS:
+ * @flag: 5
+ * @ptr: mem
+ * Decrement *MEM if it is > 0, and return the old value.
+ *
+ * AND:
+ * @flag: 6
+ * @ptr: mem
+ * @val1: mask
+ * Atomically *mem &= mask and return the old value of *mem.
+ *
+ * OR:
+ * @flag: 7
+ * @ptr: mem
+ * @val1: mask
+ * Atomically *mem |= mask and return the old value of *mem.
+ *
+ * UMAX: unsigned
+ * @flag: 8
+ * @ptr: mem
+ * @val1: max
+ * If *mem < val, set *mem = max. Returns old value of *mem.
+ *
+ * UMIN: unsigned
+ * @flag: 9
+ * @ptr: mem
+ * @val1: min
+ * If *mem > val, set *mem = min. Returns old value of *mem.
*
*/
-
ENTRY(sys_or1k_atomic)
- /* FIXME: This ignores r3 and always does an XCHG */
+ /* TODO: validate mem ptr(s) */
+ /* TODO: check bounds of argument */
+ l.movhi r19,hi(atomic_table)
+ l.ori r19,r19,lo(atomic_table)
+ l.slli r3,r3,3
+ l.add r19,r19,r3
+ l.jr r19
+ l.nop
+atomic_table:
+ l.jr r9
+ l.nop
+ l.j _atomic_swap
+ l.nop
+ l.j _atomic_cmpxchg
+ l.nop
+ l.j _atomic_xchg
+ l.nop
+ l.j _atomic_add
+ l.nop
+ l.j _atomic_decpos
+ l.nop
+ l.j _atomic_and
+ l.nop
+ l.j _atomic_or
+ l.nop
+ l.j _atomic_max
+ l.nop
+ l.j _atomic_min
+ l.nop
+
+_atomic_swap:
DISABLE_INTERRUPTS(r17,r19)
l.lwz r29,0(r4)
l.lwz r27,0(r5)
@@ -1129,4 +1210,90 @@ ENTRY(sys_or1k_atomic)
l.jr r9
l.or r11,r0,r0

+_atomic_cmpxchg:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfeq r29,r5
+ l.bnf _atomic_cmpxchg_done
+ l.nop
+
+ l.sw 0(r4),r6
+_atomic_cmpxchg_done:
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_xchg:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sw 0(r4),r5
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_add:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.add r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_decpos:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfgtsi r29,0
+ l.bnf _atomic_decpos_done
+ l.addi r27,r29,-1
+
+ l.sw 0(r4),r27
+_atomic_decpos_done:
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_and:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.and r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_or:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.or r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_max:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfltu r29,r5
+ l.bnf _atomic_max_done
+ l.nop
+
+ l.sw 0(r4),r5
+_atomic_max_done:
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_min:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfgtu r29,r5
+ l.bnf _atomic_min_done
+ l.nop
+
+ l.sw 0(r4),r5
+_atomic_min_done:
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
/* ============================================================[ EOF ]===
*/
--
1.7.10.4
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openrisc.net/pipermail/linux/attachments/20140125/703b3339/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-openrisc-Add-new-atomic-operations-to-or1k_atomic.patch
Type: application/octet-stream
Size: 5563 bytes
Desc: not available
URL: <http://lists.openrisc.net/pipermail/linux/attachments/20140125/703b3339/attachment-0001.obj>
Stefan Kristiansson
2014-02-06 17:33:55 UTC
Permalink
Post by Christian Svensson
OpenRISC does not offer any way to do atomic operations in user space.
In order to provide atomic operations we need to implement these
operations in kernel space.
These operations disables premption by disabling interrupts while
Nit, s/premption/preemption
Post by Christian Svensson
executing their critical sections.
SWAP: Atomically swap the values in pointers 1 and 2.
CMPXCHG: Writes new to *mem if *mem == old. Returns old *mem.
XCHG: Store NEW in *MEM and return the old value.
ADD: Add VAL to *MEM and return the old value of *MEM.
DECPOS: Decrement *MEM if it is > 0, and return the old value.
AND: Atomically *mem &= mask and return the old value of *mem.
OR: Atomically *mem |= mask and return the old value of *mem.
UMAX: If *mem < val, set *mem = max. Returns old value of *mem.
UMIN: If *mem > val, set *mem = min. Returns old value of *mem.
I've gone through the implementations of the different operations, and as
far as I can see they are all correct.
I also suspect that having a larger set of operations defined in this syscall
makes sense, even though you could use a smaller set of operation to get the
same functionality.
But, perhaps you could elaborate on the reasoning behind the larger set?

The declaration of sys_or1k_atomic in arch/openrisc/include/asm/syscalls.h
should probably be updated to reflect this change as well.

Apart from that,
Reviewed-by: Stefan Kristiansson <stefan.kristiansson at saunalahti.fi>

Stefan
Christian Svensson
2014-02-07 04:28:09 UTC
Permalink
Thanks for looking at this!

On Thu, Feb 6, 2014 at 9:33 AM, Stefan Kristiansson <
Post by Stefan Kristiansson
Nit, s/premption/preemption
Thanks, will fix.
Post by Stefan Kristiansson
I also suspect that having a larger set of operations defined in this syscall
makes sense, even though you could use a smaller set of operation to get the
same functionality.
Yes you can. In fact glibc only requires you to implement cmpxchg.
Post by Stefan Kristiansson
But, perhaps you could elaborate on the reasoning behind the larger set
For example, the default implementation in glibc for 'exchange and add'
(called ADD above) is basically looping an cmpxchg until the right thing
happens.
See
https://github.com/bluecmd/or1k-glibc/blob/1688660582f50a123d38fb2298a23c5cfa4505d8/include/atomic.h
It was a while ago but I'm pretty sure I implemented just the functions
that relies on retries. I don't have any hard facts to support that this
helps, but it feels like the sane thing to do. A bit more kernel code vs.
potentially a storm of syscalls and slowness.

Do you want me to include this in the comment / commit log / or just here?
Post by Stefan Kristiansson
The declaration of sys_or1k_atomic in arch/openrisc/include/asm/syscalls.h
should probably be updated to reflect this change as well.
Agreed. Is this a good place for the operation constants?
They are currently living in:
https://github.com/bluecmd/or1k-glibc/blob/1688660582f50a123d38fb2298a23c5cfa4505d8/ports/sysdeps/unix/sysv/linux/or1k/nptl/bits/atomic.h

Christian
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openrisc.net/pipermail/linux/attachments/20140206/3bd2c5c0/attachment.html>
Jonas Bonn
2014-02-07 06:58:54 UTC
Permalink
Post by Christian Svensson
Thanks for looking at this!
On Thu, Feb 6, 2014 at 9:33 AM, Stefan Kristiansson <
Post by Stefan Kristiansson
Nit, s/premption/preemption
Thanks, will fix.
Post by Stefan Kristiansson
I also suspect that having a larger set of operations defined in this syscall
makes sense, even though you could use a smaller set of operation to get the
same functionality.
Yes you can. In fact glibc only requires you to implement cmpxchg.
Post by Stefan Kristiansson
But, perhaps you could elaborate on the reasoning behind the larger set
For example, the default implementation in glibc for 'exchange and add'
(called ADD above) is basically looping an cmpxchg until the right thing
happens.
See
https://github.com/bluecmd/or1k-glibc/blob/1688660582f50a123d38fb2298a23c5cfa4505d8/include/atomic.h
It was a while ago but I'm pretty sure I implemented just the functions
that relies on retries. I don't have any hard facts to support that this
helps, but it feels like the sane thing to do. A bit more kernel code vs.
potentially a storm of syscalls and slowness.
Do you want me to include this in the comment / commit log / or just here?
Yes, please extend your commit message with the information above... I
think it's valuable.

On the "storm of syscalls" point: yes, as things are currently
implemented, these 'atomics' are pretty heavy-weight syscalls that pass
all the signal and scheduling synchronization points. This is, in my
opinion, not strictly necessary for this class of syscall as many other
arch's will have these as HW ops and not pass these sync points.

That said, there's a couple of options here:

i) implement these as "light weight" syscalls that bypass the usual
syscall handling path (ie. if atomic op, jump straight to op and return
to userspace, saving minimum of regs)

ii) implement this as part of a VDSO... of course, we don't have VDSO
support for openrisc yet, but that's a detail ;)

I prefer approach ii), if someone wants to do the work... simply because
we end up with a cleaner code base and less branches in the syscall hot
path.

And, assuming that the VDSO approach is as efficient as I expect, then
it's possible that the looping constructs in glibc are not much inferior
to the "raw" implementations in the kernel and we might be better off
with a shorter list of atomic ops (just cmpxchg)... I'll leave this
point open to debate, though, since it's all speculation at this point.

Christian: can we, at this point, break this into two patches:

i) one adding CMPXCHG to the existing 'atomic' syscall
ii) and one adding the 'optional' ops

I see value in pushing CMPXCHG upstream (since we've got the 'atomic'
syscall already, anyway). If we are able, however, to move forward with
the VDSO approach, I'd be happy if the rest of the list perhaps just
lives in the openrisc.net tree for a while before we switch to the VDSO
approach and upstream that. Feel free to disagree with me here if you
feel otherwise.
Post by Christian Svensson
Post by Stefan Kristiansson
The declaration of sys_or1k_atomic in arch/openrisc/include/asm/syscalls.h
should probably be updated to reflect this change as well.
Agreed. Is this a good place for the operation constants?
https://github.com/bluecmd/or1k-glibc/blob/1688660582f50a123d38fb2298a23c5cfa4505d8/ports/sysdeps/unix/sysv/linux/or1k/nptl/bits/atomic.h
These should certainly live in a kernel header since the syscall
interface is available to everyone, not just libc.

/Jonas
Post by Christian Svensson
Christian
_______________________________________________
Linux mailing list
Linux at lists.openrisc.net
http://lists.openrisc.net/listinfo/linux
Christian Svensson
2014-02-07 07:34:01 UTC
Permalink
Hi,
Thanks for the feedback.
Post by Jonas Bonn
Post by Christian Svensson
Do you want me to include this in the comment / commit log / or just here?
Yes, please extend your commit message with the information above... I
think it's valuable.
Consider it done.
Post by Jonas Bonn
ii) implement this as part of a VDSO... of course, we don't have VDSO
support for openrisc yet, but that's a detail ;)
I didn't know about VDSO, but reading about them I fell in love with
them. I agree with you on this.
Post by Jonas Bonn
I prefer approach ii), if someone wants to do the work... simply because
we end up with a cleaner code base and less branches in the syscall hot
path.
This one I don't understand. How would VDSO affect code cleanness and
the syscall hot path?
Post by Jonas Bonn
And, assuming that the VDSO approach is as efficient as I expect, then
it's possible that the looping constructs in glibc are not much inferior
to the "raw" implementations in the kernel and we might be better off
with a shorter list of atomic ops (just cmpxchg)... I'll leave this
point open to debate, though, since it's all speculation at this point.
I disagree. I can make multiple arguments, but I'll go with just the
argument of predictability and speed.
In my opinion these critical constructs (used in pthreads among other
things) deserve some extra attention.
I know there are CPUs which only implement cmpxchg (like s390) and
presumably they do just fine - but then the operation is implemented
as one CPU instruction and no good alternatives might exist. We have
the alternative to implement these as fast as cmpxchg, I think it
makes sense to do that.
Post by Jonas Bonn
i) one adding CMPXCHG to the existing 'atomic' syscall
ii) and one adding the 'optional' ops
We can, but to me it makes as much sense to merge 1 as 2. Both modify
the syscall interface and the only user (glibc) needs both in order to
be useful in practice.

I feel like I should defend this syscall change from the evil 'If we
change syscalls we will be stuck with it' ABI.
if we indeed do use VDSO (which nobody has yet volunteered to
implement) we still need to keep the old or1k_atomic - which today
only contains a kind of worthless SWAP. Nothing says that we need to
implement the same things in VDSO, right?

So, in order to 1) let people run glibc today, 2) not have horrible
performance 3) not have a stub syscall I stand by that this patch
should be merged as a whole.
Post by Jonas Bonn
I see value in pushing CMPXCHG upstream (since we've got the 'atomic'
syscall already, anyway). If we are able, however, to move forward with
the VDSO approach, I'd be happy if the rest of the list perhaps just
lives in the openrisc.net tree for a while before we switch to the VDSO
approach and upstream that. Feel free to disagree with me here if you
feel otherwise.
I respectfully do disagree, as I hopefully have pointed out somewhat above.
Post by Jonas Bonn
Post by Christian Svensson
Agreed. Is this a good place for the operation constants?
https://github.com/bluecmd/or1k-glibc/blob/1688660582f50a123d38fb2298a23c5cfa4505d8/ports/sysdeps/unix/sysv/linux/or1k/nptl/bits/atomic.h
These should certainly live in a kernel header since the syscall
interface is available to everyone, not just libc.
The question was more among the lines if that file was the best file
to add the defines to, or if there exists a better file for the
purpose.

Christian
Jonas Bonn
2014-02-07 07:49:25 UTC
Permalink
Post by Christian Svensson
Hi,
Thanks for the feedback.
Post by Jonas Bonn
Post by Christian Svensson
Do you want me to include this in the comment / commit log / or just here?
Yes, please extend your commit message with the information above... I
think it's valuable.
Consider it done.
Post by Jonas Bonn
ii) implement this as part of a VDSO... of course, we don't have VDSO
support for openrisc yet, but that's a detail ;)
I didn't know about VDSO, but reading about them I fell in love with
them. I agree with you on this.
Post by Jonas Bonn
I prefer approach ii), if someone wants to do the work... simply because
we end up with a cleaner code base and less branches in the syscall hot
path.
This one I don't understand. How would VDSO affect code cleanness and
the syscall hot path?
You're right... it probably doesn't make much practical difference. I
had some ideas about TRAP'ing into the atomic handler rather than going
via a syscall, but it probably all amounts to about the same thing.

<snip>
Post by Christian Svensson
I respectfully do disagree, as I hopefully have pointed out somewhat above.
As you're fully entitled to do! Your points are good ones and you've
done the work so your opinion carries weight. One patch with all the
ops will be fine.
Post by Christian Svensson
Post by Jonas Bonn
Post by Christian Svensson
Agreed. Is this a good place for the operation constants?
https://github.com/bluecmd/or1k-glibc/blob/1688660582f50a123d38fb2298a23c5cfa4505d8/ports/sysdeps/unix/sysv/lin
ux/or1k/nptl/bits/atomic.h
Post by Christian Svensson
Post by Jonas Bonn
These should certainly live in a kernel header since the syscall
interface is available to everyone, not just libc.
The question was more among the lines if that file was the best file
to add the defines to, or if there exists a better file for the
purpose.
Yeah, my response was along the lines of: "I didn't look but hopefully
you'll find an appropriate place for these" :)

I don't know what's best... unistd.h? Was the original SWAP constant
ever defined anywhere???

/Jonas
Christian Svensson
2014-02-07 22:26:52 UTC
Permalink
New patch.

I added more stuff to the commit log, unistd.h (which arch/xtensa use
for sort of the same thing), and a bounds check.
Christian Svensson
2014-03-09 00:49:48 UTC
Permalink
Ping!
Post by Christian Svensson
New patch.
I added more stuff to the commit log, unistd.h (which arch/xtensa use
for sort of the same thing), and a bounds check.
From ccfe17892fde0e9032eee4de3519cdcb2f0282f5 Mon Sep 17 00:00:00 2001
From: Christian Svensson <blue at cmd.nu>
Date: Sat, 25 Jan 2014 15:42:36 +0000
Subject: [PATCH 1/2] openrisc: Add new atomic operations to or1k_atomic
OpenRISC does not offer any way to do atomic operations in user space.
In order to provide atomic operations we need to implement these
operations in kernel space.
These operations disables preemption by disabling interrupts while
executing their critical sections.
SWAP: Atomically swap the values in pointers 1 and 2.
CMPXCHG: Writes new to *mem if *mem == old. Returns old *mem.
XCHG: Store NEW in *MEM and return the old value.
ADD: Add VAL to *MEM and return the old value of *MEM.
DECPOS: Decrement *MEM if it is > 0, and return the old value.
AND: Atomically *mem &= mask and return the old value of *mem.
OR: Atomically *mem |= mask and return the old value of *mem.
UMAX: If *mem < val, set *mem = max. Returns old value of *mem.
UMIN: If *mem > val, set *mem = min. Returns old value of *mem.
These operations have been chosen to satisfy the need of atomic.h in
glibc. While it is possible to only use CMPXCHG, the glibc
implementation of the others use a do-while loop to retry the operation
until it succeeds. In order to offer linear performance for these
operations they are therefore implemented in the syscall.
Signed-off-by: Christian Svensson <blue at cmd.nu>
---
arch/openrisc/include/uapi/asm/unistd.h | 41 ++++++++++
arch/openrisc/kernel/entry.S | 136 ++++++++++++++++++++++++++++---
2 files changed, 167 insertions(+), 10 deletions(-)
diff --git a/arch/openrisc/include/uapi/asm/unistd.h
b/arch/openrisc/include/uapi/asm/unistd.h
index ce40b71..13eb792 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -25,5 +25,46 @@
#include <asm-generic/unistd.h>
+/*
+ * uint32 sys_or1k_atomic(uint32 op, ...)
+ *
+ * uint32 sys_or1k_atomic(SWAP, void *ptr1, void *ptr2)
+ * Atomically swap the values in pointers 1 and 2.
+ *
+ * uint32 sys_or1k_atomic(CMPXCHG, uint32 *mem, uint32 old, uint32 new)
+ * Writes new to *mem if *mem == old. Returns old *mem.
+ *
+ * uint32 sys_or1k_atomic(XCHG, uint32 *mem, uint32 new)
+ * Store NEW in *MEM and return the old value.
+ *
+ * uint32 sys_or1k_atomic(ADD, uint32 *mem, uint32 val)
+ * Add VAL to *MEM and return the old value of *MEM.
+ *
+ * uint32 sys_or1k_atomic(DECPOS, uint32 *mem)
+ * Decrement *MEM if it is > 0, and return the old value.
+ *
+ * uint32 sys_or1k_atomic(AND, uint32 *mem, uint32 mask)
+ * Atomically *mem &= mask and return the old value of *mem.
+ *
+ * uint32 sys_or1k_atomic(OR, uint32 *mem, uint32 mask)
+ * Atomically *mem |= mask and return the old value of *mem.
+ *
+ * uint32 sys_or1k_atomic(UMAX, uint32 *mem, uint32 max)
+ * If *mem < val, set *mem = max. Returns old value of *mem.
+ *
+ * uint32 sys_or1k_atomic(UMIN, uint32 *mem, uint32 min)
+ * If *mem > val, set *mem = min. Returns old value of *mem.
+ */
+#define SYS_OR1K_ATOMIC_SWAP 1
+#define SYS_OR1K_ATOMIC_CMPXCHG 2
+#define SYS_OR1K_ATOMIC_XCHG 3
+#define SYS_OR1K_ATOMIC_ADD 4
+#define SYS_OR1K_ATOMIC_DECPOS 5
+#define SYS_OR1K_ATOMIC_AND 6
+#define SYS_OR1K_ATOMIC_OR 7
+#define SYS_OR1K_ATOMIC_UMAX 8
+#define SYS_OR1K_ATOMIC_UMIN 9
+#define SYS_OR1K_ATOMIC_COUNT 10
+
#define __NR_or1k_atomic __NR_arch_specific_syscall
__SYSCALL(__NR_or1k_atomic, sys_or1k_atomic)
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index fec8bf9..7d16c0f2 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -1106,20 +1106,50 @@ ENTRY(sys_rt_sigreturn)
/* This is a catch-all syscall for atomic instructions for the OpenRISC 1000.
* The functions takes a variable number of parameters depending on which
- * particular flavour of atomic you want... parameter 1 is a flag identifying
- * the atomic in question. Currently, this function implements the
+ * particular flavour of atomic you want.
+ * Parameter 1 is a flag identifying the atomic in question.
*
- * Atomically exchange the values in pointers 1 and 2.
+ * TODO: Flag value 0 can be used, and it is recommended that the next
+ * atomic instruction to be implemented uses it. Currently it is a no-op.
*
+ * See arch/openrisc/include/uapi/asm/unistd.h for usage.
*/
-
ENTRY(sys_or1k_atomic)
- /* FIXME: This ignores r3 and always does an XCHG */
+ /* TODO: validate mem ptr(s) */
+ l.sfgeui r3,SYS_OR1K_ATOMIC_COUNT
+ l.bf _atomic_invalid
+ l.movhi r19,hi(atomic_table)
+ l.ori r19,r19,lo(atomic_table)
+ l.slli r3,r3,3
+ l.add r19,r19,r3
+ l.jr r19
+ l.nop
+ l.jr r9
+ l.nop
+ l.jr r9
+ l.nop
+ l.j _atomic_swap
+ l.nop
+ l.j _atomic_cmpxchg
+ l.nop
+ l.j _atomic_xchg
+ l.nop
+ l.j _atomic_add
+ l.nop
+ l.j _atomic_decpos
+ l.nop
+ l.j _atomic_and
+ l.nop
+ l.j _atomic_or
+ l.nop
+ l.j _atomic_max
+ l.nop
+ l.j _atomic_min
+ l.nop
+
DISABLE_INTERRUPTS(r17,r19)
l.lwz r29,0(r4)
l.lwz r27,0(r5)
@@ -1129,4 +1159,90 @@ ENTRY(sys_or1k_atomic)
l.jr r9
l.or r11,r0,r0
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfeq r29,r5
+ l.bnf _atomic_cmpxchg_done
+ l.nop
+
+ l.sw 0(r4),r6
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sw 0(r4),r5
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.add r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfgtsi r29,0
+ l.bnf _atomic_decpos_done
+ l.addi r27,r29,-1
+
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.and r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.or r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfltu r29,r5
+ l.bnf _atomic_max_done
+ l.nop
+
+ l.sw 0(r4),r5
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfgtu r29,r5
+ l.bnf _atomic_min_done
+ l.nop
+
+ l.sw 0(r4),r5
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
/* ============================================================[ EOF ]=== */
--
1.7.10.4
Christian Svensson
2014-01-25 15:42:36 UTC
Permalink
OpenRISC does not offer any way to do atomic operations in user space.
In order to provide atomic operations we need to implement these
operations in kernel space.

These operations disables preemption by disabling interrupts while
executing their critical sections.

The following operations has been added:
SWAP: Atomically swap the values in pointers 1 and 2.
CMPXCHG: Writes new to *mem if *mem == old. Returns old *mem.
XCHG: Store NEW in *MEM and return the old value.
ADD: Add VAL to *MEM and return the old value of *MEM.
DECPOS: Decrement *MEM if it is > 0, and return the old value.
AND: Atomically *mem &= mask and return the old value of *mem.
OR: Atomically *mem |= mask and return the old value of *mem.
UMAX: If *mem < val, set *mem = max. Returns old value of *mem.
UMIN: If *mem > val, set *mem = min. Returns old value of *mem.

These operations have been chosen to satisfy the need of atomic.h in
glibc. While it is possible to only use CMPXCHG, the glibc
implementation of the others use a do-while loop to retry the operation
until it succeeds. In order to offer linear performance for these
operations they are therefore implemented in the syscall.

Signed-off-by: Christian Svensson <blue at cmd.nu>
---
arch/openrisc/include/uapi/asm/unistd.h | 41 ++++++++++
arch/openrisc/kernel/entry.S | 136 ++++++++++++++++++++++++++++---
2 files changed, 167 insertions(+), 10 deletions(-)

diff --git a/arch/openrisc/include/uapi/asm/unistd.h
b/arch/openrisc/include/uapi/asm/unistd.h
index ce40b71..13eb792 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -25,5 +25,46 @@

#include <asm-generic/unistd.h>

+/*
+ * uint32 sys_or1k_atomic(uint32 op, ...)
+ *
+ * uint32 sys_or1k_atomic(SWAP, void *ptr1, void *ptr2)
+ * Atomically swap the values in pointers 1 and 2.
+ *
+ * uint32 sys_or1k_atomic(CMPXCHG, uint32 *mem, uint32 old, uint32 new)
+ * Writes new to *mem if *mem == old. Returns old *mem.
+ *
+ * uint32 sys_or1k_atomic(XCHG, uint32 *mem, uint32 new)
+ * Store NEW in *MEM and return the old value.
+ *
+ * uint32 sys_or1k_atomic(ADD, uint32 *mem, uint32 val)
+ * Add VAL to *MEM and return the old value of *MEM.
+ *
+ * uint32 sys_or1k_atomic(DECPOS, uint32 *mem)
+ * Decrement *MEM if it is > 0, and return the old value.
+ *
+ * uint32 sys_or1k_atomic(AND, uint32 *mem, uint32 mask)
+ * Atomically *mem &= mask and return the old value of *mem.
+ *
+ * uint32 sys_or1k_atomic(OR, uint32 *mem, uint32 mask)
+ * Atomically *mem |= mask and return the old value of *mem.
+ *
+ * uint32 sys_or1k_atomic(UMAX, uint32 *mem, uint32 max)
+ * If *mem < val, set *mem = max. Returns old value of *mem.
+ *
+ * uint32 sys_or1k_atomic(UMIN, uint32 *mem, uint32 min)
+ * If *mem > val, set *mem = min. Returns old value of *mem.
+ */
+#define SYS_OR1K_ATOMIC_SWAP 1
+#define SYS_OR1K_ATOMIC_CMPXCHG 2
+#define SYS_OR1K_ATOMIC_XCHG 3
+#define SYS_OR1K_ATOMIC_ADD 4
+#define SYS_OR1K_ATOMIC_DECPOS 5
+#define SYS_OR1K_ATOMIC_AND 6
+#define SYS_OR1K_ATOMIC_OR 7
+#define SYS_OR1K_ATOMIC_UMAX 8
+#define SYS_OR1K_ATOMIC_UMIN 9
+#define SYS_OR1K_ATOMIC_COUNT 10
+
#define __NR_or1k_atomic __NR_arch_specific_syscall
__SYSCALL(__NR_or1k_atomic, sys_or1k_atomic)
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index fec8bf9..7d16c0f2 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -1106,20 +1106,50 @@ ENTRY(sys_rt_sigreturn)

/* This is a catch-all syscall for atomic instructions for the OpenRISC 1000.
* The functions takes a variable number of parameters depending on which
- * particular flavour of atomic you want... parameter 1 is a flag identifying
- * the atomic in question. Currently, this function implements the
- * following variants:
+ * particular flavour of atomic you want.
+ * Parameter 1 is a flag identifying the atomic in question.
*
- * XCHG:
- * @flag: 1
- * @ptr1:
- * @ptr2:
- * Atomically exchange the values in pointers 1 and 2.
+ * TODO: Flag value 0 can be used, and it is recommended that the next
+ * atomic instruction to be implemented uses it. Currently it is a no-op.
*
+ * See arch/openrisc/include/uapi/asm/unistd.h for usage.
*/
-
ENTRY(sys_or1k_atomic)
- /* FIXME: This ignores r3 and always does an XCHG */
+ /* TODO: validate mem ptr(s) */
+ l.sfgeui r3,SYS_OR1K_ATOMIC_COUNT
+ l.bf _atomic_invalid
+ l.movhi r19,hi(atomic_table)
+ l.ori r19,r19,lo(atomic_table)
+ l.slli r3,r3,3
+ l.add r19,r19,r3
+ l.jr r19
+ l.nop
+_atomic_invalid:
+ l.jr r9
+ l.nop
+atomic_table:
+ l.jr r9
+ l.nop
+ l.j _atomic_swap
+ l.nop
+ l.j _atomic_cmpxchg
+ l.nop
+ l.j _atomic_xchg
+ l.nop
+ l.j _atomic_add
+ l.nop
+ l.j _atomic_decpos
+ l.nop
+ l.j _atomic_and
+ l.nop
+ l.j _atomic_or
+ l.nop
+ l.j _atomic_max
+ l.nop
+ l.j _atomic_min
+ l.nop
+
+_atomic_swap:
DISABLE_INTERRUPTS(r17,r19)
l.lwz r29,0(r4)
l.lwz r27,0(r5)
@@ -1129,4 +1159,90 @@ ENTRY(sys_or1k_atomic)
l.jr r9
l.or r11,r0,r0

+_atomic_cmpxchg:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfeq r29,r5
+ l.bnf _atomic_cmpxchg_done
+ l.nop
+
+ l.sw 0(r4),r6
+_atomic_cmpxchg_done:
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_xchg:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sw 0(r4),r5
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_add:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.add r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_decpos:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfgtsi r29,0
+ l.bnf _atomic_decpos_done
+ l.addi r27,r29,-1
+
+ l.sw 0(r4),r27
+_atomic_decpos_done:
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_and:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.and r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_or:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.or r27,r29,r5
+ l.sw 0(r4),r27
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_max:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfltu r29,r5
+ l.bnf _atomic_max_done
+ l.nop
+
+ l.sw 0(r4),r5
+_atomic_max_done:
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
+
+_atomic_min:
+ DISABLE_INTERRUPTS(r17,r19)
+ l.lwz r29,0(r4)
+ l.sfgtu r29,r5
+ l.bnf _atomic_min_done
+ l.nop
+
+ l.sw 0(r4),r5
+_atomic_min_done:
+ ENABLE_INTERRUPTS(r17)
+ l.jr r9
+ l.or r11,r29,r0
/* ============================================================[ EOF ]=== */
--
1.7.10.4
Loading...