Quantcast

[Bug 217138] head (e.g.) -r313783 sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

classic Classic list List threaded Threaded
35 messages Options
12
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313783 sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

            Bug ID: 217138
           Summary: head (e.g.) -r313783 sh vs. jemalloc asserts:
                    include/jemalloc/internal/tsd.h:687: Failed assertion:
                    "tsd_booted"
           Product: Base System
           Version: CURRENT
          Hardware: amd64
                OS: Any
            Status: New
          Severity: Affects Only Me
          Priority: ---
         Component: bin
          Assignee: [hidden email]
          Reporter: [hidden email]
                CC: [hidden email]
                CC: [hidden email]

For head -r313783 I built with a production arm64 kernel
but world without MALLOC_PRODUCTION . I intermittently
get the following sort of thing when, for example, I use
^z to put a process in the background and to get back
to the shell --or quitting a program and getting back to
the shell. The context involves already having been
su'd to root. I can not cause the crash on demand: it
is intermittent and fairly rare so far.

[Note: This was found while trying to track down why sh
fails sometimes during buildworld on a pine64 when
world was built with MALLOC_PRODUCTION.]

<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"

(lldb) bt
* thread #1: tid = 100164, 0x0000000040554e18 libc.so.7`_thr_kill + 8, name =
'sh', stop reason = signal SIGABRT
  * frame #0: 0x0000000040554e18 libc.so.7`_thr_kill + 8
    frame #1: 0x0000000040554ddc libc.so.7`__raise(s=6) + 64 at raise.c:52
    frame #2: 0x0000000040554d50 libc.so.7`abort + 84 at abort.c:65
    frame #3: 0x0000000040528790 libc.so.7`__je_tsd_fetch [inlined]
__je_tsd_get + 248 at tsd.h:687
    frame #4: 0x000000004052876c libc.so.7`__je_tsd_fetch [inlined]
__je_tsd_fetch_impl(init=true) at tsd.h:692
    frame #5: 0x000000004052876c libc.so.7`__je_tsd_fetch + 212 at tsd.h:717
    frame #6: 0x0000000040550214 libc.so.7`ialloc_body(size=11,
zero=<unavailable>, tsdn=0x0000ffffffffe650, usize=0x0000ffffffffe648,
slow_path=true) + 56 at jemalloc_jemalloc.c:1586
    frame #7: 0x0000000040550184 libc.so.7`__malloc(size=1) + 184 at
jemalloc_jemalloc.c:1645
    frame #8: 0x000000000041126c sh`ckmalloc(nbytes=<unavailable>) + 32 at
memalloc.c:61
    frame #9: 0x000000000041bb6c sh`setvar(name=<unavailable>,
val=<unavailable>, flags=<unavailable>) + 176 at var.c:256
    frame #10: 0x0000000000406bf4 sh`evalcommand(cmd=<unavailable>,
flags=<unavailable>, backcmd=<unavailable>) + 3468 at eval.c:1180
    frame #11: 0x0000000000405570 sh`evaltree(n=0x0000000040ab9060,
flags=<unavailable>) + 212 at eval.c:290
    frame #12: 0x000000000041105c sh`cmdloop(top=<unavailable>) + 252 at
main.c:231
    frame #13: 0x0000000000410ed0 sh`main(argc=<unavailable>,
argv=<unavailable>) + 660 at main.c:178
    frame #14: 0x0000000000402f30 sh`__start + 360
    frame #15: 0x0000000040434658 ld-elf.so.1`.rtld_start + 24 at
rtld_start.S:41
(lldb) up 10
frame #10: 0x0000000000406bf4 sh`evalcommand(cmd=<unavailable>,
flags=<unavailable>, backcmd=<unavailable>) + 3468 at eval.c:1180
   1177
   1178 out:
   1179         if (lastarg)
-> 1180                 setvar("_", lastarg, 0);
   1181         if (do_clearcmdentry)
   1182                 clearcmdentry();
   1183 }

Unless tsd_booted has been trashed it would appear that
tsd_boot0() never happened before the attempted setvar
above indirectly tries the __je_tsd_get. Supporting
details from the source code:

/usr/src/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h
establishes:

#define JEMALLOC_MALLOC_THREAD_CLEANUP
#define JEMALLOC_TLS

which is context that is needed when looking things up.

/* malloc_tsd_externs(). */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_externs(a_name, a_type)                              \
extern __thread a_type  a_name##tsd_tls;                                \
extern __thread bool    a_name##tsd_initialized;                        \
extern bool             a_name##tsd_booted;
. . .
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer)          \
. . .                                    \
a_attr bool             a_name##tsd_booted = false;
. . .

#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer,         \
    a_cleanup)                                                          \
. . .
a_name##tsd_boot0(void)                                                 \
{                                                                       \
                                                                        \
        if (a_cleanup != malloc_tsd_no_cleanup) {                       \
                malloc_tsd_cleanup_register(                            \
                    &a_name##tsd_cleanup_wrapper);                      \
        }                                                               \
        a_name##tsd_booted = true;                                      \
        return (false);                                                 \
}                                                                       \
. . .
a_attr bool                                                             \
a_name##tsd_boot(void)                                                  \
{                                                                       \
                                                                        \
        return (a_name##tsd_boot0());                                   \
}                                                                       \
. . .
/* Get/set. */                                                          \
a_attr a_type *                                                         \
a_name##tsd_get(bool init)                                              \
{                                                                       \
                                                                        \
        assert(a_name##tsd_booted);                                     \
        return (&a_name##tsd_tls);                                      \
}                                                                       \
a_attr void                                                             \
a_name##tsd_set(a_type *val)                                            \
{                                                                       \
                                                                        \
        assert(a_name##tsd_booted);                                     \
        a_name##tsd_tls = (*val);                                       \
        if (a_cleanup != malloc_tsd_no_cleanup)                         \
                a_name##tsd_initialized = true;                         \
}
. . .

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
malloc_tsd_externs(, tsd_t)
malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup)

. . .

tsd_t *
malloc_tsd_boot0(void)
{
        tsd_t *tsd;

        ncleanups = 0;
        if (tsd_boot0())
                return (NULL);
        tsd = tsd_fetch();
        *tsd_arenas_tdata_bypassp_get(tsd) = true;
        return (tsd);
}
. . .
static bool
malloc_init_hard(void)
{
. . .
        tsd = malloc_tsd_boot0();
. . .

JEMALLOC_ALWAYS_INLINE_C bool
malloc_init(void)
{

        if (unlikely(!malloc_initialized()) && malloc_init_hard())
                return (true);
        malloc_thread_init();

        return (false);
}      
. . .
typedef enum {
        malloc_init_uninitialized       = 3,
        malloc_init_a0_initialized      = 2,
        malloc_init_recursible          = 1,
        malloc_init_initialized         = 0 /* Common case --> jnz. */
} malloc_init_t;
static malloc_init_t    malloc_init_state = malloc_init_uninitialized;
. . .
JEMALLOC_ALWAYS_INLINE_C bool
malloc_initialized(void)
{

        return (malloc_init_state == malloc_init_initialized);
}

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313783 sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #1 from Mark Millard <[hidden email]> ---
(In reply to Mark Millard from comment #0)
It turns out that the sh failure during buildworld
also gets to __je_tsd_get (but a different way) and
then fails the same assertion for "tsd_booted":

<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"

A back trace is:

(lldb) bt
* thread #1: tid = 100194, 0x0000000040554e18 libc.so.7`_thr_kill + 8, name =
'sh', stop reason = signal SIGABRT
  * frame #0: 0x0000000040554e18 libc.so.7`_thr_kill + 8
    frame #1: 0x0000000040554ddc libc.so.7`__raise(s=6) + 64 at raise.c:52
    frame #2: 0x0000000040554d50 libc.so.7`abort + 84 at abort.c:65
    frame #3: 0x0000000040528790 libc.so.7`__je_tsd_fetch [inlined]
__je_tsd_get + 248 at tsd.h:687
    frame #4: 0x000000004052876c libc.so.7`__je_tsd_fetch [inlined]
__je_tsd_fetch_impl(init=true) at tsd.h:692
    frame #5: 0x000000004052876c libc.so.7`__je_tsd_fetch + 212 at tsd.h:717
    frame #6: 0x0000000040550cc0 libc.so.7`__free(ptr=0x0000000040a17720) + 64
at jemalloc_jemalloc.c:2011
    frame #7: 0x0000000000411328 sh`ckfree(p=<unavailable>) + 32 at
memalloc.c:88
    frame #8: 0x0000000000407cd8 sh`clearcmdentry + 76 at exec.c:505
    frame #9: 0x0000000000406bfc sh`evalcommand(cmd=<unavailable>,
flags=<unavailable>, backcmd=<unavailable>) + 3476 at eval.c:1182
    frame #10: 0x0000000000405570 sh`evaltree(n=0x0000000040a1cde8,
flags=<unavailable>) + 212 at eval.c:290
    frame #11: 0x000000000041105c sh`cmdloop(top=<unavailable>) + 252 at
main.c:231
    frame #12: 0x0000000000410ed0 sh`main(argc=<unavailable>,
argv=<unavailable>) + 660 at main.c:178
    frame #13: 0x0000000000402f30 sh`__start + 360
    frame #14: 0x0000000040434658 ld-elf.so.1`.rtld_start + 24 at
rtld_start.S:41

It appears that setvar was not used but clearcmdentry
(indirectly) gets the same sort of problem when this
happens:

(lldb) up 9
frame #9: 0x0000000000406bfc sh`evalcommand(cmd=<unavailable>,
flags=<unavailable>, backcmd=<unavailable>) + 3476 at eval.c:1182
   1179         if (lastarg)
   1180                 setvar("_", lastarg, 0);
   1181         if (do_clearcmdentry)
-> 1182                 clearcmdentry();
   1183 }

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313783 sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #2 from Mark Millard <[hidden email]> ---
If one is going to look into this in an amd64
context it is important to be using head -r313772
or later in order to avoid fork sometimes not
preserving the stack pointer on the child-process
side of things --at least if experimenting with
port or buildworld buildkernel builds as a means
of testing.

Getting past that stack pointer problem is what
allowed me to see this problem during build
activity, which started me down this exploration.

[My tests for aborting in sh`forkshell if fork changes
the stack pointer are still in place but there have
been no failures so far.]

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313783 sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #3 from Mark Millard <[hidden email]> ---
Before starting a round of updates to a newer version of head
I got a couple of sh core dumps that showed the same sort of
failures. But in these I'd added recording the pid that was
returned by fork.

The result was that both the core dumps were from the parent-
process side of the most recent fork.

[It will be a bit before I'm done with updating to a newer
version of head.]

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313783 sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #4 from Mark Millard <[hidden email]> ---
I got a somewhat different trace back this time:

(lldb) bt
* thread #1: tid = 100105, 0x0000000040554e18 libc.so.7`_thr_kill + 8, name =
'sh', stop reason = signal SIGABRT
  * frame #0: 0x0000000040554e18 libc.so.7`_thr_kill + 8
    frame #1: 0x0000000040554ddc libc.so.7`__raise(s=6) + 64 at raise.c:52
    frame #2: 0x0000000040554d50 libc.so.7`abort + 84 at abort.c:65
    frame #3: 0x0000000040528790 libc.so.7`__je_tsd_fetch [inlined]
__je_tsd_get + 248 at tsd.h:687
    frame #4: 0x000000004052876c libc.so.7`__je_tsd_fetch [inlined]
__je_tsd_fetch_impl(init=true) at tsd.h:692
    frame #5: 0x000000004052876c libc.so.7`__je_tsd_fetch + 212 at tsd.h:717
    frame #6: 0x0000000040550cc0 libc.so.7`__free(ptr=0x0000000040a390e0) + 64
at jemalloc_jemalloc.c:2011
    frame #7: 0x0000000000411330 sh`ckfree(p=<unavailable>) + 32 at
memalloc.c:88
    frame #8: 0x000000000040e6c8 sh`freejob(jp=<unavailable>) + 108 at
jobs.c:475
    frame #9: 0x000000000040dd0c sh`waitforjob(jp=<unavailable>,
origstatus=<unavailable>) + 352 at jobs.c:1066
    frame #10: 0x0000000000406988 sh`evalcommand(cmd=<unavailable>,
flags=<unavailable>, backcmd=<unavailable>) + 2848 at eval.c:1166
    frame #11: 0x0000000000405570 sh`evaltree(n=0x0000000040ab9060,
flags=<unavailable>) + 212 at eval.c:290
    frame #12: 0x0000000000411064 sh`cmdloop(top=<unavailable>) + 252 at
main.c:231
    frame #13: 0x0000000000410ed8 sh`main(argc=<unavailable>,
argv=<unavailable>) + 660 at main.c:178
    frame #14: 0x0000000000402f30 sh`__start + 360
    frame #15: 0x0000000040434658 ld-elf.so.1`.rtld_start + 24 at
rtld_start.S:41

(lldb) up 10
frame #10: 0x0000000000406988 sh`evalcommand(cmd=<unavailable>,
flags=<unavailable>, backcmd=<unavailable>) + 2848 at eval.c:1166
   1163 parent: /* parent process gets here (if we forked) */
   1164         if (mode == FORK_FG) {  /* argument to fork */
   1165                 INTOFF;
-> 1166                 exitstatus = waitforjob(jp, &realstatus);
   1167                 INTON;
   1168                 if (iflag && loopnest > 0 && WIFSIGNALED(realstatus)) {
   1169                         evalskip = SKIPBREAK;

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313783 sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

Mark Millard <[hidden email]> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Hardware|amd64                       |arm64
                 CC|                            |[hidden email]

--- Comment #5 from Mark Millard <[hidden email]> ---
[Note: the kernels after -r313266 are messed up
for at least PowerMac so-called "Quad-Core"s so
the below is based on a -r313864 buildworld and
a -r313266 buildkernel because I had to revert
the kernel to about the last that would avoid
randomly timed panics.]

I did a buildworld of -r313864 for powerpc64
without having MALLOC_PRODUCTION defined and
installed and booted with it in order to see
if powerpc64 had the same sort of problem that
aarch64 is having with jemalloc, in
particular during self-hosted buildworld
buildkernel activity (my test case).

powerpc64 worked fine with no aborts. So. . .

include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

seems to be aarch64 (arm64) specific in some way.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313864 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

Mark Millard <[hidden email]> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|head (e.g.) -r313783 sh vs. |head (e.g.) -r313864 for
                   |jemalloc asserts:           |arm64: sh vs. jemalloc
                   |include/jemalloc/internal/t |asserts:
                   |sd.h:687: Failed assertion: |include/jemalloc/internal/t
                   |"tsd_booted"                |sd.h:687: Failed assertion:
                   |                            |"tsd_booted"

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313864 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #6 from Mark Millard <[hidden email]> ---
See also bugzilla 217239 for another type of arm64
specific problem with jemalloc without
MALLOC_PRODUCTION.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313864 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #7 from Mark Millard <[hidden email]> ---
The following describes a reproducible sequence
in my context, unfortunately involving hours of
buildworld activity. It fails every time that I
have tried it and at the same places each time.
I give a contrast to a working context as well.

Context: doing buildworld buildkernel on a pine64+ with
2 GiBytes of RAM. Multiple head revisions, most recently:

# uname -apKU
FreeBSD pine64 12.0-CURRENT FreeBSD 12.0-CURRENT  r313999M  arm64 aarch64
1200021 1200021

The pine64 is running what was a cross build that had
MALLOC_PRODUCTION not defined. (Unlike my usual
way of building.)

Problem: sh core dumps via failing an assert. (script core dumps
as well for other reasons for one stage but I'm focused on the
earliest failures for now: sh failures.)

The following happens when I buildworld buildkernel on the
pine64+ using:

WITH_CLANG=
WITH_CLANG_IS_CC=
WITH_CLANG_FULL=
WITH_CLANG_EXTRAS=
WITH_LLD=
WITH_LLDB=

but not when using:

WITHOUT_CLANG=
WITHOUT_CLANG_IS_CC=
WITHOUT_CLANG_FULL=
WITHOUT_CLANG_EXTRAS=
WITHOUT_LLD=
WITHOUT_LLDB=

(The rest being the same, starting after using cleanworld
in both cases.)

But note that the first failures happen long after
those have built what they contribute to the _generic_libs
stage. (I have not yet tried isolating subsets.) Similarly
for the later 2nd stage: well after "everything" did its
llvm related activity.

I've tried the failing case under both:

2 GiBytes RAM + 3 GiBytes swap
and:
2 GiBytes RAM + 6 GiBytes swap

It made no difference and there have been no messages
about running out of swap space or other forms of resource
limitation based process killing or the like.


From sysutils/DTraceToolkit 's /usr/local/share/dtrace-toolkit/execsnoop :

. . .
2017 Feb 22 16:37:02     0  61019  61018 make install DIRPRFX=lib/libusb/\0
2017 Feb 22 16:37:02     0  61020  61019 sh -e\0
2017 Feb 22 16:37:02     0  61021  61019 sh -e\0
2017 Feb 22 16:37:02     0  61022  61019 sh -e\0
2017 Feb 22 16:37:02     0  61023  61020 sh /usr/src/tools/install.sh -C -o
root -g wheel -m 444 libusb.a
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02     0  61024  61021 sh /usr/src/tools/install.sh -o root
-g wheel -m 444 /usr/src/lib/libusb/libusb-0.1.pc
/usr/src/lib/libusb/libusb-1.0.pc /usr/src/lib/libusb/libusb-2.0.pc
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/libdata/pkgconfig/\0
2017 Feb 22 16:37:02     0  61025  61022 sh /usr/src/tools/install.sh -C -o
root -g wheel -m 444 /usr/src/lib/libusb/libusb20.h
/usr/src/lib/libusb/libusb20_desc.h /usr/src/lib/libusb/usb.h
/usr/src/lib/libusb/libusb.h
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/include/\0
2017 Feb 22 16:37:02     0  61023  61020 sh /usr/src/tools/install.sh -C -o
root -g wheel -m 444 libusb.a
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02     0  61023  61020 install -p libusb.a
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02     0  61024  61021 sh /usr/src/tools/install.sh -o root
-g wheel -m 444 /usr/src/lib/libusb/libusb-0.1.pc
/usr/src/lib/libusb/libusb-1.0.pc /usr/src/lib/libusb/libusb-2.0.pc
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/libdata/pkgconfig/\0
2017 Feb 22 16:37:02     0  61025  61022 sh /usr/src/tools/install.sh -C -o
root -g wheel -m 444 /usr/src/lib/libusb/libusb20.h
/usr/src/lib/libusb/libusb20_desc.h /usr/src/lib/libusb/usb.h
/usr/src/lib/libusb/libusb.h
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/include/\0
2017 Feb 22 16:37:02     0  61024  61021 install -p
/usr/src/lib/libusb/libusb-0.1.pc /usr/src/lib/libusb/libusb-1.0.pc
/usr/src/lib/libusb/libusb-2.0.pc
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/libdata/pkgconfig/\0
2017 Feb 22 16:37:02     0  61025  61022 install -p
/usr/src/lib/libusb/libusb20.h /usr/src/lib/libusb/libusb20_desc.h
/usr/src/lib/libusb/usb.h /usr/src/lib/libusb/libusb.h
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/include/\0
2017 Feb 22 16:37:02     0  61026  61020 sh /usr/src/tools/install.sh -s -o
root -g wheel -m 444 libusb.so.3
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02     0  61026  61020 sh /usr/src/tools/install.sh -s -o
root -g wheel -m 444 libusb.so.3
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02     0  61026  61020 install -p libusb.so.3
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/\0
2017 Feb 22 16:37:02     0  61027  61020 sh /usr/src/tools/install.sh -o root
-g wheel -m 444 libusb.so.3.debug
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/debug/usr/lib/\0
2017 Feb 22 16:37:02     0  61027  61020 sh /usr/src/tools/install.sh -o root
-g wheel -m 444 libusb.so.3.debug
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/debug/usr/lib/\0
2017 Feb 22 16:37:02     0  61027  61020 install -p libusb.so.3.debug
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/debug/usr/lib/\0
2017 Feb 22 16:37:02     0  61028  61020 sh /usr/src/tools/install.sh -l rs
libusb.so.3 /usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/libusb.so\0
2017 Feb 22 16:37:02     0  61029  61028 ln -fsn libusb.so.3
/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp/usr/lib/libusb.so\0

(That last was it for the build.) That is the end of the exec
activity for the _generic_libs part of the build (and since the
build stops: the last for the build overall).

(The below ps -daux output is from some time before the problem happened
but with later, related core files listed as well.)

root   91353  0.0  0.1   6856   1500 u0  I+   10:28     0:00.02     `-- /bin/sh
/root/sys_build_scripts.pine64-host/make_pine64_nodebug_clang_bootstrap-pine64-host.sh
-j 4 buildworld buildkernel
root   91356  0.0  0.1   6204   1560 u0  S+   10:28     0:06.59       `--
script
/root/sys_typescripts/typescript_make_pine64_nodebug_clang_bootstrap-pine64-host-2017-02-22:10:28:28
env __MAKE_CONF=/
-rw-------  1 root  wheel  4657152 Feb 22 16:37:04 2017 script.91356.core
(from: ls -ltTU)

root   91357  0.0  0.0   4948    204  1  Ss+  10:28     0:01.87         `--
make -j 4 buildworld buildkernel
root   91373  0.0  0.1   6856   1500  1  I    10:28     0:00.01           `--
sh -ev
-rw-------  1 root  wheel  4702208 Feb 22 16:37:03 2017 sh.91373.core
(from: ls -ltTU)

root   91374  0.0  0.0   4948    204  1  S    10:28     0:01.69             `--
make -m /usr/src/share/mk -f Makefile.inc1 TARGET=arm64 TARGET_ARCH=aarch64
buildworld
root   10803  0.0  0.1   6856   1500  1  I    10:43     0:00.01              
`-- sh -ev
-rw-------  1 root  wheel  4702208 Feb 22 16:37:02 2017 sh.10803.core
(from: ls -ltTU)

root   10804  0.0  0.0   4948    200  1  S    10:43     3:00.18                
`-- make -f Makefile.inc1
DESTDIR=/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp -DNO_FSCHG MK_HTML=no
-DNO_LINT MK_MA
root   10811  0.0  0.1   6856   1500  1  I    10:43     0:00.01                
  `-- sh -ev
root   38075  0.0  0.0   4948    204  1  S    11:14     0:00.75                
    `-- make -f Makefile.inc1 _generic_libs
root   38085  0.0  0.1   6856   1500  1  I    11:14     0:00.01                
      `-- sh -ev
. . .

"shutdown -r now" here makes no difference to the below
when done after the reboot.

(Of course there is some llvm related build activity during the
"everything" stage below.)

Doing another buildworld buildkernel to continue the build results
in:

. . .
2017 Feb 22 18:48:46     0  51772  51454 sh -e\0
2017 Feb 22 18:48:46     0  51773  51772 sed -E s,(^| |B|`)svn,\\1svnlite,g
/usr/src/contrib/subversion/subversion/svn/svn.1\0
2017 Feb 22 18:48:46     0  51774  51454 sh -e\0
2017 Feb 22 18:48:46     0  51775  51774 gzip -cn svnlite.1\0
2017 Feb 22 18:48:48     0  51776  51454 sh -e\0
2017 Feb 22 18:48:48     0  51777  51776 \0
2017 Feb 22 18:48:48     0  51778  51777 \0
2017 Feb 22 18:48:49     0  51779  51454 sh -e\0
2017 Feb 22 18:48:49     0  51780  51779 /usr/local/aarch64-freebsd/bin/objcopy
--only-keep-debug svnlite.full svnlite.debug\0
2017 Feb 22 18:48:50     0  51781  51454 sh -e\0
2017 Feb 22 18:48:50     0  51782  51781 /usr/local/aarch64-freebsd/bin/objcopy
--strip-debug --add-gnu-debuglink=svnlite.debug svnlite.full svnlite\0

The above is the end of the "everything" exec activity
but before the buildworld_epilogue (that does not happen).
Again it is the last exec activity for the build because the
build stops.

(Again ps -daux from sometime before the failure mixed
with core file ls -ltTU information below:)

root   61122   0.0  0.1   6856   1500 u0  I+   17:13     0:00.01     `--
/bin/sh
/root/sys_build_scripts.pine64-host/make_pine64_nodebug_clang_bootstrap-pine64-host.sh
-j 4 buildworld buildkernel
root   61125   0.0  0.1   6204   1560 u0  S+   17:13     0:09.56       `--
script
/root/sys_typescripts/typescript_make_pine64_nodebug_clang_bootstrap-pine64-host-2017-02-22:17:13:45
env __MAKE_CONF=
root   61126   0.0  0.0   4948    204  1  Ss+  17:13     0:02.36         `--
make -j 4 buildworld buildkernel
root   61142   0.0  0.1   6856   1500  1  I    17:13     0:00.01           `--
sh -ev
-rw-------  1 root  wheel  4702208 Feb 22 18:48:51 2017 sh.61142.core

root   61143   0.0  0.0   4948    204  1  S    17:13     0:02.08            
`-- make -m /usr/src/share/mk -f Makefile.inc1 TARGET=arm64 TARGET_ARCH=aarch64
buildworld
root   81104   0.0  0.1   6856   1500  1  I    17:19     0:00.01              
`-- sh -ev
-rw-------  1 root  wheel  4702208 Feb 22 18:48:50 2017 sh.81104.core

root   81105   0.0  0.0   4948    220  1  S    17:19     0:02.57              
 `-- make -f Makefile.inc1
DESTDIR=/usr/obj/pine64_clang/arm64.aarch64/usr/src/tmp all
root   13358   0.0  0.1   6856   1500  1  I    17:49     0:00.01              
   |-- sh -e
. . .

(Yep: script does not core dump for this 2nd stage context.)

A 3rd buildworld buildkernel finishes the build, with buildworld
being essentially a large no-op and then doing the buildkernel.



Context details:

# more ~/src.configs/make.conf
CFLAGS.gcc+= -v

(But this was not a gcc based build.)

# more ~/src.configs/src.conf.pine64-clang-bootstrap.pine64-host
TO_TYPE=aarch64
TOOLS_TO_TYPE=${TO_TYPE}
#
KERNCONF=GENERIC-NODBG
TARGET=arm64
.if ${.MAKE.LEVEL} == 0
TARGET_ARCH=${TO_TYPE}
.export TARGET_ARCH
.endif
#
#WITH_CROSS_COMPILER=
WITH_SYSTEM_COMPILER=
#
#CPUTYPE=soft
WITH_LIBCPLUSPLUS=
WITHOUT_BINUTILS_BOOTSTRAP=
WITHOUT_ELFTOOLCHAIN_BOOTSTRAP=
#WITHOUT_CLANG_BOOTSTRAP=
WITH_CLANG=
WITH_CLANG_IS_CC=
WITH_CLANG_FULL=
WITH_CLANG_EXTRAS=
WITH_LLD=
WITH_LLDB=
#
WITH_BOOT=
WITHOUT_LIB32=
WITHOUT_LIBSOFT=
#
WITHOUT_GCC_BOOTSTRAP=
WITHOUT_GCC=
WITHOUT_GCC_IS_CC=
WITHOUT_GNUCXX=
#
NO_WERROR=
#WERROR=
MALLOC_PRODUCTION=
#
WITH_REPRODUCIBLE_BUILD=
WITH_DEBUG_FILES=
#
CROSS_BINUTILS_PREFIX=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/
AS=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/as
AR=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/ar
LD=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/ld
NM=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/nm
OBJCOPY=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/objcopy
OBJDUMP=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/objdump
RANLIB=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/ranlib
SIZE=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/size
STRINGS=/usr/local/${TOOLS_TO_TYPE}-freebsd/bin/strings
.export AS
.export AR
.export LD
.export NM
.export OBJCOPY
.export OBJDUMP
.export RANLIB
.export SIZE
.export STRINGS


# svnlite status /usr/src/ | sort
?       /usr/src/sys/amd64/conf/GENERIC-DBG
?       /usr/src/sys/amd64/conf/GENERIC-NODBG
?       /usr/src/sys/arm/conf/BPIM3-DBG
?       /usr/src/sys/arm/conf/BPIM3-NODBG
?       /usr/src/sys/arm/conf/RPI2-DBG
?       /usr/src/sys/arm/conf/RPI2-NODBG
?       /usr/src/sys/arm64/conf/GENERIC-DBG
?       /usr/src/sys/arm64/conf/GENERIC-NODBG
?       /usr/src/sys/powerpc/conf/GENERIC64vtsc-DBG
?       /usr/src/sys/powerpc/conf/GENERIC64vtsc-NODBG
?       /usr/src/sys/powerpc/conf/GENERICvtsc-DBG
?       /usr/src/sys/powerpc/conf/GENERICvtsc-NODBG
M       /usr/src/bin/sh/jobs.c
M       /usr/src/bin/sh/miscbltin.c
M       /usr/src/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
M       /usr/src/contrib/llvm/tools/lld/ELF/Target.cpp
M       /usr/src/lib/csu/powerpc64/Makefile
M       /usr/src/libexec/rtld-elf/Makefile
M       /usr/src/sys/arm/arm/gic.c
M       /usr/src/sys/boot/ofw/Makefile.inc
M       /usr/src/sys/boot/powerpc/Makefile.inc
M       /usr/src/sys/boot/powerpc/kboot/Makefile
M       /usr/src/sys/boot/uboot/Makefile.inc
M       /usr/src/sys/conf/Makefile.powerpc
M       /usr/src/sys/conf/kmod.mk
M       /usr/src/sys/ddb/db_main.c
M       /usr/src/sys/ddb/db_script.c
M       /usr/src/sys/powerpc/ofw/ofw_machdep.c

The . . ./conf/*-*DBG files include the standard files
and then make adjustments to have a production
style kernel build, including the arm64 case.

Below, the first two files are as they were used to isolate
fork's original failure to preserve the sp value on the
child process side when interrupts happen. (Since fixed
in head but not in stable/11 last I looked.)

# svnlite diff /usr/src/bin/sh/jobs.c /usr/src/bin/sh/miscbltin.c
/usr/src/sys/arm/arm/gic.c
Index: /usr/src/bin/sh/jobs.c
===================================================================
--- /usr/src/bin/sh/jobs.c      (revision 313999)
+++ /usr/src/bin/sh/jobs.c      (working copy)
@@ -51,6 +51,9 @@
 #include <stdlib.h>
 #include <unistd.h>

+/* JUST FOR TESTING */
+#include <stdint.h>
+
 #include "shell.h"
 #if JOBS
 #include <termios.h>
@@ -833,6 +836,13 @@
  * in a pipeline).
  */

+extern uintptr_t example_stack_address(void);
+
+uintptr_t stack_address_before_fork = 0;
+uintptr_t stack_address_after_fork = 0;
+
+pid_t pid_from_fork = -1;
+
 pid_t
 forkshell(struct job *jp, union node *n, int mode)
 {
@@ -845,7 +855,10 @@
        if (mode == FORK_BG && (jp == NULL || jp->nprocs == 0))
                checkzombies();
        flushall();
-       pid = fork();
+       stack_address_before_fork = example_stack_address();
+       pid_from_fork = pid = fork();
+       stack_address_after_fork  = example_stack_address();
+       if (stack_address_after_fork != stack_address_before_fork) abort();
        if (pid == -1) {
                TRACE(("Fork failed, errno=%d\n", errno));
                INTON;
@@ -946,7 +959,6 @@
        return pid;
 }

-
 pid_t
 vforkexecshell(struct job *jp, char **argv, char **envp, const char *path, int
idx, int pip[2])
 {
Index: /usr/src/bin/sh/miscbltin.c
===================================================================
--- /usr/src/bin/sh/miscbltin.c (revision 313999)
+++ /usr/src/bin/sh/miscbltin.c (working copy)
@@ -64,6 +64,15 @@

 #undef eflag

+
+/* JUST FOR TESTING */
+uintptr_t example_stack_address(void)
+{
+    volatile uintptr_t test = 0;
+    return (uintptr_t)(void*)&test;
+}
+
+
 int readcmd(int, char **);
 int umaskcmd(int, char **);
 int ulimitcmd(int, char **);
Index: /usr/src/sys/arm/arm/gic.c
===================================================================
--- /usr/src/sys/arm/arm/gic.c  (revision 313999)
+++ /usr/src/sys/arm/arm/gic.c  (working copy)
@@ -672,9 +672,13 @@

        if (irq >= sc->nirqs) {
 #ifdef GIC_DEBUG_SPURIOUS
+#define EXPECTED_SPURIOUS_IRQ 1023
+           if (irq != EXPECTED_SPURIOUS_IRQ) {
                device_printf(sc->gic_dev,
-                   "Spurious interrupt detected: last irq: %d on CPU%d\n",
+                   "Spurious interrupt %d detected of %d: last irq: %d on
CPU%d\n",
+                   irq, sc->nirqs,
                    sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
+            }
 #endif
                return (FILTER_HANDLED);
        }
@@ -720,6 +724,16 @@
        if (irq < sc->nirqs)
                goto dispatch_irq;

+       if (irq != EXPECTED_SPURIOUS_IRQ) {
+#undef EXPECTED_SPURIOUS_IRQ
+#ifdef GIC_DEBUG_SPURIOUS
+               device_printf(sc->gic_dev,
+                   "Spurious end interrupt %d detected of %d: last irq: %d on
CPU%d\n",
+                   irq, sc->nirqs,
+                   sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
+#endif
+       }
+
        return (FILTER_HANDLED);
 }


The gic.c change just avoids getting uninteresting spurious
interrupt messages on the console.

Other changes are generally tied to my powerpc64 and powerpc
investigations.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313864 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #8 from Mark Millard <[hidden email]> ---
(In reply to Mark Millard from comment #7)

I should have explicitly mentioned that the
processes that get the failures were created
and initialized long before the clang/llvm
related parts of the build occur, even though
the failures occur after some of the potential
clang/llvm build activity.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313864 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted"

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #9 from Mark Millard <[hidden email]> ---
(In reply to Mark Millard from comment #8)

A note on script failures:

I'll note that the script failures are
part of the activity reported in bugzilla
217239, where for script the failures look
in some respects like the powerd and su
failures reported there: they are not
tsd_booted asserts like those reported here.

Still there might be a common cause since
the script failures are repeatable as
reported here in 217138.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313864 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

Mark Millard <[hidden email]> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|head (e.g.) -r313864 for    |head (e.g.) -r313864 for
                   |arm64: sh vs. jemalloc      |arm64: sh vs. jemalloc
                   |asserts:                    |asserts:
                   |include/jemalloc/internal/t |include/jemalloc/internal/t
                   |sd.h:687: Failed assertion: |sd.h:687: Failed assertion:
                   |"tsd_booted"                |"tsd_booted" once swapped
                   |                            |in after being swapped out
                   |                            |(comment 10)

--- Comment #10 from Mark Millard <[hidden email]> ---
I've reduced the testing context to the following
type of example (no longer involving buildworld
buildkernel):

# sh
# sh
# sh
. . .
# sh

(So a bunch of nested sh's in an ssh session that
will mostly be swapped out. I happened to have
done this under 2 users for the example material
below: on one I did an su first. Plus I had another
session without such a nesting but with an su.)

After forcing these to mostly swap out (see below)
I use ^D to exit a currently interactive one. Then
the rest of the swapped out ones from the nesting
get the tsd_booted failure.

So, for example:

# stress -m 1 --vm-bytes 1536M

The context is a PINE64+ with 2GB of RAM. The above
was enough in my context to cause the needed swapouts:

# ps -aOuser,flags
  PID USER          F TT  STAT    TIME COMMAND
  688 root       4102 u0  IWs  0:00.00 login [pam] (login)
  689 root   10004002 u0  I+   0:00.06 -sh (sh)
72611 markmi     4002  0  IWs  0:00.00 -sh (sh)
72613 markmi     4002  0  IW   0:00.00 sh
72614 markmi     4002  0  IW   0:00.00 sh
72615 markmi     4002  0  IW   0:00.00 sh
72616 markmi     4002  0  IW   0:00.00 sh
72617 markmi     4002  0  IW   0:00.00 sh
72618 markmi     4002  0  IW   0:00.00 sh
72619 markmi     4002  0  IW   0:00.00 sh
72620 markmi     4002  0  IW   0:00.00 sh
72621 markmi     4002  0  IW   0:00.00 sh
72622 markmi     4002  0  IW   0:00.00 sh
72623 markmi     4002  0  IW   0:00.00 sh
72624 markmi     4002  0  IW   0:00.00 sh
72625 markmi     4002  0  IW   0:00.00 sh
72626 markmi 10004002  0  I+   0:00.01 sh
  167 markmi     4002  1  IWs  0:00.00 -sh (sh)
  169 root       4102  1  IW   0:00.00 su
  170 root       4002  1  IW   0:00.00 su (sh)
  171 root       4002  1  IW   0:00.00 sh
  172 root       4002  1  IW   0:00.00 sh
  173 root       4002  1  IW   0:00.00 sh
  174 root       4002  1  IW   0:00.00 sh
  175 root       4002  1  IW   0:00.00 sh
  176 root       4002  1  IW   0:00.00 sh
  177 root       4002  1  IW   0:00.00 sh
  178 root       4002  1  IW   0:00.00 sh
  179 root   10004002  1  I+   0:00.01 sh
60961 root       4002  2  IW   0:00.00 stress -m 1 --vm-bytes 1536M
60962 root   10000002  2  R    0:29.41 stress -m 1 --vm-bytes 1536M
60964 root   10004002  2  R+   0:00.01 ps -aOuser,flags
82389 markmi     4002  2  IWs  0:00.00 -sh (sh)
82391 root       4102  2  IW   0:00.00 su
82392 root   10004002  2  S    0:00.22 su (sh)

So with that swapped out context established:

(The markmi user case:)

$ ^D<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"


(The su'd user case:)

# ^D<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"

The one without the deeply nested sh's (but an su):

# exit
<jemalloc>: /usr/src/contrib/jemalloc/include/jemalloc/internal/tsd.h:687:
Failed assertion: "tsd_booted"

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313864 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #11 from Mark Millard <[hidden email]> ---
(In reply to Mark Millard from comment #10)

I'll note that the sh.*.core files show that
__je_tsd_booted and a very large area around
it is all zero. (I've been seeing this for a
long time but until the swapped-out-before
discovery I had little information about
when the big block of zeros showed up.)

Some possibilities:

A) The memory was not restored at all.

B) Incorrect content was restored (possibly
   incorrect content was swapped out earlier).

C) The memory and caches and such were
   not forced to be coherent in one or both
   directions of transfer.

There may be other things that could be involved.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313999 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

Mark Millard <[hidden email]> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|head (e.g.) -r313864 for    |head (e.g.) -r313999 for
                   |arm64: sh vs. jemalloc      |arm64: sh vs. jemalloc
                   |asserts:                    |asserts:
                   |include/jemalloc/internal/t |include/jemalloc/internal/t
                   |sd.h:687: Failed assertion: |sd.h:687: Failed assertion:
                   |"tsd_booted" once swapped   |"tsd_booted" once swapped
                   |in after being swapped out  |in after being swapped out
                   |(comment 10)                |(comment 10)

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313999 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

Mark Millard <[hidden email]> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Severity|Affects Only Me             |Affects Some People

--- Comment #12 from Mark Millard <[hidden email]> ---
Other folks on the lists have reported getting the
Failed assertion: "tsd_booted" notices.

So I'm changing the Importance field to track that
now that a fairly simple, quick way to reproduce the
problem is known.

(Of course my results do involve only the equipment
that I have access to: one PINE64+, powered USB hub,
USB SSD, heat-sink, fan, case. So it is possible
that I'm assuming too much about reproduction based
on comments 10 and later.)

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313999 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #13 from Mark Millard <[hidden email]> ---
(In reply to Mark Millard from comment #12)

Here is an even stronger reproduction claim
from the lists (for an rpi3 instead of for a
pine64+):


I can confirm process corruption on RPi3 as of r313567.

--
Poul-Henning Kamp       | UNIX since Zilog Zeus 3.20
[hidden email]         | TCP/IP since RFC 956
FreeBSD committer       | BSD since 4.3-tahoe    
Never attribute to malice what can adequately be explained by incompetence.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313999 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #14 from Mark Millard <[hidden email]> ---
(In reply to Mark Millard from comment #11)

On reading about FreeBSD's swapping I see that
swap-in does certain essentials, including setting
up to demand-page most of the restored material
if/when referenced.

Thus another possibility for the problem is:

D) The configuration to enable this demand-paging
   might be incorrect (possibly incomplete?) such
   that some of the demand paging does not happen.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313999 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #15 from Mark Millard <[hidden email]> ---
(In reply to Mark Millard from comment #10)

If I try just after booting, then

stress -m 1 --vm-bytes 1800M

is more like what I need to cause the
swap-outs. 1700M is too little in that
context.


Also:

Trying a debug kernel got the same results with
only one notice: a vfs_bio.c bufwait vs.
ufs_dirhash.c dirhash lock order reversal that
likely is a well known one.

(I guess the LOR is from writing core files into
/var/crash/ .)

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313999 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #16 from Mark Millard <[hidden email]> ---
(In reply to Mark Millard from comment #15)

The problem only seems to occur when top shows
RES 0K and a positive SWAP for the failing sh
process before it executes.

When top shows both a positive RES and a positive
SWAP before the execution the sh process does not
die.

For example the following two did not die:

  PID USERNAME    THR PRI NICE   SIZE    RES   SWAP STATE   C   TIME     CPU
COMMAND
11167 root          1  20    0  6880K   908K  1216K ttyin   0   0:00   0.00% su
(sh)
. . .
11144 root          1  20    0  6880K   568K  1556K ttyin   2   0:00   0.00% su
(sh)

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[Bug 217138] head (e.g.) -r313999 for arm64: sh vs. jemalloc asserts: include/jemalloc/internal/tsd.h:687: Failed assertion: "tsd_booted" once swapped in after being swapped out (comment 10)

bugzilla-noreply
In reply to this post by bugzilla-noreply
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217138

--- Comment #17 from Mark Millard <[hidden email]> ---
https://lists.freebsd.org/pipermail/freebsd-arm/2017-March/015869.html

shows an about 110 line C program that shows data
corruption for fork-then-swap-out-then-swap-in.

The known-corrupted data is a memory region that was
malloc'd and that passed its byte pattern test before
the fork.

I'll also add the source as an attachment.

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
[hidden email] mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to "[hidden email]"
12
Loading...