Monday, October 18, 2021

to be improved

 The gml4gtk graph viewer is a development version with several known issues still to be fixed, but nothing is seriously wrong or worth worrying about, and nobody cares. So be it. For example, below is a GML graph of the lemon parser generator tool with edge labels, which is to be improved.
/*
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *  
 *  SPDX-License-Identifier: GPL-3.0+
 *  License-Filename: LICENSE
 */
THE PARSER SOURCE CODE IN THIS PROGRAM DOES NOT HAVE THE GNU BISON SPECIAL
Free Software Foundation EXCEPTION AND IS GNU GPL FREE SOFTWARE VERSION 3+
Because of this the whole program is GNU GPL Free Software version 3+
This is a development version with several known issues to be fixed by now


Thursday, September 30, 2021

Windows programs crash faster than Linux programs

 Windows programs crash faster than Linux programs because of the very small stack space in Windows.
The difference is that Windows programs crash 8 or more times sooner than the Linux version.
The stack space of a program is limited to a fixed size, whereas its maximum total memory usage depends on the available RAM and disk swap space.

This does not depend on programming language and cannot be fixed using rust programming even with their optimistic propaganda.

The initial stack size can be changed in the Linux kernel without problems, but it is not defined by any standard, so a program has a problem when it wants to be portable, or when it wants to probe the stack space and adjust itself.

The 8mb linux stackspace limit can be hit for example with the gcc compiler when compiling big source.

one reason was the recursive splay in splay-tree.c at
https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libiberty/splay-tree.c;h=7c8973c63c8fead8e1363f4f42a5f686fb16ac8c;hb=HEAD

The comment says:  "it doesn't toast the stack for large trees."

/* A splay-tree datatype.  
   Copyright (C) 1998-2021 Free Software Foundation, Inc.
   Contributed by Mark Mitchell (mark@markmitchell.com).

This file is part of GNU CC.
   
GNU CC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street - Fifth Floor,
Boston, MA 02110-1301, USA.  */

/* For an easily readable description of splay-trees, see:

     Lewis, Harry R. and Denenberg, Larry.  Data Structures and Their
     Algorithms.  Harper-Collins, Inc.  1991.  */


/* Deallocate NODE (a member of SP), and all its sub-trees.  */

static void
splay_tree_delete_helper (splay_tree sp, splay_tree_node node)
{
  splay_tree_node pending = 0;
  splay_tree_node active = 0;

  if (!node)
    return;

#define KDEL(x)  if (sp->delete_key) (*sp->delete_key)(x);
#define VDEL(x)  if (sp->delete_value) (*sp->delete_value)(x);

  KDEL (node->key);
  VDEL (node->value);

  /* We use the "key" field to hold the "next" pointer.  */
  node->key = (splay_tree_key)pending;
  pending = (splay_tree_node)node;

  /* Now, keep processing the pending list until there aren't any
     more.  This is a little more complicated than just recursing, but
     it doesn't toast the stack for large trees.  */

  while (pending)
    {
      active = pending;
      pending = 0;
      while (active)
    {
      splay_tree_node temp;

      /* active points to a node which has its key and value
         deallocated, we just need to process left and right.  */

      if (active->left)
        {
          KDEL (active->left->key);
          VDEL (active->left->value);
          active->left->key = (splay_tree_key)pending;
          pending = (splay_tree_node)(active->left);
        }
      if (active->right)
        {
          KDEL (active->right->key);
          VDEL (active->right->value);
          active->right->key = (splay_tree_key)pending;
          pending = (splay_tree_node)(active->right);
        }

      temp = active;
      active = (splay_tree_node)(temp->key);
      (*sp->deallocate) ((char*) temp, sp->allocate_data);
    }
    }
#undef KDEL
#undef VDEL
}

/* Deallocate SP.  */

void
splay_tree_delete (splay_tree sp)
{
  /* Free all nodes first: the helper still reads the callbacks and the
     allocator data stored in SP while it runs.  */
  splay_tree_delete_helper (sp, sp->root);

  /* Then give the tree header itself back to the tree's deallocator.  */
  sp->deallocate (sp, sp->allocate_data);
}

similar fix is in foreach()

/* Call FN, passing it the DATA, for every node below NODE, all of
   which are from SP, following an in-order traversal.  If FN every
   returns a non-zero value, the iteration ceases immediately, and the
   value is returned.  Otherwise, this function returns 0.  */

/* In-order traversal of the subtree rooted at NODE without recursion.
   FN is called as fn (node, DATA) for every node; the first non-zero
   return value stops the walk and is returned, otherwise 0 is returned.
   Nodes whose left subtree is still being visited are kept in a
   heap-allocated array that doubles in size whenever it is full.  */
static int
splay_tree_foreach_helper (splay_tree_node node,
                           splay_tree_foreach_fn fn, void *data)
{
  int val;
  splay_tree_node *stack; /* array with nodes */
  int stack_ptr, stack_size;

  /* A non-recursive implementation is used to avoid filling the stack
     for large trees.  Splay trees are worst case O(n) in the depth of
     the tree.  */

#define INITIAL_STACK_SIZE 100
  stack_size = INITIAL_STACK_SIZE;
  stack_ptr = 0;
  stack = XNEWVEC (splay_tree_node, stack_size);
  val = 0;

  for (;;)
    {
      /* first collect the pointers of the nodes in a memory array[] */
      while (node != NULL)
        {
          if (stack_ptr == stack_size)
            {
              /* this factor 2 can cause unexpected high memory use at
                 once and may allocate too much memory */
              stack_size *= 2;
              stack = XRESIZEVEC (splay_tree_node, stack, stack_size);
            }
          stack[stack_ptr++] = node;
          node = node->left;
        }

      /* See the test program further below: doing the following instead
         of the loop above can save 1.3 Gb with 256 million nodes.

         It is also possible to first count the number of needed entries
         and grow the array to exactly that size.  Sketch only, not
         compiled; "sn" is a splay_tree_node and "need" is an int.  The
         entries already pending on the stack must be counted too and
         stack_ptr must not be reset, otherwise pending parents are lost:

           sn = node;                      // save start
           need = stack_ptr;               // entries already pending
           while (node != NULL)
             {
               need++;
               node = node->left;
             }
           if (need > stack_size)
             {
               stack_size = need;
               stack = XRESIZEVEC (splay_tree_node, stack, stack_size);
             }
           // now copy the exact number of node pointers
           while (sn != NULL)
             {
               stack[stack_ptr++] = sn;
               sn = sn->left;
             }

         maybe correct now or improve and send to https://gcc.gnu.org/  */

      /* stop if no entries in stack[] */
      if (stack_ptr == 0)
        break;

      node = stack[--stack_ptr];
      /* this runs specified function with the node */
      val = (*fn) (node, data);
      if (val)
        break;

      /* the left side and the node itself are done, continue on the right */
      node = node->right;
    }

  XDELETEVEC (stack);
  return val;
}

/* Call FN, passing it the DATA, for every node in SP, following an
   in-order traversal.  If FN every returns a non-zero value, the
   iteration ceases immediately, and the value is returned.
   Otherwise, this function returns 0.  */

int
splay_tree_foreach (splay_tree sp, splay_tree_foreach_fn fn, void *data)
{
  /* The whole job is done by the helper, started at the root of SP.  */
  int result = splay_tree_foreach_helper (sp->root, fn, data);

  return result;
}

XRESIZEVEC() is a realloc(), with the risk that it tries to allocate too much memory at once because of the *2 growth factor. It can be even safer to first count the needed entries and then do a single malloc().

The older version used recursive routines, which could cause stack smashing:

/* Deallocate NODE (a member of SP), and all its sub-trees.  */

/* Old recursive variant: frees the left subtree, then the right subtree,
   then NODE itself.  The recursion depth equals the depth of the tree.  */
static void
splay_tree_delete_helper (splay_tree sp, splay_tree_node node)
{
  if (node == NULL)
    return;

  /* this recurses and may cause stack toasting */
  splay_tree_delete_helper (sp, node->left);
  splay_tree_delete_helper (sp, node->right);

  /* The optional destructors run only after both subtrees are gone.  */
  if (sp->delete_key != NULL)
    sp->delete_key (node->key);
  if (sp->delete_value != NULL)
    sp->delete_value (node->value);

  free (node);
}

The gcc compiler has no option to warn about recursion or call cycles between routines that increase stack usage.

See also
https://ariadne.space/2021/06/25/understanding-thread-stack-sizes-and-how-alpine-is-different/

https://utcc.utoronto.ca/~cks/space/blog/unix/UnixAPIAndCRuntime
https://utcc.utoronto.ca/~cks/space/blog/programming/CStackSizeInvisible

These are the stack sizes on different systems:
OS     Process Stack Size     Thread Stack Size
Darwin (macOS, iOS, etc)     8 MiB     512 KiB
FreeBSD     8 MiB     2 MiB
OpenBSD (before 4.6)     8 MiB     64 KiB
OpenBSD (4.6 and later)     8 MiB     512 KiB
Windows     1 MiB     1 MiB
Alpine 3.10 and older     8 MiB     80 KiB
Alpine 3.11 and newer     8 MiB     128 KiB
GNU/Linux     8 MiB     8 MiB

Most data structure libraries and routines contain recursive routines, often without any warning, so if this can be an issue, check those routines manually.

For graph layout, recursive routines in algorithms such as dfs() can be rewritten without recursion if the program needs to run on very big graph data.

Python also uses realloc() in a way that has a bug: it can crash because of uncontrolled, unexpectedly high memory use, and maybe not all realloc() bugs in Python are fixed. See also https://bugs.python.org/issue26415

the gnulib obstack routines have also a realloc() issue

Simply scan all source for realloc() and review each call with the question: "can this realloc() suddenly cause unnecessarily high memory usage?"

The gcc splay-tree.c can only be run with up to 280 million nodes; changing foreach() can save much memory. For that, and for a version without realloc(), see the test program here:

With the improved foreach() the test uses all RAM and disk swap space on the test computer, reaching 370 million nodes, and it does not crash GNU/Linux or other programs.

update: newer version with added new splay routine reaches 423 million nodes on same Linux test computer.

and no memory leaks even at 16G+ memory usage.

At a test on a GNU/Linux server with 128 GB it can handle a GCC splay tree with 3384 million splay nodes without crashes and no memory leak using a new foreach()

This is the compact callgraph of splay-tree.c using gml4gtk graph viewer



/* GNU/Linux splay tree test program based on oct 2021 gcc version
 * this may use all ram and disk swap; then the program stops, but GNU/Linux does not crash and neither do other programs
 * https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libiberty/splay-tree.c;h=7c8973c63c8fead8e1363f4f42a5f686fb16ac8c;hb=HEAD
 * int stack_ptr in foreach() allows only splay tree with max size of 2G, should be size_t
 * splay_tree_xmalloc_allocate(int size, void *data ATTRIBUTE_UNUSED) should use size_t size
 * 1000*1000*280 are 280 million splay tree nodes hits already a limit
 * splay_tree_foreach() is the gcc original
 * splay_tree_foreach2() is the memory saving version
 * splay_tree_foreach3() does not use realloc()
 * used compiler settings from airbus aerospace
 * see https://github.com/airbus-seclab/c-compiler-security
 * AIRBUS_GCC_COMPILER_WARNING="$CFLAGS -O2 -Wall -Wextra -Wpedantic -Wformat=2 -Wformat-overflow=2 -Wformat-truncation=2 -Wformat-security -Wnull-dereference -Wstack-protector -Wtrampolines -Walloca -Wvla -Warray-bounds=2 -Wimplicit-fallthrough=3  -Wshift-overflow=2 -Wcast-qual -Wstringop-overflow=4 -Wconversion -Warith-conversion -Wlogical-op -Wduplicated-cond -Wduplicated-branches -Wformat-signedness -Wshadow -Wstrict-overflow=4 -Wundef -Wstrict-prototypes -Wswitch-default -Wswitch-enum -Wstack-usage=1000000 -Wcast-align=strict -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fstack-clash-protection -fPIE -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -Wl,-z,separate-code"
 * ./spt
 * testing old splay_tree_foreach()
 * status=0 10 tree nodes stack used max 100 entries using 0 megabyte 800 bytes 0 realloc()'s
 * testing old splay_tree_foreach()
 * status=0 1000 tree nodes stack used max 1600 entries using 0 megabyte 12800 bytes 4 realloc()'s
 * testing new splay_tree_foreach()
 * status=0 1000 tree nodes stack used max 1000 entries using 0 megabyte 8000 bytes 1 realloc()'s saved 0 Mb
 * testing old splay_tree_foreach()
 * status=0 100000000 tree nodes stack used max 104857600 entries using 800 megabyte 838860800 bytes 20 realloc()'s
 * testing new splay_tree_foreach()
 * status=0 100000000 tree nodes stack used max 100000000 entries using 762 megabyte 800000000 bytes 1 realloc()'s saved 37 Mb
 * testing old splay_tree_foreach()
 * status=0 280000000 tree nodes stack used max 419430400 entries using 3200 megabyte 3355443200 bytes 22 realloc()'s
 * testing new splay_tree_foreach()
 * status=0 280000000 tree nodes stack used max 280000000 entries using 2136 megabyte 2240000000 bytes 1 realloc()'s saved 1063 Mb
 * testing splay_tree_foreach() without realloc()
 * splay_tree_foreach3(): splay tree has 3000000 nodes
 * status=0 3000000 tree nodes stack used max 3000000 entries using 22 megabyte 24000000 bytes 0 realloc()'s
 * On debian Linux the limit is 280 million splay tree nodes
 * at more the program stops and is killed by some security software on debian Linux
 * testing old splay_tree_foreach()
 * status=0 350000000 tree nodes stack used max 419430400 entries using 3200 megabyte 3355443200 bytes 22 realloc()'s
 * testing new splay_tree_foreach()
 * status=0 350000000 tree nodes stack used max 350000000 entries using 2670 megabyte 2800000000 bytes 1 realloc()'s saved 529 Mb
 * On Fedora Linux the limit is 350 million splay tree nodes
 * at more the Fedora desktop causes a logout and the program stops
 * when using only splay_tree_foreach2(), with its lower memory consumption:
 * testing new splay_tree_foreach() with maximum test machine limit
 * status=0 370000000 tree nodes stack used max 370000000 entries using 2822 megabyte 2960000000 bytes 1 realloc()'s
 * At more all ram and all disk swap space is used on the test computer.
 * Now try this on WSL
 */

/* A splay-tree datatype.
   Copyright (C) 1998-2021 Free Software Foundation, Inc.
   Contributed by Mark Mitchell (mark@markmitchell.com).

This file is part of GNU CC.
   
GNU CC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street - Fifth Floor,
Boston, MA 02110-1301, USA.  */

/* For an easily readable description of splay-trees, see:

     Lewis, Harry R. and Denenberg, Larry.  Data Structures and Their
     Algorithms.  Harper-Collins, Inc.  1991.  */

/*
  SPDX-License-Identifier: GPL-3.0+
 */

/* orig
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include <stdio.h>

#include "libiberty.h"
#include "splay-tree.h"
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* needed for type uintptr_t or use long long int */
#include <stdint.h>

#define ATTRIBUTE_UNUSED /**/
/* maximum number of stack entries used so far; unsigned int limits this to 4 Giga */
static unsigned int maxstack = 0;

/* how many realloc() done */
static int nrealloc = 0;

/* Use typedefs for the key and data types to facilitate changing
   these types, if necessary.  These types should be sufficiently wide
   that any pointer or scalar can be cast to these types, and then
   cast back, without loss of precision.  */
typedef uintptr_t splay_tree_key;    /* 64bits unsigned int */
typedef uintptr_t splay_tree_value;

/* Forward declaration for a node in the tree.  */
typedef struct splay_tree_node_s *splay_tree_node;

/* The type of a function which compares two splay-tree keys.  The
   function should return values as for qsort.  */
typedef int (*splay_tree_compare_fn)(splay_tree_key, splay_tree_key);

/* The type of a function used to deallocate any resources associated
   with the key.  If you provide this function, the splay tree
   will take the ownership of the memory of the splay_tree_key arg
   of splay_tree_insert.  This function is called to release the keys
   present in the tree when calling splay_tree_delete or splay_tree_remove.
   If splay_tree_insert is called with a key equal to a key already
   present in the tree, the old key and old value will be released.  */
typedef void (*splay_tree_delete_key_fn)(splay_tree_key);

/* The type of a function used to deallocate any resources associated
   with the value.  If you provide this function, the memory of the
   splay_tree_value arg of splay_tree_insert is managed similarly to
   the splay_tree_key memory: see splay_tree_delete_key_fn.  */
typedef void (*splay_tree_delete_value_fn)(splay_tree_value);

/* The type of a function used to iterate over the tree.  */
typedef int (*splay_tree_foreach_fn)(splay_tree_node, void *);

/* The type of a function used to allocate memory for tree root and
   node structures.  The first argument is the number of bytes needed;
   the second is a data pointer the splay tree functions pass through
   to the allocator.  This function must never return zero.  */
/* old typedef void *(*splay_tree_allocate_fn)(int, void *); */
typedef void *(*splay_tree_allocate_fn)(size_t, void *);

/* The type of a function used to free memory allocated using the
   corresponding splay_tree_allocate_fn.  The first argument is the
   memory to be freed; the latter is a data pointer the splay tree
   functions pass through to the freer.  */
typedef void (*splay_tree_deallocate_fn)(void *, void *);

/* The nodes in the splay tree.  */
/* One node of the tree: a key/value pair plus the links to its children.  */
struct splay_tree_node_s {
    /* The key.  While a tree is being torn down, splay_tree_delete_helper
       overwrites this field with a link to the next node waiting to be
       freed.  */
    splay_tree_key key;

    /* The value.  */
    splay_tree_value value;

    /* The left and right children, respectively.  The tree routines test
       these against 0 to detect a missing child.  */
    splay_tree_node left;
    splay_tree_node right;
};

/* The splay tree itself.  */
/* Tree header: the root node together with the callbacks used to compare
   keys, to release keys and values, and to allocate and free memory.  */
struct splay_tree_s {
    /* The root of the tree.  0 for an empty tree.  */
    splay_tree_node root;

    /* The comparison function.  Called as comp (searched key, node key).  */
    splay_tree_compare_fn comp;

    /* The deallocate-key function.  NULL if no cleanup is necessary.  */
    splay_tree_delete_key_fn delete_key;

    /* The deallocate-value function.  NULL if no cleanup is necessary.  */
    splay_tree_delete_value_fn delete_value;

    /* Node allocate function.  Takes allocate_data as a parameter. */
    splay_tree_allocate_fn allocate;

    /* Free function for nodes and trees.  Takes allocate_data as a parameter.  */
    splay_tree_deallocate_fn deallocate;

    /* Parameter for allocate/free functions.  */
    void *allocate_data;
};

typedef struct splay_tree_s *splay_tree;

/* these routines are here */
extern splay_tree splay_tree_new(splay_tree_compare_fn, splay_tree_delete_key_fn, splay_tree_delete_value_fn);
extern splay_tree splay_tree_new_with_allocator(splay_tree_compare_fn,
                        splay_tree_delete_key_fn,
                        splay_tree_delete_value_fn,
                        splay_tree_allocate_fn, splay_tree_deallocate_fn, void *);
extern splay_tree splay_tree_new_typed_alloc(splay_tree_compare_fn,
                         splay_tree_delete_key_fn,
                         splay_tree_delete_value_fn,
                         splay_tree_allocate_fn, splay_tree_allocate_fn, splay_tree_deallocate_fn, void *);
extern void splay_tree_delete(splay_tree);
extern splay_tree_node splay_tree_insert(splay_tree, splay_tree_key, splay_tree_value);
extern void splay_tree_remove(splay_tree, splay_tree_key);
extern splay_tree_node splay_tree_lookup(splay_tree, splay_tree_key);
extern splay_tree_node splay_tree_predecessor(splay_tree, splay_tree_key);
extern splay_tree_node splay_tree_successor(splay_tree, splay_tree_key);
extern splay_tree_node splay_tree_max(splay_tree);
extern splay_tree_node splay_tree_min(splay_tree);
extern int splay_tree_foreach(splay_tree, splay_tree_foreach_fn, void *);
extern int splay_tree_compare_ints(splay_tree_key, splay_tree_key);
extern int splay_tree_compare_pointers(splay_tree_key, splay_tree_key);
extern int splay_tree_compare_strings(splay_tree_key, splay_tree_key);
extern void splay_tree_delete_pointers(splay_tree_value);

/* old static void *splay_tree_xmalloc_allocate(int size, void *data ATTRIBUTE_UNUSED); */
static void *splay_tree_xmalloc_allocate(size_t size, void *data ATTRIBUTE_UNUSED);
static void splay_tree_xmalloc_deallocate(void *object, void *data ATTRIBUTE_UNUSED);

/* liberty.h Array allocators.  */

#define XALLOCAVEC(T, N)    ((T *) alloca (sizeof (T) * (N)))
#define XNEWVEC(T, N)        ((T *) xmalloc (sizeof (T) * (N)))
#define XCNEWVEC(T, N)        ((T *) xcalloc ((N), sizeof (T)))
#define XDUPVEC(T, P, N)    ((T *) xmemdup ((P), sizeof (T) * (N), sizeof (T) * (N)))
#define XRESIZEVEC(T, P, N)    ((T *) xrealloc ((void *) (P), sizeof (T) * (N)))
#define XDELETEVEC(P)        free ((void*) (P))

/* xmalloc substitute */

/* Stand-ins for libiberty's xmalloc()/xrealloc().  Like the originals
   they never return NULL: a request for 0 bytes is rounded up to 1 byte
   and an allocation failure prints a message and terminates the program.
   The callers in this file use the result without checking it.  */

/* Return SIZE bytes of zero-filled memory (calloc is kept so that fresh
   memory stays zeroed as with the previous macro).  */
static inline void *xmalloc(size_t size)
{
    void *mem;

    if (size == 0) {
        size = 1;
    }

    mem = calloc((size_t)1, size);
    if (mem == NULL) {
        fprintf(stderr, "out of memory allocating %zu bytes\n", size);
        exit(EXIT_FAILURE);
    }

    return mem;
}

/* Resize the block PTR to SIZE bytes and return its possibly moved
   address.  PTR may be NULL; bytes beyond the old size are not zeroed.  */
static inline void *xrealloc(void *ptr, size_t size)
{
    void *mem;

    if (size == 0) {
        size = 1;
    }

    mem = realloc(ptr, size);
    if (mem == NULL) {
        fprintf(stderr, "out of memory allocating %zu bytes\n", size);
        exit(EXIT_FAILURE);
    }

    return mem;
}

static void splay_tree_delete_helper(splay_tree, splay_tree_node);
static inline void rotate_left(splay_tree_node *, splay_tree_node, splay_tree_node);
static inline void rotate_right(splay_tree_node *, splay_tree_node, splay_tree_node);
static void splay_tree_splay(splay_tree, splay_tree_key);
static int splay_tree_foreach_helper(splay_tree_node, splay_tree_foreach_fn, void *);

/* Deallocate NODE (a member of SP), and all its sub-trees.  */

/* Free NODE and everything below it without recursion.  The key and value
   of every node are passed to the optional sp->delete_key and
   sp->delete_value callbacks, and the node is then released through
   sp->deallocate.  Nodes that still have to be visited are linked into a
   list through their "key" field, which is unused once the key has been
   released.  Does nothing when NODE is NULL.  */
static void splay_tree_delete_helper(splay_tree sp, splay_tree_node node)
{
    splay_tree_node todo = NULL;    /* nodes found during the current pass */
    splay_tree_node cur = NULL;     /* node being taken apart */

    if (node == NULL) {
        return;
    }

    /* Release the payload of the start node and make it the only entry
       of the work list.  */
    if (sp->delete_key) {
        (*sp->delete_key) (node->key);
    }
    if (sp->delete_value) {
        (*sp->delete_value) (node->value);
    }
    node->key = (splay_tree_key) todo;
    todo = node;

    /* Each pass consumes the current work list and builds the next one
       from the children it meets; this is what avoids deep recursion on
       large trees.  */
    while (todo != NULL) {
        cur = todo;
        todo = NULL;

        while (cur != NULL) {
            splay_tree_node done = cur;
            int side;

            /* Left child first, then right child, as in a plain recursion. */
            for (side = 0; side < 2; side++) {
                splay_tree_node kid = (side == 0) ? cur->left : cur->right;

                if (kid == NULL) {
                    continue;
                }
                if (sp->delete_key) {
                    (*sp->delete_key) (kid->key);
                }
                if (sp->delete_value) {
                    (*sp->delete_value) (kid->value);
                }
                /* The key is gone, reuse its slot as the list link. */
                kid->key = (splay_tree_key) todo;
                todo = kid;
            }

            /* Read the link before the node is given back. */
            cur = (splay_tree_node) (done->key);
            (*sp->deallocate) ((char *)done, sp->allocate_data);
        }
    }
}

/* Rotate the edge joining the left child N with its parent P.  PP is the
   grandparents' pointer to P.  */

/* Lift N, the left child of P, above P.  N's former right subtree becomes
   P's left subtree, P becomes N's right child, and the link *PP that
   pointed at P is redirected to N.  */
static inline void rotate_left(splay_tree_node * pp, splay_tree_node p, splay_tree_node n)
{
    splay_tree_node inner = n->right;    /* subtree between N and P */

    p->left = inner;
    n->right = p;
    *pp = n;
}

/* Rotate the edge joining the right child N with its parent P.  PP is the
   grandparents' pointer to P.  */

/* Lift N, the right child of P, above P.  N's former left subtree becomes
   P's right subtree, P becomes N's left child, and the link *PP that
   pointed at P is redirected to N.  */
static inline void rotate_right(splay_tree_node * pp, splay_tree_node p, splay_tree_node n)
{
    splay_tree_node inner = n->left;    /* subtree between P and N */

    p->right = inner;
    n->left = p;
    *pp = n;
}

/* Bottom up splay of key.  */

/* Splay KEY towards the root of SP.  Each round looks at the root and at
   the child on the side where KEY belongs and rotates once or twice.  The
   loop ends when the root matches KEY or when the search cannot descend
   any further.  An empty tree is left untouched.  */
static void splay_tree_splay(splay_tree sp, splay_tree_key key)
{
    if (!sp->root) {
        return;
    }

    for (;;) {
        splay_tree_node top = sp->root;
        splay_tree_node child;
        int top_cmp;
        int child_cmp;

        top_cmp = (*sp->comp) (key, top->key);
        if (top_cmp == 0) {
            /* KEY is at the root already. */
            return;
        }

        /* Step to the side on which KEY must be; stop without a child. */
        child = (top_cmp < 0) ? top->left : top->right;
        if (child == NULL) {
            return;
        }

        child_cmp = (*sp->comp) (key, child->key);

        /* The child matches, or the search would fall off the tree below
           it: one rotation brings the child to the root and we are done. */
        if (child_cmp == 0
            || (child_cmp < 0 && child->left == NULL)
            || (child_cmp > 0 && child->right == NULL)) {
            if (top_cmp < 0) {
                rotate_left(&sp->root, top, child);
            } else {
                rotate_right(&sp->root, top, child);
            }
            return;
        }

        /* Double rotation: first below the root, then at the root.  Both
           comparison results are non-zero here. */
        if (top_cmp < 0) {
            if (child_cmp < 0) {
                rotate_left(&top->left, child, child->left);
            } else {
                rotate_right(&top->left, child, child->right);
            }
            rotate_left(&sp->root, top, top->left);
        } else {
            if (child_cmp > 0) {
                rotate_right(&top->right, child, child->right);
            } else {
                rotate_left(&top->right, child, child->left);
            }
            rotate_right(&sp->root, top, top->right);
        }
    }
}

/* Call FN, passing it the DATA, for every node below NODE, all of
   which are from SP, following an in-order traversal.  If FN every
   returns a non-zero value, the iteration ceases immediately, and the
   value is returned.  Otherwise, this function returns 0.  */

/* In-order traversal of the subtree rooted at NODE without recursion
   (the gcc algorithm plus test instrumentation).

   FN is called as fn (node, DATA) for every node.  The first non-zero
   return value of FN stops the traversal and is returned; otherwise the
   result is 0.

   Nodes whose left subtree is being visited wait in a heap array that
   starts with INITIAL_STACK_SIZE entries and doubles when it is full.
   The index and capacity are size_t: with int the doubling overflows
   once more than 2G entries are needed.

   Instrumentation: the largest capacity used is stored in maxstack and
   every resize increments nrealloc.

   NOTE(review): the results of XNEWVEC/XRESIZEVEC are used unchecked, so
   the underlying xmalloc/xrealloc must not return NULL.  */
static int splay_tree_foreach_helper(splay_tree_node node, splay_tree_foreach_fn fn, void *data)
{
    int val = 0;
    splay_tree_node *stack;
    size_t stack_ptr = 0;
    size_t stack_size;

    /* A non-recursive implementation is used to avoid filling the stack
       for large trees.  Splay trees are worst case O(n) in the depth of
       the tree.  */

#define INITIAL_STACK_SIZE 100
    stack_size = INITIAL_STACK_SIZE;
    stack = XNEWVEC(splay_tree_node, stack_size);

    /* added: statistics for the initial array */
    if (stack_size > (size_t)maxstack) {
        maxstack = (unsigned int)stack_size;
    }

    for (;;) {
        /* push NODE and its whole chain of left children */
        while (node != NULL) {
            if (stack_ptr == stack_size) {
                stack_size *= 2;
                stack = XRESIZEVEC(splay_tree_node, stack, stack_size);
                /* how many realloc()'s */
                nrealloc++;
                /* record the new capacity right away so that the last
                   resize is counted even if FN stops the walk early */
                if (stack_size > (size_t)maxstack) {
                    maxstack = (unsigned int)stack_size;
                }
            }
            stack[stack_ptr++] = node;
            node = node->left;
        }

        /* nothing left to visit */
        if (stack_ptr == 0)
            break;

        node = stack[--stack_ptr];

        val = (*fn) (node, data);
        if (val)
            break;

        /* the left side and the node are done, continue on the right */
        node = node->right;
    }

    XDELETEVEC(stack);
    return val;
}

/* Call FN, passing it the DATA, for every node below NODE, all of
   which are from SP, following an in-order traversal.  If FN every
   returns a non-zero value, the iteration ceases immediately, and the
   value is returned.  Otherwise, this function returns 0.  */
/* modified */
/* Memory-saving variant of splay_tree_foreach_helper: in-order traversal
   of the subtree rooted at NODE without recursion.

   FN is called as fn (node, DATA) for every node.  The first non-zero
   return value of FN stops the traversal and is returned; otherwise the
   result is 0.

   Instead of doubling the array, the chain of left children that is
   about to be pushed is counted first and the array is grown to exactly
   the size required.  The required size is the number of entries already
   waiting on the stack plus the length of the new chain.  Comparing only
   the chain length with the capacity, as was done before, writes past
   the end of the array as soon as a right subtree is entered while
   earlier entries are still pending.

   Instrumentation: the largest capacity used is stored in maxstack and
   every resize increments nrealloc.

   NOTE(review): the results of XNEWVEC/XRESIZEVEC are used unchecked, so
   the underlying xmalloc/xrealloc must not return NULL.  */
/* modified */
static int splay_tree_foreach_helper2(splay_tree_node node, splay_tree_foreach_fn fn, void *data)
{
    int val = 0;
    splay_tree_node *stack;
    splay_tree_node sn;
    size_t stack_ptr = 0;    /* size_t: an int allows only 2G entries */
    size_t stack_size;
    size_t spine;            /* length of the chain of left children */
    size_t needed;           /* capacity required after pushing it */

    /* A non-recursive implementation is used to avoid filling the stack
       for large trees.  Splay trees are worst case O(n) in the depth of
       the tree.  */

#define INITIAL_STACK_SIZE 100
    stack_size = INITIAL_STACK_SIZE;
    stack = XNEWVEC(splay_tree_node, stack_size);

    /* added: statistics for the initial array */
    if (stack_size > (size_t)maxstack) {
        maxstack = (unsigned int)stack_size;
    }

    for (;;) {
        /* count how many nodes the next push adds */
        spine = 0;
        for (sn = node; sn != NULL; sn = sn->left) {
            spine++;
        }

        /* the entries still pending must fit as well */
        needed = stack_ptr + spine;
        if (needed > stack_size) {
            stack_size = needed;

            /* allocate exact as much as needed */
            stack = XRESIZEVEC(splay_tree_node, stack, stack_size);

            /* how many realloc()'s */
            nrealloc++;

            if (stack_size > (size_t)maxstack) {
                maxstack = (unsigned int)stack_size;
            }
        }

        /* copy the pointers */
        for (; node != NULL; node = node->left) {
            stack[stack_ptr++] = node;
        }

        /* nothing left to visit */
        if (stack_ptr == 0)
            break;

        node = stack[--stack_ptr];

        val = (*fn) (node, data);
        if (val)
            break;

        /* the left side and the node are done, continue on the right */
        node = node->right;
    }

    XDELETEVEC(stack);
    return val;
}

/* Call FN, passing it the DATA, for every node below NODE, all of
   which are from SP, following an in-order traversal.  If FN every
   returns a non-zero value, the iteration ceases immediately, and the
   value is returned.  Otherwise, this function returns 0.  */
/* modified does not use realloc() in XRESIZEVEC() */
/* In-order traversal of the subtree rooted at NODE that never resizes its
   work array: room for COUNT node pointers is allocated once up front.

   FN is called as fn (node, DATA) for every node.  The first non-zero
   return value of FN stops the traversal and is returned; otherwise the
   result is 0.  The capacity is recorded in maxstack.

   NOTE(review): nothing checks the array bound while pushing, so COUNT
   has to be large enough for the tree -- presumably the caller passes the
   number of nodes; confirm at the call site.  */
/* modified does not use realloc() in XRESIZEVEC() */
static int splay_tree_foreach_helper3(splay_tree_node node, splay_tree_foreach_fn fn, void *data, unsigned int count)
{
    unsigned int capacity = count;    /* this allows only 4G entries */
    unsigned int top = 0;             /* number of entries in use */
    splay_tree_node *stack = XNEWVEC(splay_tree_node, (long unsigned int)capacity);
    int val = 0;

    /* No recursion, so that very deep trees cannot exhaust the program
       stack; a splay tree can be as deep as it has nodes.  */
    while (1) {
        /* added */
        if (maxstack < capacity) {
            maxstack = capacity;
        }

        /* copy the pointers of NODE and all its left descendants */
        for (; node != NULL; node = node->left) {
            stack[top] = node;
            top++;
        }

        if (top == 0) {
            break;
        }

        top--;
        node = stack[top];

        val = (*fn) (node, data);
        if (val != 0) {
            break;
        }

        node = node->right;
    }

    XDELETEVEC(stack);
    return val;
}

/* An allocator and deallocator based on xmalloc.  */
/* Default allocator of the tree: returns SIZE bytes obtained from xmalloc.
   DATA is accepted only to match splay_tree_allocate_fn and is ignored.  */
static void *splay_tree_xmalloc_allocate(size_t size, void *data ATTRIBUTE_UNUSED)
{
    void *block;

    (void)data;    /* not used */

    block = xmalloc(size);
    return block;
}

/* Default deallocator of the tree: gives OBJECT back with free().  DATA is
   accepted only to match splay_tree_deallocate_fn and is ignored.  */
static void splay_tree_xmalloc_deallocate(void *object, void *data ATTRIBUTE_UNUSED)
{
    (void)data;      /* not used */

    free(object);    /* free() ignores a NULL pointer by itself */
}

/* Allocate a new splay tree, using COMPARE_FN to compare nodes,
   DELETE_KEY_FN to deallocate keys, and DELETE_VALUE_FN to deallocate
   values.  Use xmalloc to allocate the splay tree structure, and any
   nodes added.  */

splay_tree
splay_tree_new(splay_tree_compare_fn compare_fn, splay_tree_delete_key_fn delete_key_fn, splay_tree_delete_value_fn delete_value_fn)
{
    splay_tree tree;

    /* Same as splay_tree_new_with_allocator, with the xmalloc based
       allocator pair of this file and no allocator data.  */
    tree = splay_tree_new_with_allocator(compare_fn, delete_key_fn, delete_value_fn,
                                         splay_tree_xmalloc_allocate,
                                         splay_tree_xmalloc_deallocate,
                                         NULL);
    return tree;
}

/* Allocate a new splay tree, using COMPARE_FN to compare nodes,
   DELETE_KEY_FN to deallocate keys, and DELETE_VALUE_FN to deallocate
   values.  */

splay_tree
splay_tree_new_with_allocator(splay_tree_compare_fn compare_fn,
                  splay_tree_delete_key_fn delete_key_fn,
                  splay_tree_delete_value_fn delete_value_fn,
                  splay_tree_allocate_fn allocate_fn, splay_tree_deallocate_fn deallocate_fn, void *allocate_data)
{
    splay_tree tree;

    /* A single allocator serves both purposes here: ALLOCATE_FN is passed
       as the tree allocator and as the node allocator.  */
    tree = splay_tree_new_typed_alloc(compare_fn, delete_key_fn, delete_value_fn,
                                      allocate_fn, allocate_fn,
                                      deallocate_fn, allocate_data);
    return tree;
}

/*

@deftypefn Supplemental splay_tree splay_tree_new_typed_alloc @
(splay_tree_compare_fn @var{compare_fn}, @
splay_tree_delete_key_fn @var{delete_key_fn}, @
splay_tree_delete_value_fn @var{delete_value_fn}, @
splay_tree_allocate_fn @var{tree_allocate_fn}, @
splay_tree_allocate_fn @var{node_allocate_fn}, @
splay_tree_deallocate_fn @var{deallocate_fn}, @
void * @var{allocate_data})

This function creates a splay tree that uses two different allocators
@var{tree_allocate_fn} and @var{node_allocate_fn} to use for allocating the
tree itself and its nodes respectively.  This is useful when variables of
different types need to be allocated with different allocators.

The splay tree will use @var{compare_fn} to compare nodes,
@var{delete_key_fn} to deallocate keys, and @var{delete_value_fn} to
deallocate values.  Keys and values will be deallocated when the
tree is deleted using splay_tree_delete or when a node is removed
using splay_tree_remove.  splay_tree_insert will release the previously
inserted key and value using @var{delete_key_fn} and @var{delete_value_fn}
if the inserted key is already found in the tree.

@end deftypefn

*/

/* Create an empty splay tree.  The tree structure itself is obtained
   from TREE_ALLOCATE_FN; NODE_ALLOCATE_FN is stored for later node
   allocations and DEALLOCATE_FN for releasing nodes and the tree.
   ALLOCATE_DATA is handed to every allocator callback.
   Returns the new tree, or NULL when TREE_ALLOCATE_FN returns NULL.  */
splay_tree
splay_tree_new_typed_alloc(splay_tree_compare_fn compare_fn,
               splay_tree_delete_key_fn delete_key_fn,
               splay_tree_delete_value_fn delete_value_fn,
               splay_tree_allocate_fn tree_allocate_fn,
               splay_tree_allocate_fn node_allocate_fn, splay_tree_deallocate_fn deallocate_fn, void *allocate_data)
{
    splay_tree sp = (splay_tree) (*tree_allocate_fn)
        (sizeof(struct splay_tree_s), allocate_data);

    /* A caller-supplied allocator may fail; do not write through NULL.  */
    if (!sp)
        return NULL;

    sp->root = 0;
    sp->comp = compare_fn;
    sp->delete_key = delete_key_fn;
    sp->delete_value = delete_value_fn;
    sp->allocate = node_allocate_fn;
    sp->deallocate = deallocate_fn;
    sp->allocate_data = allocate_data;

    return sp;
}

/* Destroy SP: first hand the tree and its root to
   splay_tree_delete_helper, then release the tree structure itself
   through the tree's own deallocator.  SP must not be used afterwards.  */

void splay_tree_delete(splay_tree sp)
{
    void *self = (void *)sp;

    splay_tree_delete_helper(sp, sp->root);

    (*sp->deallocate) (self, sp->allocate_data);
}

/* Insert a new node (associating KEY with VALUE) into SP.  If a node
   with the indicated KEY already exists, its old key and value are
   released through the tree's delete callbacks (when set) and replaced
   by KEY and VALUE.  Returns the node holding KEY, which is the root of
   the tree after the call, or NULL if the node allocator returned NULL
   (in which case no node is added).  */

splay_tree_node splay_tree_insert(splay_tree sp, splay_tree_key key, splay_tree_value value)
{
    int comparison = 0;
    splay_tree_node node;

    /* Bring the node closest to KEY to the root.  */
    splay_tree_splay(sp, key);

    if (sp->root)
        comparison = (*sp->comp) (sp->root->key, key);

    if (sp->root && comparison == 0) {
        /* If the root of the tree already has the indicated KEY, delete
           the old key and old value, and replace them with KEY and VALUE.  */
        if (sp->delete_key)
            (*sp->delete_key) (sp->root->key);
        if (sp->delete_value)
            (*sp->delete_value) (sp->root->value);
        sp->root->key = key;
        sp->root->value = value;
        return sp->root;
    }

    /* Create a new node, and insert it at the root.  */
    node = ((splay_tree_node)
        (*sp->allocate) (sizeof(struct splay_tree_node_s), sp->allocate_data));

    /* A caller-supplied allocator may fail; leave the tree untouched.  */
    if (!node)
        return NULL;

    node->key = key;
    node->value = value;

    if (!sp->root)
        node->left = node->right = 0;
    else if (comparison < 0) {
        /* Old root sorts before KEY: it becomes the left child and its
           right subtree moves to the right of the new node.  */
        node->left = sp->root;
        node->right = node->left->right;
        node->left->right = 0;
    } else {
        /* Old root sorts after KEY: mirror image of the case above.  */
        node->right = sp->root;
        node->left = node->right->left;
        node->right->left = 0;
    }

    sp->root = node;

    return sp->root;
}

/* Delete the node holding KEY from SP, releasing its key and value
   through the tree's delete callbacks when they are set.  Nothing
   happens if KEY is not in the tree.  */

void splay_tree_remove(splay_tree sp, splay_tree_key key)
{
    splay_tree_node victim;
    splay_tree_node lhs;
    splay_tree_node rhs;
    splay_tree_node tail;

    splay_tree_splay(sp, key);

    /* After splaying, KEY can only be at the root.  */
    victim = sp->root;
    if (!victim || (*sp->comp) (victim->key, key) != 0)
        return;

    lhs = victim->left;
    rhs = victim->right;

    /* Release the root node and what it owns.  */
    if (sp->delete_key)
        (*sp->delete_key) (victim->key);
    if (sp->delete_value)
        (*sp->delete_value) (victim->value);
    (*sp->deallocate) (victim, sp->allocate_data);

    /* Without a left subtree the right one (possibly empty) takes over.  */
    if (!lhs) {
        sp->root = rhs;
        return;
    }

    /* Otherwise the left subtree becomes the tree and the right subtree
       is attached below its right-most node, which keeps the ordering.  */
    sp->root = lhs;
    if (rhs) {
        for (tail = lhs; tail->right; tail = tail->right) {
            /* walk to the right-most node */
        }
        tail->right = rhs;
    }
}

/* Look up KEY in SP.  Returns the node holding KEY if it is present,
   and NULL otherwise.  The tree is splayed around KEY as a side
   effect.  */

splay_tree_node splay_tree_lookup(splay_tree sp, splay_tree_key key)
{
    splay_tree_node top;

    splay_tree_splay(sp, key);

    top = sp->root;
    if (!top)
        return 0;

    return ((*sp->comp) (top->key, key) == 0) ? top : 0;
}

/* Return the right-most node of SP, i.e. the one with the greatest
   key, or NULL when the tree is empty.  */

splay_tree_node splay_tree_max(splay_tree sp)
{
    splay_tree_node cur = sp->root;

    if (cur != NULL) {
        while (cur->right != NULL)
            cur = cur->right;
    }

    return cur;
}

/* Return the left-most node of SP, i.e. the one with the smallest
   key, or NULL when the tree is empty.  */

splay_tree_node splay_tree_min(splay_tree sp)
{
    splay_tree_node cur = sp->root;

    if (cur != NULL) {
        while (cur->left != NULL)
            cur = cur->left;
    }

    return cur;
}

/* Return the node with the greatest key that sorts before KEY, or NULL
   if there is none.  KEY itself need not be stored in the tree.  The
   tree is splayed around KEY as a side effect.  */

splay_tree_node splay_tree_predecessor(splay_tree sp, splay_tree_key key)
{
    splay_tree_node cand;

    /* An empty tree has no predecessor for anything.  */
    if (sp->root == NULL)
        return NULL;

    /* After splaying, the root is KEY itself, its predecessor, or its
       successor.  */
    splay_tree_splay(sp, key);

    /* Root sorts before KEY: it is the predecessor.  */
    if ((*sp->comp) (sp->root->key, key) < 0)
        return sp->root;

    /* Otherwise the answer is the right-most node of the left subtree,
       or NULL when there is no left subtree.  */
    for (cand = sp->root->left; cand != NULL && cand->right != NULL; cand = cand->right) {
        /* walk right */
    }

    return cand;
}

/* Return the node with the smallest key that sorts after KEY, or NULL
   if there is none.  KEY itself need not be stored in the tree.  The
   tree is splayed around KEY as a side effect.  */

splay_tree_node splay_tree_successor(splay_tree sp, splay_tree_key key)
{
    splay_tree_node cand;

    /* An empty tree has no successor for anything.  */
    if (sp->root == NULL)
        return NULL;

    /* After splaying, the root is KEY itself, its predecessor, or its
       successor.  */
    splay_tree_splay(sp, key);

    /* Root sorts after KEY: it is the successor.  */
    if ((*sp->comp) (sp->root->key, key) > 0)
        return sp->root;

    /* Otherwise the answer is the left-most node of the right subtree,
       or NULL when there is no right subtree.  */
    for (cand = sp->root->right; cand != NULL && cand->left != NULL; cand = cand->left) {
        /* walk left */
    }

    return cand;
}

/* Call FN, passing it the DATA, for every node in SP, following an
   in-order traversal.  If FN ever returns a non-zero value, the
   iteration stops immediately and that value is returned; otherwise
   the result is 0.  The work is done by splay_tree_foreach_helper,
   starting at the root of SP.  */

int splay_tree_foreach(splay_tree sp, splay_tree_foreach_fn fn, void *data)
{
    int result;

    result = splay_tree_foreach_helper(sp->root, fn, data);
    return result;
}

/* Alternative traversal entry point: same arguments as
   splay_tree_foreach, but the work is delegated to
   splay_tree_foreach_helper2, whose result is returned unchanged.  */
int splay_tree_foreach2(splay_tree sp, splay_tree_foreach_fn fn, void *data)
{
    int result;

    result = splay_tree_foreach_helper2(sp->root, fn, data);
    return result;
}

/* Traversal variant meant to avoid realloc(): the nodes of SP are
   counted first and the count is handed to splay_tree_foreach_helper3
   together with the root, FN and DATA.  Returns 0 for a null or empty
   tree, otherwise whatever the helper returns.  The node count is also
   printed on stdout.  */
int splay_tree_foreach3(splay_tree sp, splay_tree_foreach_fn fn, void *data)
{
    splay_tree_node cursor;
    unsigned int total = 0;    /* allows 4 Giga tree nodes */

    /* no tree or no data */
    if (sp == (splay_tree) 0 || !sp->root)
        return (0);

    /* Counting by walking min -> successor is very slow (every step
       splays the tree); the splay tree should rather maintain its own
       node count.  */
    for (cursor = splay_tree_min(sp); cursor; cursor = splay_tree_successor(sp, cursor->key))
        total++;

    printf("%s(): splay tree has %u nodes\n", __func__, total);
    fflush(stdout);

    /* The successor calls above re-rooted the tree, so read the root now.  */
    return splay_tree_foreach_helper3(sp->root, fn, data, total);
}

/* Splay-tree comparison function for keys that hold ints.  Both keys
   are converted to int before comparing; returns -1, 0 or 1 when K1 is
   smaller than, equal to, or greater than K2.  */

int splay_tree_compare_ints(splay_tree_key k1, splay_tree_key k2)
{
    const int a = (int)k1;
    const int b = (int)k2;

    /* (a > b) - (a < b) yields exactly -1, 0 or 1.  */
    return (a > b) - (a < b);
}

/* Splay-tree comparison function for keys that hold pointers.  Both
   keys are converted to char pointers and compared by address; returns
   -1, 0 or 1 when K1 is below, equal to, or above K2.  */

int splay_tree_compare_pointers(splay_tree_key k1, splay_tree_key k2)
{
    char *a = (char *)k1;
    char *b = (char *)k2;

    /* (a > b) - (a < b) yields exactly -1, 0 or 1.  */
    return (a > b) - (a < b);
}

/* Splay-tree comparison function for keys that point to
   NUL-terminated strings; the result is that of strcmp on the two
   strings.  */

int splay_tree_compare_strings(splay_tree_key k1, splay_tree_key k2)
{
    char *lhs = (char *)k1;
    char *rhs = (char *)k2;

    return strcmp(lhs, rhs);
}

/* Splay-tree delete function for values that hold pointers suitable
   for free(): VALUE is converted to a pointer and released.  */

void splay_tree_delete_pointers(splay_tree_value value)
{
    void *ptr = (void *)value;

    /* free() is a no-op on a null pointer, so no guard is required.  */
    free(ptr);
}

/* fn0: per-node callback used with the small test tree.  Prints the
   node key as an int followed by a space.  Returns 0 so the traversal
   continues, or 1 to stop it when SPN is null.  DATA is ignored.  */
int fn0(splay_tree_node spn, void *data)
{
    (void)data;        /* not used */

    if (!spn) {
        /* should not happen */
        return (1);
    }

    printf("%d ", (int)spn->key);

    /* return 0 to continue */
    return (0);
}

/* fn1: per-node callback used with the bigger test tree.  Prints the
   node key (as an int) on its own line, but only for keys that are a
   multiple of 100.  Returns 0 so the traversal continues, or 1 to stop
   it when SPN is null.  DATA is ignored.  */
int fn1(splay_tree_node spn, void *data)
{
    int id;

    (void)data;        /* not used */

    if (!spn) {
        /* should not happen */
        return (1);
    }

    id = (int)spn->key;
    if ((id % 100) == 0)
        printf("%d \n", id);

    /* return 0 to continue */
    return (0);
}

/* fn3: per-node callback used with the very large test trees.  Does no
   work per node.  Returns 0 so the traversal continues, or 1 to stop
   it when SPN is null.  DATA is ignored.  */
int fn3(splay_tree_node spn, void *data)
{
    (void)data;        /* not used */

    /* a null node should not happen; 0 means continue */
    return (spn == (splay_tree_node) 0) ? (1) : (0);
}

/* the test code */
int main(int argc, char *argv[])
{
    splay_tree spt;
    unsigned long long int n;    /* 64bits */
    unsigned long long int mb = 0;    /* mem use in mb */
    unsigned long long int mbs = 0;    /* saved mem use in mb */
    splay_tree_node spn;
    int status = 0;
    unsigned int v0 = 0;
    unsigned int v1 = 0;

    if (argc) {
    }
    if (argv) {
    }

    printf("testing old splay_tree_foreach()\n");

    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create small splay tree does not use realloc() */
    for (n = 0; n < 10; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse */
    status = splay_tree_foreach((splay_tree) spt, (splay_tree_foreach_fn) fn0, (void *)0 /* data */ );

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc);

    splay_tree_delete((splay_tree) spt);

    printf("testing old splay_tree_foreach()\n");

    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create bigher splay tree causes realloc */
    for (n = 0; n < 1000; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse */
    status = splay_tree_foreach((splay_tree) spt, (splay_tree_foreach_fn) fn1, (void *)0 /* data */ );

    v0 = maxstack;

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc);

    splay_tree_delete((splay_tree) spt);

    printf("testing new splay_tree_foreach()\n");

    /* now same with other foreach */
    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create bigger splay tree causes realloc */
    for (n = 0; n < 1000; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse */
    status = splay_tree_foreach2((splay_tree) spt, (splay_tree_foreach_fn) fn1, (void *)0 /* data */ );

    v1 = maxstack;

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    /* how much mem saved */
    mbs = sizeof(splay_tree_node);
    mbs = mbs * (v0 - v1);
    mbs = mbs / 1024;    /* kb */
    mbs = mbs / 1024;    /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s saved %llu Mb\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc, mbs);

    splay_tree_delete((splay_tree) spt);

    printf("testing old splay_tree_foreach()\n");

    /* now going really big but not more the 2G nodes because that is too much but can be fixed */
    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create bigger splay tree causes realloc */
    for (n = 0; n < 1000 * 1000 * 100; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse */
    status = splay_tree_foreach((splay_tree) spt, (splay_tree_foreach_fn) fn3, (void *)0 /* data */ );

    v0 = maxstack;

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc);

    splay_tree_delete((splay_tree) spt);

    printf("testing new splay_tree_foreach()\n");

    /* now going really big but not more the 2G nodes because that is too much but can be fixed */
    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create bigger splay tree causes realloc */
    for (n = 0; n < 1000 * 1000 * 100; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse */
    status = splay_tree_foreach2((splay_tree) spt, (splay_tree_foreach_fn) fn3, (void *)0 /* data */ );

    v1 = maxstack;

    /* how much mem saved */
    mbs = sizeof(splay_tree_node);
    mbs = mbs * (v0 - v1);
    mbs = mbs / 1024;    /* kb */
    mbs = mbs / 1024;    /* mb */

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s saved %llu Mb\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc, mbs);

    splay_tree_delete((splay_tree) spt);

    printf("testing old splay_tree_foreach()\n");

    /* now going really big but not more the 2G nodes because that is too much but can be fixed */
    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create bigger splay tree causes realloc */
    for (n = 0; n < 1000 * 1000 * 350; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse */
    status = splay_tree_foreach((splay_tree) spt, (splay_tree_foreach_fn) fn3, (void *)0 /* data */ );

    v0 = maxstack;

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc);

    splay_tree_delete((splay_tree) spt);

    printf("testing new splay_tree_foreach()\n");

    /* now going really big but not more the 2G nodes because that is too much but can be fixed */
    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create bigger splay tree causes realloc */
    for (n = 0; n < 1000 * 1000 * 350; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse */
    status = splay_tree_foreach2((splay_tree) spt, (splay_tree_foreach_fn) fn3, (void *)0 /* data */ );

    v1 = maxstack;

    /* how much mem saved */
    mbs = sizeof(splay_tree_node);
    mbs = mbs * (v0 - v1);
    mbs = mbs / 1024;    /* kb */
    mbs = mbs / 1024;    /* mb */

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s saved %llu Mb\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc, mbs);

    splay_tree_delete((splay_tree) spt);

    printf("testing splay_tree_foreach() without realloc()\n");

    /* now going really big but not more the 2G nodes because that is too much but can be fixed */
    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create bigger splay tree no realloc */
    for (n = 0; n < 1000 * 3; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse */
    status = splay_tree_foreach3((splay_tree) spt, (splay_tree_foreach_fn) fn3, (void *)0 /* data */ );

    v0 = maxstack;

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc);

    splay_tree_delete((splay_tree) spt);

    printf("testing new splay_tree_foreach() with maximum test machine limit using all physical ram and all disk swap space\n");

    /* now going really big but not more the 2G nodes because that is too much but can be fixed */
    spt = splay_tree_new(splay_tree_compare_ints /* compare_fn */ ,
                 (splay_tree_delete_key_fn) 0 /* delete_key_fn */ ,
                 (splay_tree_delete_value_fn) 0    /* delete_value_fn */
        );

    /* create bigger splay tree causes realloc */
    for (n = 0; n < 1000 * 1000 * 370; n++) {
        spn /* splay_tree_node */  =
            splay_tree_insert((splay_tree) spt, (splay_tree_key) n, (splay_tree_value) 0);
        if (!spn) {    /* shouldnothappen */
        }
    }

    /* how much stack used */
    maxstack = 0;
    nrealloc = 0;

    /* traverse and do NOT waste memory */
    status = splay_tree_foreach2((splay_tree) spt, (splay_tree_foreach_fn) fn3, (void *)0 /* data */ );

    v0 = maxstack;

    /* how much mem used */
    mb = sizeof(splay_tree_node);
    mb = mb * maxstack;
    mb = mb / 1024;        /* kb */
    mb = mb / 1024;        /* mb */

    printf
        ("status=%d %llu tree nodes stack used max %u entries using %llu megabyte %lu bytes %d realloc()'s\n",
         status, n, maxstack, mb, sizeof(splay_tree_node) * maxstack, nrealloc);

    splay_tree_delete((splay_tree) spt);

    return (0);
}

/* end. */


graph refine used to reason about programs in a particular graph representation

 The program source can be represented as graph data, and the open source graph-refine tool uses this data to verify the correctness of C source: "The toolset consists of a python program and a collection of Isabelle/HOL theories used to reason about programs in a particular graph representation. "

https://trustworthy.systems/software/TS/graph-refine/

The theory is described in "Thomas Sewell, Magnus Myreen and Gerwin Klein
Translation validation for a verified OS kernel
ACM SIGPLAN Conference on Programming Language Design and Implementation, pp. 471–481, Seattle, Washington, USA, June, 2013"

graph-refine-0.1.tgz (Released 18 Jul, 2013, Isabelle 2012)

Looking at compiler graph data is similar to looking at metadata without the details, as done by Google, to understand how powerful that can be.


This is how C source is translated

#struct tree *
#rotate_right (struct tree *t) {
#  struct tree *head = t->left;
#  t->left = head->right;
#  head->right = t;
#  return head;
#}


# C functions, in no particular order
Function C.rotate_right 4 C.t Word 32 Mem Mem HTD HTD PMS PMS 4 C.ret__ptr_to_struct_tree_C Word 32 Mem Mem HTD HTD PMS PMS
1 Cond Ret Err Op False Bool 0
2 Basic Ret 1 C.ret__ptr_to_struct_tree_C Word 32 Var C.head Word 32
3 Basic 2 1 Mem Mem Op MemUpdate Mem 3 Var Mem Mem Op Plus Word 32 2 Var C.head Word 32 Num 8 Word 32 Var C.t Word 32
4 Cond 3 Err Op And Bool 2 Op PValid Bool 3 Var HTD HTD Type Struct C.tree_C Var C.head Word 32 Op PValid Bool 3 Var HTD HTD Type Struct C.tree_C Var C.head Word 32
5 Basic 4 1 Mem Mem Op MemUpdate Mem 3 Var Mem Mem Op Plus Word 32 2 Var C.t Word 32 Num 4 Word 32 Op MemAcc Word 32 2 Var Mem Mem Op Plus Word 32 2 Var C.head Word 32 Num 8 Word 32
6 Cond 5 Err Op And Bool 2 Op And Bool 2 Op And Bool 2 Op PValid Bool 3 Var HTD HTD Type Struct C.tree_C Var C.t Word 32 Op PValid Bool 3 Var HTD HTD Type Struct C.tree_C Var C.t Word 32 Op PValid Bool 3 Var HTD HTD Type Struct C.tree_C Var C.head Word 32 Op PValid Bool 3 Var HTD HTD Type Struct C.tree_C Var C.head Word 32
7 Basic 6 1 C.head Word 32 Op MemAcc Word 32 2 Var Mem Mem Op Plus Word 32 2 Var C.t Word 32 Num 4 Word 32
8 Cond 7 Err Op And Bool 2 Op PValid Bool 3 Var HTD HTD Type Struct C.tree_C Var C.t Word 32 Op PValid Bool 3 Var HTD HTD Type Struct C.tree_C Var C.t Word 32
9 Cond 8 8 Op True Bool 0
EntryPoint 9




This is part of the open source sel4 kernel with many tools at https://github.com/sel4/

"seL4 microkernel and related repositories Provided by the seL4 Foundation"

The project page is at http://sel4.systems/

The seL4® Microkernel
Security is no excuse for bad performance
The world’s most high-assured operating system kernel

This is C source described at http://sel4.systems/About/

seL4 is a high-assurance, high-performance operating system microkernel. It is unique because of its comprehensive formal verification, without compromising performance. It is meant to be used as a trustworthy foundation for building safety- and security-critical systems. It is available as open source on GitHub and supported by the seL4 Foundation.

The whole project has these components at https://trustworthy.systems/software/TS/

 From time to time we release software as open source, or occasionally under a binary-only licence.
Github

You can find most of our software on github:

    http://github.com/sel4
    http://github.com/sel4proj
    http://github.com/echronos

Cogent

Cogent is a programming language with a certifying compiler for developing high-assurance systems components. The code can be found at https://github.com/NICTA/cogent/.
CakeML

CakeML is an impure functional language with an end-to-end-verified optimizing compiler. The code can be found at https://code.cakeml.org/.
C Parser

The ML-Tool and Isabelle/HOL code that translates a subset of C-99 into something that Isabelle/HOL can understand is available from here.
AutoCorres

AutoCorres is an Isabelle/HOL tool that automatically abstracts and simplifies programs that have been translated by the C parser.
Graph refine

Graph refine is a collection of tools and Isabelle/HOL theories used to reason about programs in a particular graph representation.
WCET tools

WCET Tools are a set of tools to compute the worst-case execution time for ARM binaries.
The seL4 kernel

seL4 is a proven correct microkernel available under an open source licence. It is hosted on github, but read the instructions at http://docs.sel4.systems/ before attempting to get it.
Channel Matrix Tools

A set of tools for generating and analysing large, sparse channel matrices.
Bitfield Generator

A DSL compiler for co-generation of bitfield code and associated correctness proofs.
CAmkES

CAmkES is a component platform for seL4.

the gml4gtk graph viewer can be used with gcc and clang compiler graph data and maybe usable with this special kind of software, on sourceforge at https://sourceforge.net/projects/gml4gtk/



Tuesday, September 28, 2021

Machine learning in static analysis of program source code

 The programmer can do more work to fix bugs and issues and improve the program using more source code checking software and that makes the user happy and will cause less bug reports and will save time and maybe money.

Note that after all this source code checking there can still be bugs, because of algorithm errors or other causes which analysis software cannot detect.

The main git repository hosts have a feature to compile the source on every update in a virtual machine and to send an email message when there is an issue to fix as soon as possible.

The newest GNU GCC compiler is version 11.2 and using option -fanalyzer will generate info what can be issues to fix and is at https://gcc.gnu.org/

The -fverbose-asm option of the compilers can be used to manual check the assembly output when needed

To be sure, the newest GCC compiler version is needed for the analyzer; the video (with ads) is at https://www.youtube.com/watch?v=zUw0ZVXCwoM


The clang compiler has the scan-build command which can generate a report about possible issues to fix and is at https://clang.llvm.org/

The sparse tool is daily used to check millions of lines with C source of the Linux kernel and can be used with own C source at https://www.kernel.org/doc/html/v4.11/dev-tools/sparse.html

Newest sparse tool is in debian at  https://buildd.debian.org/status/package.php?p=sparse&suite=bullseye


The smatch tool is based on sparse and can do even more source code checks of C source and is very easy to use at  https://repo.or.cz/w/smatch.git

Running the binary with valgrind and fixing all memory issues has high priority and valgrind is at https://www.valgrind.org/

After this the extra safety compiler options can be used as documented by airbus aviation at https://github.com/airbus-seclab/c-compiler-security

This did find hidden bugs not found with the other software tools

To get the maximum, the MISRA rules can be applied, but checkers for them are only partially available, and the only (unmaintained) open source ones are here at
https://github.com/search?q=misra&type=Repositories

"Machine learning in static analysis of program source code" from the commercial pvs software, with descriptions and criticism, is at
https://pvs-studio.com/en/blog/posts/0706/

and they have more blog examples with amazing things this kind of software can do to improve source code without bugs

this software does detect that actually used sources have weird statements as " if (a > b || b < a) { } "



This does mention open source tools such as

"A static analyzer for Java, C, C++, and Objective-C"
https://github.com/facebook/infer

sourced
https://github.com/src-d/sourced-ce

Here is a list of mostly commercial and few open source code checking software at https://github.com/analysis-tools-dev/static-analysis

The joern project takes compiler data a step further, putting compiler data
in a searchable database for use with code checking, see  https://docs.joern.io/home

mutation testing is another way to scan for bugs using llvm, for c, c++ at
https://mull.readthedocs.io/en/latest/GettingStarted.html

The rosecheckers project has MIT Free software to check for cert rules at

https://sourceforge.net/projects/rosecheckers/

These checkers enforce the CERT Secure Coding Standards for C and C++.
The standards are available at https://www.securecoding.cert.org

this is a list of similar tools at
https://awesomeopensource.com/projects/mutation-testing

c and c++ source code improvements ideas from intel
https://software.intel.com/content/www/us/en/develop/articles/the-ultimate-question-of-programming-refactoring-and-everything.html

The older intel icc compiler is available and has other interesting messages about the compilation, and it can generate documented assembly and a report of what the optimizer did.

undefined in c++ summary
https://github.com/shafik/cpp_undefined_behavior_enumerated

and at last the gml4gtk graph viewer on sourceforge needs improvement of the algorithms to get a better drawing but can already be used with graphviz dot data from gcc or clang compilers to have a close look how the source is compiled in maximal detail on sourceforge at https://sourceforge.net/projects/gml4gtk/

When the source is checked and ready it is possible to generate the call graph data of the binary code using radare2 which has a graph layout tool generating ascii art with the control flow graph at https://rada.re/n/

The GNU assembler may do some additional optimizing, so the assembly language generated by the compiler can differ from the binary code. GNU AS is at https://www.gnu.org/software/binutils/

  the generated source code from bison and flex parser tools is flawless and can be tweaked more and when analyzer software generates issues it may need to use newest gcc compiler version 11.2 and option -fanalyzer

Because recommended smatch software is so very easy to use with C source added this logo

Every program has a limited stack size and thread stack size, and with recursive routines this may cause crashes.
On Windows the stack size is much smaller than on Linux, so a Windows program may crash faster than the Linux version.
The solution is to be aware of recursive routines and tree routines and check if they may cause crashes. For example, gcc splay.c causes crashes.
The programming language does not matter because this has to do with how the low level lib routines are programmed.
The gcc compiler has no option to warn about recursive routines or cycles of routines that increase stack space usage.
See also
https://ariadne.space/2021/06/25/understanding-thread-stack-sizes-and-how-alpine-is-different/
https://utcc.utoronto.ca/~cks/space/blog/unix/UnixAPIAndCRuntime
https://utcc.utoronto.ca/~cks/space/blog/programming/CStackSizeInvisible
OS     Process Stack Size     Thread Stack Size
Darwin (macOS, iOS, etc)     8 MiB     512 KiB
FreeBSD     8 MiB     2 MiB
OpenBSD (before 4.6)     8 MiB     64 KiB
OpenBSD (4.6 and later)     8 MiB     512 KiB
Windows     1 MiB     1 MiB
Alpine 3.10 and older     8 MiB     80 KiB
Alpine 3.11 and newer     8 MiB     128 KiB
GNU/Linux     8 MiB     8 MiB

 An example of open source with perfect C source is "The seL4® Microkernel
Security is no excuse for bad performance
The world’s most high-assured operating system kernel"
at http://sel4.systems/


 

It seems that the most productive day of the unix inventor was when he threw away some C source because he had made a mess of it.
Then he rebooted himself and created a new, good source using the things he had learned.
Such situations happen, and as a programmer it is better to be prepared for them.

Polygeist MLIR Polyhedral Compiler for C/C++ for Linux

 Polygeist MLIR Polyhedral Compiler for C/C++ connects the MLIR compiler infrastructure to cutting edge polyhedral optimization tools. 

Our goal with Polygeist is to connect decades of research in the polyhedral model to the new MLIR compiler infrastructure.

https://polygeist.mit.edu/

https://github.com/wsmoses/Polygeist


Polygeist uses clang frontend and AST. Polygeist uses MLIR’s in-tree dialects for representation and MLIR’s framework to define additional representations

Polymer is an essential component to the following two papers:

    Polygeist: Affine C in MLIR. This paper gives an overview of the whole Polygeist framework, in which Polymer does the polyhedral optimisation part of work.
    Phism: Polyhedral HLS in MLIR. This paper demonstrates an interesting way to leverage Polymer for polyhedral HLS within the MLIR ecosystem.

Polygeist is composed of three pieces:

    A frontend to emit MLIR SCF from a broad range of existing C or C++ code.
    A set of compilation passes to raise SCF constructs to the Affine dialect.
    A set of compilation passes to have a bi-directional conversion between MLIR and OpenScop exchange format.



This is a download of 4.3 Gb C++ sources

git clone --recursive https://github.com/wsmoses/Polygeist.git
cd Polygeist
mkdir llvm-project/build
cd llvm-project/build
cmake -G Ninja ../llvm \
  -DLLVM_ENABLE_PROJECTS="mlir;clang" \
  -DLLVM_TARGETS_TO_BUILD="host" \
  -DLLVM_ENABLE_ASSERTIONS=ON \
  -DCMAKE_BUILD_TYPE=DEBUG
ninja
ninja check-mlir
mkdir build
cd build
cmake -G Ninja .. \
  -DMLIR_DIR=$PWD/../llvm-project/build/lib/cmake/mlir \
  -DCLANG_DIR=$PWD/../llvm-project/build/lib/cmake/clang \
  -DLLVM_TARGETS_TO_BUILD="host" \
  -DLLVM_ENABLE_ASSERTIONS=ON \
  -DCMAKE_BUILD_TYPE=DEBUG
ninja
ninja check-mlir-clang


To cite Polygeist, please cite the following:

@inproceedings{polygeistPACT,
  title = {Polygeist: Raising C to Polyhedral MLIR},
  author = {Moses, William S. and Chelini, Lorenzo and Zhao, Ruizhe and Zinenko, Oleksandr},
  booktitle = {Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques},
  numpages = {12},
  location = {Virtual Event},
  series = {PACT '21},
  publisher = {Association for Computing Machinery},
  year = {2021},
  address = {New York, NY, USA},
  keywords = {Polygeist, MLIR, Polyhedral, LLVM, Compiler, C++, Pluto, Polly, OpenScop, Parallel, OpenMP, Affine, Raising, Transformation, Splitting, Automatic-Parallelization, Reduction, Polybench},
}

With the generated compiler graph data from this compiler the gml4gtk graph viewer can be used with a GTK+ gui on sourceforge at https://sourceforge.net/projects/gml4gtk/

The gml4gtk graph viewer is in active development and needs improvements of the algorithms to get a better drawing output

The benefit of the gml4gtk viewer is that it has a gtk+ gui with options to turn off edge labels and node labels to get a compact graph drawing, and using a popup the node labels can be made visible.

Also the gml4gtk graph viewer project does document related tools on this blog and on https://graphviewer.nl/

Monday, September 27, 2021

teach how compilers work backwards

The gml4gtk graph viewer can show images how the compiler translates a function() in a source file into smaller compiler blocks which are moved around by the compiler during optimizing on sourceforge at https://sourceforge.net/projects/gml4gtk/

It can be easier to teach how compilers work backwards, beginning with reading the generated assembly source

Then it can be easier to understand the abstract parts of a compiler, such as the AST and optimizing.

A global variable in C is a memory location which can be used by all routines in the program and a static variable can be used by only the routines in the same source file.

int var = 91;
void boo (void)
{
 var = var + 3;
}

void baz (void)
{
 var++;
 boo();
}

and gcc compiler generated this assembly with gcc -S -O0 -fverbose-asm test.c

    .text
    .globl    var
    .data
    .align 4
    .type    var, @object
    .size    var, 4
var:
    .long    91   <--- memory location of var with initial decimal value 91
    .text
    .globl    boo
    .type    boo, @function
boo:
.LFB0:
    .cfi_startproc
    pushq    %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp    #,
    .cfi_def_cfa_register 6
# m.c:4:  var = var + 3;
    movl    var(%rip), %eax    # var, var.0_1
    addl    $3, %eax    #, _2  <--- increment var with +3
# m.c:4:  var = var + 3;
    movl    %eax, var(%rip)    # _2, var
# m.c:5: }
    nop   
    popq    %rbp    #
    .cfi_def_cfa 7, 8
    ret   
    .cfi_endproc
.LFE0:
    .size    boo, .-boo
    .globl    baz
    .type    baz, @function
baz:
.LFB1:
    .cfi_startproc
    pushq    %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp    #,
    .cfi_def_cfa_register 6
# m.c:9:  var++;
    movl    var(%rip), %eax    # var, var.1_1
    addl    $1, %eax    #, _2   <---- increment var with +1
    movl    %eax, var(%rip)    # _2, var
# m.c:10:  boo();
    call    boo    #
# m.c:11: }
    nop   
    popq    %rbp    #
    .cfi_def_cfa 7, 8
    ret   
    .cfi_endproc

Now this can be done with every detail of the C, C++ or rust language to show programmers how the program actually works on the silicon cpu chip.

The optimizer replaces assembly parts with faster assembly and the optimizer from old amsterdam compiler is literally doing that at http://tack.sourceforge.net/
 
The Linux kernel developers have the good habit checking the generated assembly code to make sure to avoid bugs and that extra work does prevent many bugs.

To understand the line in the unix source code with "you are not supposed to understand this" needs a study in the details how the hardware chips work.

The gcc -O0 option is needed to turn optimizing off to make it easier to understand the assembly source


 

Sunday, September 26, 2021

Visualizing Control Flow Graphs azu_etd_19102_sip1_m.pdf

 The gml4gtk graph viewer can be used with most GNU GCC compiler and clang compiler graph data and is on sourceforge at https://sourceforge.net/projects/gml4gtk/
The gml4gtk graph viewer is in development and needs more improved algorithms to get better drawing results.

For all those people who like reading about compiler topics and theory there is "Visualizing Control Flow Graphs"  azu_etd_19102_sip1_m.pdf

https://repository.arizona.edu/handle/10150/661593

The summary alone is a long read but it starts with:
"Control Flow Graphs (CFGs) are directed graphs that represent all possible paths a computer program can take during its execution. They are used to analyze computer programs for purposes such as compilation, performance, and security. Experts in these areas visualize the contents of CFGs alongside other artifacts generated during the program analysis to develop and debug new algorithms, "

They developed a cfgconf library here at github https://github.com/hdc-arizona/cfgConf and documented here at https://github.com/hdc-arizona/cfgConf/wiki

"Welcome to CFGConf, a JSON-based language for visualizing Control Flow Graphs. It provides a simple and intuitive way to generate custom visualization for graphs. CFGConf is designed to make it convenient to generate quick and easy drawings of graphs that match the task at hand. With just a few lines of JSON code, you will be able to create graphs suited to your analysis requirements. With its domain-specific layout for visualizing loops and flexible ways to filter and collapse the graph, you will be able to visualize graphs without writing complicated program code to process the graph and generate the corresponding drawing."

This used graphviz and a small webserver.

It is possible to patch GCC or llvm to generate GML graph data, which is much easier to use than dot, and there are many more tools for gml data. One example is this gcc patch

https://notabug.org/mooigraph/gcc-10.1-gml

or "Convert a GNU GCC Compiler tree dump file in a GML graph (Graph-Model-Language) to visualize with gml4gtk GML graph viewer or other tools"

https://notabug.org/mooigraph/gcctree2gml


This gml graph data can be used with gml4gtk or tulip software at https://tulip.labri.fr/site/

Also possible to use the big C++ ogdf lib at https://ogdf.uos.de/

Or the igraph lib at https://igraph.org/

most users are not using the C core directly, but are using it through one of the interfaces: Python, R or Mathematica, and the C lib is nowadays c++

the igraph C lib has layered sugiyama graph layout with brandes positioning algorithm as GNU GPL Free software

Here are questions about igraph and examples what people are doing with it

https://stackoverflow.com/questions/tagged/igraph


This is an easy example using the igraph lib at  http://cneurocvs.rmki.kfki.hu/igraph/doc/html/ch03s01.html

in Leiden at https://www.universiteitleiden.nl/en/news/2020/05/network-analysis-package-receives-funding-from-czi

There are many more tools for GML graph data, and using these tools and libs will someday help to improve programming and the compilers for sure

using compiler option -fverbose-asm the control flow can be made readable in the generated assembly with the original source code.
This is another way to teach and learn how compilers work, and it needs a small amount of know-how about assembly programming, which is available as a pdf, website or book on the internet.

  For example, given this C source file:

    int test (int n)
    {
      int i;
      int total = 0;

      for (i = 0; i < n; i++)
        total += i * i;

      return total;
    }

    compiling to (x86_64) assembly via -S and emitting the result direct to stdout via -o -

    gcc -S test.c -fverbose-asm -Os -o -

    gives output similar to this:

        .file    "test.c"
    # GNU C11 (GCC) version 7.0.0 20160809 (experimental) (x86_64-pc-linux-gnu)
      [...snip...]
    # options passed:
      [...snip...]

        .text
        .globl    test
        .type    test, @function
    test:
    .LFB0:
        .cfi_startproc
    # test.c:4:   int total = 0;
        xorl    %eax, %eax    # <retval>
    # test.c:6:   for (i = 0; i < n; i++)
        xorl    %edx, %edx    # i
    .L2:
    # test.c:6:   for (i = 0; i < n; i++)
        cmpl    %edi, %edx    # n, i
        jge    .L5    #,
    # test.c:7:     total += i * i;
        movl    %edx, %ecx    # i, tmp92
        imull    %edx, %ecx    # i, tmp92
    # test.c:6:   for (i = 0; i < n; i++)
        incl    %edx    # i
    # test.c:7:     total += i * i;
        addl    %ecx, %eax    # tmp92, <retval>
        jmp    .L2    #
    .L5:
    # test.c:10: }
        ret
        .cfi_endproc
    .LFE0:
        .size    test, .-test
        .ident    "GCC: (GNU) 7.0.0 20160809 (experimental)"
        .section    .note.GNU-stack,"",@progbits

it can be easier to teach how a compiler works backwards by starting reading the generated assembly output