#!/bin/sh

# A Poor (but Free) Man's dtrace
#
# Copyright (C) 2014-2024 Free Software Foundation, Inc.
#
# Contributed by Oracle, Inc.
#
# This file is part of GDB.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see
# <http://www.gnu.org/licenses/>.

# DISCLAIMER DISCLAIMER DISCLAIMER
# This script is a test tool.  As such it is in no way intended to
# replace the "real" dtrace command for any practical purpose, apart
# from testing the DTrace USDT probes support in GDB.

# that said...
#
# pdtrace is a limited dtrace program, implementing a subset of its
# functionality:
#
# - The generation of an ELF file containing an embedded dtrace
#   program.  Equivalent to dtrace -G.
#
# - The generation of a header file with definitions for static
#   probes.  Equivalent to dtrace -h.
#
# This allows generating DTrace static probes without having to use
# the user-level DTrace components.  The generated objects are 100%
# compatible with DTrace and can be traced by the dtrace kernel module
# as if they were generated by dtrace.
#
# Some of the known limitations of this implementation are:
# - The input d-script must describe one provider, and only one.
# - The "probe " directives in the d-file must not include argument
#   names, just the types.  Thus something like `char *' is valid, but
#   `char *name' is not.
# - The command line options must precede other arguments, since the
#   script uses the (more) portable getopts.
# - Each probe header in the d-script must be contained in
#   a single line.
# - strip -K removes the debugging information from the input object
#   file.
# - The supported target platforms are i[3456]86 and x86_64.
#
# Please keep this code as portable as possible.  Restrict yourself to
# POSIX sh.

# This script uses the following external programs, defined in
# variables.  Some of them are substituted by autoconf.

# The values below are expanded unquoted where they are used in this
# script, so a value may carry options along with the program name
# (see EGREP).  The @...@ placeholders are substituted by autoconf.
TR=tr
NM=@NM_TRANSFORM_NAME@
# `egrep' is not specified by POSIX and recent GNU grep releases warn
# on every invocation that it is obsolescent; `grep -E' is the
# portable spelling of the very same functionality.
EGREP="grep -E"
SED=sed
CUT=cut
READELF=@READELF_TRANSFORM_NAME@
SORT=sort
EXPR=expr
WC=wc
UNIQ=uniq
HEAD=head
SEQ=seq
AS=@GAS_TRANSFORM_NAME@
STRIP=@STRIP_TRANSFORM_NAME@
TRUE=true

# Sizes for several DOF structures, in bytes.
#
# See linux/dtrace/dof.h for the definition of the referred
# structures.

# Reserved at the start of the DOF program and emitted as dofh_hdrsize.
dof_hdrsize=64      # sizeof(dtrace_dof_hdr)
# Emitted as dofh_secsize; accounted once per generated section header.
dof_secsize=32      # sizeof(dtrace_dof_sect)
# Accounted once per base probe; also the entry size of PROBES sections.
dof_probesize=48    # sizeof(dtrace_dof_probe)
# Accounted once per provider; also the size of PROVIDER sections.
dof_providersize=44 # sizeof(dtrace_dof_provider)

# Types for the several DOF sections.
#
# See linux/dtrace/dof_defines.h for a complete list of section types
# along with their values.

# Each value is emitted as the dofs_type field of the section header
# named in the comment next to it.
dof_sect_type_strtab=8      # STRTAB section header
dof_sect_type_provider=15   # per-provider PROVIDER section header
dof_sect_type_probes=16     # per-provider PROBES section header
dof_sect_type_prargs=17     # ARGTAB section header
dof_sect_type_proffs=18     # OFFTAB section header
dof_sect_type_prenoffs=26   # EOFFTAB section header

### Functions

# Write a message to the standard error output and exit with an error
# status.
#
# Arguments:
#   $1 error message.

f_panic()
{
    # The diagnostic goes to the standard error output, prefixed with
    # "error: ", and the script terminates with status 1.
    echo "error: $1" >&2
    exit 1
}

# Write a usage message to the standard output and exit with an error
# status.

f_usage()
{
    # Synopsis line, followed by an empty line.
    printf '%s\n\n' \
           "Usage: pdtrace [-32|-64] [-GhV] [-o output] [-s script] [ args ... ]"

    # Word-size selection options, followed by an empty line.
    printf '\t%s\n' "-32 generate 32-bit ELF files"
    printf '\t%s\n\n' "-64 generate 64-bit ELF files"

    # Remaining options, one per line.  The format is reused for each
    # argument.
    printf '\t%s\n' \
           "-G  generate an ELF file containing embedded dtrace program" \
           "-h  generate a header file with definitions for static probes" \
           "-o  set output file" \
           "-s  handle probes according to the specified D script" \
           "-V  report the DTrace API version implemented by the tool"

    # Exit with an error status.
    exit 2
}

# Write a version message to the standard output and exit with a
# successful status.

f_version()
{
    # Report the version string on the standard output, then leave
    # with the status of that write.
    printf '%s\n' "pdtrace: Sun D 1.6.3"
    exit
}

# Add a new record to a list and return it.
#
# Arguments:
# $1 is the list.
# $2 is the new record

f_add_record()
{
    # Records are separated by newline characters.  A separator is
    # only needed when the list already holds at least one record.
    rec=$1
    if test -n "$rec"; then
        rec="$rec
"
    fi

    # Print the resulting list without a trailing newline.
    printf %s "$rec$2"
}

# Collect the providers and probes information from the input object
# file.
#
# This function sets the values of the following global variables.
# The values are structured in records, each record in a line.  The
# fields of each record are separated in some cases by white
# characters and in other cases by colon (:) characters.
#
# The type codes in the line format descriptors are:
# S: string, D: decimal number
#
# probes
#   Regular probes and is-enabled probes.
#   TYPE(S) PROVIDER(S) NAME(S) OFFSET(D) BASE(D) BASE_SYM(S)
# base_probes
#   Base probes, i.e. probes sharing provider, name and container.
#   PROVIDER(S) NAME(S) BASE(D) BASE_SYM(S)
# providers
#   List of providers.
#   PROVIDER(S)
# All the offsets are expressed in bytes.
#
# Input globals:
#  objfile
# Output globals:
#  probes, base_probes, providers

# Record lists.  They start empty; `probes', `base_probes' and
# `providers' are filled in by f_collect_probes.
probes=
base_probes=
providers=
probes_args=

f_collect_probes()
{
    # Probe points are function calls to undefined functions featuring
    # distinct names for both normal probes and is-enabled probes.
    PROBE_REGEX="(__dtrace_([a-zA-Z_]+)___([a-zA-Z_]+))"
    EPROBE_REGEX="(__dtraceenabled_([a-zA-Z_]+)___([a-zA-Z_]+))"

    # List of the functions defined in the text section of the object
    # file, as ADDRESS:SYMBOL entries sorted by decreasing address.
    # It does not depend on the probe being processed, so it is
    # computed only once.
    funcs=$($NM -td "$objfile" | $EGREP "^[0-9]+ T " \
            | $CUT -d' ' -f1,3 | $SORT -n -r | $TR ' ' :)

    while read type symbol provider name; do
        test -z "$type" && f_panic "No probe points found in $objfile"

        provider=$(printf %s "$provider" | $TR -s _)
        name=$(printf %s "$name" | $TR -s _)

        # Search the object file for relocations defined for the
        # probe symbol.  Then calculate the base address of the
        # probe (along with the symbol associated with that base
        # address) and the offset of the probe point.
        #
        # The symbol name must be matched as a whole word: otherwise
        # the relocations for a probe named `foo_bar' would also be
        # attributed to a probe named `foo'.
        for offset in $($READELF -W -r "$objfile" \
                        | $EGREP "[[:space:]]$symbol([[:space:]]|\$)" \
                        | $CUT -d' ' -f1)
        do
            # The relocation is for the address operand of the call
            # instruction, so we have to subtract 1 to find the real
            # probed point.
            offset=$((0x$offset - 1))

            # The addresses of is-enabled probes must point to the
            # first NOP instruction in their patched instructions
            # sequences, so modify them (see f_patch_objfile for the
            # instruction sequences).
            if test "$type" = "e"; then
                if test "$objbits" -eq "32"; then
                    offset=$((offset + 2))
                else # 64 bits
                    offset=$((offset + 3))
                fi
            fi

            # Determine the base address of the probe and its
            # corresponding function name: the function located at
            # the highest address not above the probed point.  Both
            # values are reset first so a match found for a previous
            # relocation can never be reused by mistake.
            base=
            func_sym=
            for fun in $funcs; do
                func_off=${fun%%:*}
                if test "$func_off" -le "$offset"; then
                    # Note that `expr' is used to remove leading zeros
                    # to avoid FUNC_OFF to be interpreted as an octal
                    # number in arithmetic contexts.
                    base=$($EXPR "$func_off" + 0)
                    func_sym=${fun#*:}
                    break
                fi
            done
            test -n "$base" || \
                f_panic "could not find base address for probe at $objfile($offset)"

            # Emit the record for the probe.
            probes=$(f_add_record "$probes" \
                                  "$type $provider $name $(($offset - $base)) $base $func_sym")
        done
    done <<EOF
$($NM "$objfile" | $EGREP " U $PROBE_REGEX" \
            | $SED -E -e "s/.*$PROBE_REGEX.*/p \1 \2 \3/";
     $NM "$objfile" | $EGREP " U $EPROBE_REGEX" \
         | $SED -E -e "s/.*$EPROBE_REGEX.*/e \1 \2 \3/")
EOF

    # Build the list of providers and of base probes from the probes.
    while read type provider name offset base base_sym; do
        providers=$(f_add_record "$providers" "$provider")
        base_probes=$(f_add_record "$base_probes" "$provider $name $base $base_sym")
    done <<EOF
$probes
EOF
    providers=$(printf %s\\n "$providers" | $SORT | $UNIQ)
    base_probes=$(printf %s\\n "$base_probes" | $SORT | $UNIQ)
}

# Collect the argument counts and type strings for all the probes
# described in the `probes' global variable.  This is done by
# inspecting the d-script file provided by the user.
#
# This function sets the values of the following global variables.
# The values are structured in records, each record in a line.  The
# fields of each record are separated in some cases by white
# characters and in other cases by colon (:) characters.
#
# The type codes in the line format descriptors are:
# S: string, D: decimal number
#
# probes_args
#   Probes arguments.
#   PROVIDER(S):NAME(S):NARGS(D):ARG1(S):ARG2(S):...:ARGn(S)
#
# Input globals:
#  probes
# Output globals:
#  probes_args
# Arguments:
#   $1 is the d-script file from which to extract the arguments
#      information.

f_collect_probes_args()
{
    dscript=$1
    while read type provider name offset base base_sym; do
        # Only regular probes are described in the d-script file;
        # is-enabled probes take no arguments, so skip them.
        case $type in
            p) ;;
            *) continue ;;
        esac

        # Underscores are doubled in d-script files so they can be
        # part of provider and probe names; mangle the names the same
        # way before searching for them.
        m_provider=$(printf %s $provider | $SED -e 's/_/__/g')
        m_name=$(printf %s $name | $SED -e 's/_/__/g')

        # Skip probes whose provider is not described in the d-script.
        if ! $EGREP -q "provider +$m_provider" $dscript; then
            continue
        fi

        # Fetch the line holding the probe prototype; skip the probe
        # if there is none.
        line=$($EGREP "^ *probe +$m_name *\(.*\);" $dscript)
        if test -z "$line"; then
            continue
        fi

        # Keep only the text between the parentheses.  Note this
        # requires the whole prototype to live in a single line.
        line=$(printf %s "$line" | $SED -e 's/.*(\(.*\)).*/\1/')

        # Split the argument list at the commas.  Globbing is disabled
        # while splitting so `*' in pointer types is left alone.
        args=""
        nargs=0
        set -f; IFS=,
        for arg in $line; do
            nargs=$((nargs + 1))
            args="$args:$arg"
        done
        set +f; unset IFS

        # Emit the record for the probe arguments.
        probes_args=$(f_add_record "$probes_args" "$provider:$name:$nargs$args")
    done <<EOF
$probes
EOF
}

# Functions to manipulate the global BCOUNT.

# Running byte counter.
BCOUNT=0

# Add $1 to BCOUNT.
f_incr_bcount()
{
    : "$((BCOUNT += $1))"
}

# Round BCOUNT up to the next multiple of $1.  BCOUNT is left
# untouched when it is already a multiple of $1.
f_align_bcount()
{
    if test $((BCOUNT % $1)) -ne 0; then
        BCOUNT=$(((BCOUNT / $1 + 1) * $1))
    fi
}

# Generate a line of assembly code and add it to the asmprogram global
# variable.
#
# Arguments:
#   $1 string to generate in a line.

# Accumulated assembly program, one generated line per record.
asmprogram=

f_gen_asm()
{
    # $1 is deliberately made part of the printf format string, so the
    # escape sequences embedded by the callers (such as \t) get
    # expanded.  As a consequence a `%' character in $1 would be
    # interpreted as a conversion specification.  Every generated line
    # starts with a tab character, and the command substitution strips
    # any trailing newline.
    line=$(printf "\t$1")
    asmprogram=$(f_add_record "$asmprogram" "$line")
}

# Helper function to generate the assembly code of a DOF section
# header.
#
# This function is used by `f_gen_dof_program'.
#
# Arguments:
#   $1 is the name of the described section.
#   $2 is the type of the described section.
#   $3 is the alignment of the described section.
#   $4 is the size in bytes of each entity stored in the described
#      section.
#   $5 is the offset in the DOF program of the described section.
#   $6 is the size of the described section, in bytes.

f_gen_dof_sect_header()
{
    # Emit, through f_gen_asm, a blank separator line and a comment
    # naming the section, followed by the fields of the section header:
    # four 32-bit words and two 64-bit words, i.e. 32 bytes of data.
    f_gen_asm ""
    f_gen_asm "/* dtrace_dof_sect for the $1 section.  */"
    f_gen_asm ".balign 8"
    f_gen_asm ".4byte $2\t/* uint32_t dofs_type  */"
    f_gen_asm ".4byte $3\t/* uint32_t dofs_align  */"
    # The DOF_SECF_LOAD flag is 1 => loadable section.
    f_gen_asm ".4byte 1\t/* uint32_t dofs_flags  */"
    # $4 is emitted as the size of each entry of the section.
    f_gen_asm ".4byte $4\t/* uint32_t dofs_entsize  */"
    f_gen_asm ".8byte $5\t/* uint64_t dofs_offset  */"
    f_gen_asm ".8byte $6\t/* uint64_t dofs_size  */"
}

# Generate a DOF program and assemble it into the output file.
#
# The DOF program generated by this function has the following
# structure:
#
# HEADER
# STRTAB OFFTAB EOFFTAB [PROBES PROVIDER]...
# STRTAB_SECT OFFTAB_SECT EOFFTAB_SECT ARGTAB_SECT [PROBES_SECT PROVIDER_SECT]...
#
# Input globals:
#   probes, base_probes, providers, probes_args, BCOUNT

f_gen_dof_program()
{
    ###### Variables used to cache information needed later.

    # Number of section headers in the generated DOF program.
    dof_secnum=0
    # Offset of section headers in the generated DOF program, in bytes.
    dof_secoff=0

    # Sizes of the STRTAB, OFFTAB and EOFFTAB sections, in bytes.
    strtab_size=0
    offtab_size=0
    eofftab_size=0

    # Offsets of the STRTAB, OFFTAB, EOFFTAB, ARGTAB and PROBES
    # sections in the generated DOF program.  In bytes.
    strtab_offset=0
    offtab_offset=0
    eofftab_offset=0
    argtab_offset=0
    probes_offset=0

    # Indexes of the section headers of the STRTAB, OFFTAB, EOFFTAB,
    # ARGTAB and PROBES sections in the sections array.
    strtab_sect_index=0
    offtab_sect_index=0
    eofftab_sect_index=0
    argtab_sect_index=0
    probes_sect_index=0

    # Offsets and eoffsets of the probes, as byte offsets into the
    # OFFTAB and EOFFTAB sections respectively.
    # Lines: PROVIDER(S) NAME(S) BASE(D) (DOF_OFFSET(D)|DOF_EOFFSET(D))
    probes_dof_offsets=
    probes_dof_eoffsets=

    # Offsets in the STRTAB section for the first type of base probes.
    # Record per line: PROVIDER(S) NAME(S) BASE(D) OFFSET(D)
    probes_dof_types=

    # Offsets of the provider names in the provider's STRTAB section.
    # Lines: PROVIDER(S) OFFSET(D)
    providers_dof_names=

    # Offsets of the base-probe names in the provider's STRTAB section.
    # Lines: PROVIDER(S) NAME(S) BASE(D) OFFSET(D)
    probes_dof_names=

    # Offsets of the provider sections in the DOF program.
    # Lines: PROVIDER(S) OFFSET(D)
    providers_offsets=

    ###### Generation phase.

    # The header of the DOF program contains a `struct
    # dtrace_dof_hdr'.  Record its size, but it is written at the end
    # of the function.
    f_incr_bcount $dof_hdrsize; f_align_bcount 8

    # The STRTAB section immediately follows the header.  It contains
    # the following set of packed null-terminated strings:
    #
    # [PROVIDER [BASE_PROBE_NAME [BASE_PROBE_ARG_TYPE...]]...]...
    strtab_offset=$BCOUNT
    strtab_sect_index=$dof_secnum
    dof_secnum=$((dof_secnum + 1))
    f_gen_asm ""
    f_gen_asm "/* The STRTAB section.  */"
    f_gen_asm ".balign 8"
    # Add the provider names.  OFF tracks the offset of the next
    # string in the section; its final value is the section size.
    off=0
    while read provider; do
        # Note the funny mangling...
        f_gen_asm ".asciz \"$(printf %s "$provider" | $TR _ -)\""
        providers_dof_names=$(f_add_record "$providers_dof_names" \
                                           "$provider $off")
        off=$(($off + ${#provider} + 1))

        # Add the base-probe names.
        while read p_provider name base base_sym; do
            test "$p_provider" = "$provider" || continue
            # And yes, more funny mangling...
            f_gen_asm ".asciz \"$(printf %s "$name" | $TR _ -)\""
            probes_dof_names=$(f_add_record "$probes_dof_names" \
                                            "$p_provider $name $base $off")
            off=$(($off + ${#name} + 1))
            while read args; do
                a_provider=$(printf %s "$args" | $CUT -d: -f1)
                a_name=$(printf %s "$args" | $CUT -d: -f2)
                test "$a_provider" = "$p_provider" \
                    && test "$a_name" = "$name" \
                    || continue

                probes_dof_types=$(f_add_record "$probes_dof_types" \
                                                "$a_provider $name $base $off")
                nargs=$(printf %s "$args" | $CUT -d: -f3)
                for n in $($SEQ $nargs); do
                    arg=$(printf %s "$args" | $CUT -d: -f$(($n + 3)))
                    f_gen_asm ".asciz \"${arg}\""
                    off=$(($off + ${#arg} + 1))
                done
            done <<EOF
$probes_args
EOF
        done <<EOF
$base_probes
EOF
    done <<EOF
$providers
EOF
    strtab_size=$off
    f_incr_bcount $strtab_size; f_align_bcount 8

    # The OFFTAB section contains a set of 32bit words, one per
    # defined regular probe.
    offtab_offset=$BCOUNT
    offtab_sect_index=$dof_secnum
    dof_secnum=$((dof_secnum + 1))
    f_gen_asm ""
    f_gen_asm "/* The OFFTAB section.  */"
    f_gen_asm ".balign 8"
    off=0
    while read type provider name offset base base_sym; do
        test "$type" = "p" || continue
        f_gen_asm ".4byte $offset\t/* probe ${provider}:${name}  */"
        probes_dof_offsets=$(f_add_record "$probes_dof_offsets" \
                                          "$provider $name $base $off")
        off=$(($off + 4))
    done <<EOF
$probes
EOF
    offtab_size=$off
    f_incr_bcount $offtab_size; f_align_bcount 8

    # The EOFFTAB section contains a set of 32bit words, one per
    # defined is-enabled probe.
    eofftab_offset=$BCOUNT
    eofftab_sect_index=$dof_secnum
    dof_secnum=$((dof_secnum + 1))
    f_gen_asm ""
    f_gen_asm "/* The EOFFTAB section.  */"
    f_gen_asm ".balign 8"
    off=0
    while read type provider name offset base base_sym; do
        test "$type" = "e" || continue
        f_gen_asm ".4byte $offset\t/* is-enabled probe ${provider}:${name}  */"
        probes_dof_eoffsets=$(f_add_record "$probes_dof_eoffsets" \
                                           "$provider $name $base $off")
        off=$(($off + 4))
    done <<EOF
$probes
EOF
    eofftab_size=$off
    f_incr_bcount $eofftab_size; f_align_bcount 8

    # The ARGTAB section is empty, but nonetheless has a section
    # header, so record its section index here.
    argtab_offset=0
    argtab_sect_index=$dof_secnum
    dof_secnum=$((dof_secnum + 1))

    # Generate a pair of sections PROBES and PROVIDER for each
    # provider.
    while read prov; do
        # The PROBES section contains an array of `struct
        # dtrace_dof_probe'.
        #
        # A `dtrace_dof_probe' entry characterizes the collection of
        # probes and is-enabled probes sharing the same provider, name and
        # base address.
        probes_sect_index=$dof_secnum
        dof_secnum=$((dof_secnum + 1))
        probes_offset=$BCOUNT
        num_base_probes=$(printf %s\\n "$base_probes" | $WC -l)
        while read provider name base base_sym; do
            # A base probe is identified by its provider, its name AND
            # its base address, so every lookup below must be keyed by
            # the three of them.  Leaving the base out would mix up the
            # data of a probe that is used in several functions.
            probe_key="^$provider $name $base "

            name_offset=$(printf %s\\n "$probes_dof_names" \
                          | $EGREP "$probe_key" | $CUT -d' ' -f4 | $HEAD -n 1)

            num_offsets=$(printf %s\\n "$probes_dof_offsets" \
                          | $EGREP -c "$probe_key")
            first_offset=0
            test "$num_offsets" -gt 0 && \
              first_offset=$(printf %s\\n "$probes_dof_offsets" \
                             | $EGREP "$probe_key" | $CUT -d' ' -f4 | $HEAD -n 1)

            num_eoffsets=$(printf %s\\n "$probes_dof_eoffsets" \
                           | $EGREP -c "$probe_key")
            first_eoffset=0
            test "$num_eoffsets" -gt 0 && \
              first_eoffset=$(printf %s\\n "$probes_dof_eoffsets" \
                              | $EGREP "$probe_key" | $CUT -d' ' -f4 | $HEAD -n 1)

            # Probes that are not described in the d-script have no
            # arguments record.  Use 0 in that case: an empty operand
            # would yield a malformed dtrace_dof_probe.
            num_args=$(printf %s\\n "$probes_args" \
                       | $EGREP "^$provider:$name:" | $CUT -d: -f3 | $HEAD -n 1)
            num_args=${num_args:-0}

            first_type=$(printf %s\\n "$probes_dof_types" \
                         | $EGREP "$probe_key" | $CUT -d' ' -f4 | $HEAD -n 1)
            first_type=${first_type:-0}

            reloctype=R_X86_64_GLOB_DAT
            test "$objbits" = "32" && reloctype=R_386_32

            f_gen_asm ""
            f_gen_asm "/* dtrace_dof_probe for ${provider}:${name} at ${base_sym}  */"
            f_gen_asm ".balign 8"
            f_gen_asm ".reloc ., $reloctype, $base_sym + 0"
            f_gen_asm ".8byte ${base}\t/* uint64_t dofpr_addr  */"
            f_gen_asm ".4byte 0\t/* uint32_t dofpr_func  */"
            f_gen_asm ".4byte $name_offset\t/* uint32_t dofpr_name   */"
            f_gen_asm ".4byte $first_type\t/* uint32_t dofpr_nargv  */"
            f_gen_asm ".4byte 0\t/* uint32_t dofpr_xargv  */"
            f_gen_asm ".4byte 0\t/* uint32_t dofpr_argidx */"
            # The offsets recorded above are byte offsets into tables
            # of 32bit words; the DOF fields are indexes.
            f_gen_asm ".4byte $(($first_offset/4))\t/* uint32_t dofpr_offidx  */"
            f_gen_asm ".byte  $num_args\t/* uint8_t dofpr_nargc  */"
            f_gen_asm ".byte  0\t/* uint8_t dofpr_xargc  */"
            f_gen_asm ".2byte $num_offsets\t/* uint16_t dofpr_noffs  */"
            f_gen_asm ".4byte $(($first_eoffset/4))\t/* uint32_t dofpr_enoffidx  */"
            f_gen_asm ".2byte $num_eoffsets\t/* uint16_t dofpr_nenoffs  */"
            f_gen_asm ".2byte 0\t/* uint16_t dofpr_pad1  */"
            f_gen_asm ".4byte 0\t/* uint16_t dofpr_pad2  */"

            f_incr_bcount "$dof_probesize"
        done <<EOF
$base_probes
EOF

        # The PROVIDER section contains a `struct dtrace_dof_provider'
        # instance describing the provider for the probes above.
        dof_secnum=$((dof_secnum + 1))
        providers_offsets=$(f_add_record "$providers_offsets" \
                                         "$prov $BCOUNT")
        # The dtrace_dof_provider.
        provider_name_offset=$(printf %s\\n "$providers_dof_names" \
                               | $EGREP "^$prov " | $CUT -d' ' -f2)

        f_gen_asm ""
        f_gen_asm "/* dtrace_dof_provider for $prov  */"
        f_gen_asm ".balign 8"
        # Links to several DOF sections.
        f_gen_asm ".4byte $strtab_sect_index\t/* uint32_t dofpv_strtab  */"
        f_gen_asm ".4byte $probes_sect_index\t/* uint32_t dofpv_probes  */"
        f_gen_asm ".4byte $argtab_sect_index\t/* uint32_t dofpv_prargs  */"
        f_gen_asm ".4byte $offtab_sect_index\t/* uint32_t dofpv_proffs  */"
        # Offset of the provider name into the STRTAB section.
        f_gen_asm ".4byte $provider_name_offset\t/* uint32_t dofpv_name  */"
        # The rest of fields can be 0 for our modest purposes :)
        f_gen_asm ".4byte 0\t/* uint32_t dofpv_provattr  */"
        f_gen_asm ".4byte 0\t/* uint32_t dofpv_modattr  */"
        f_gen_asm ".4byte 0\t/* uint32_t dofpv_funcattr  */"
        f_gen_asm ".4byte 0\t/* uint32_t dofpv_nameattr  */"
        f_gen_asm ".4byte 0\t/* uint32_t dofpv_argsattr  */"
        # But not this one, of course...
        f_gen_asm ".4byte $eofftab_sect_index\t/* uint32_t dofpv_prenoffs  */"

        f_incr_bcount $dof_providersize
    done<<EOF
$providers
EOF
    f_align_bcount 8

    # The section headers follow, one per section defined above.
    dof_secoff=$BCOUNT

    f_gen_dof_sect_header STRTAB \
                          $dof_sect_type_strtab \
                          1 1 $strtab_offset $strtab_size
    f_incr_bcount $dof_secsize; f_align_bcount 8

    f_gen_dof_sect_header OFFTAB \
                          $dof_sect_type_proffs \
                          4 4 $offtab_offset $offtab_size
    f_incr_bcount $dof_secsize; f_align_bcount 8

    f_gen_dof_sect_header EOFFTAB \
                          $dof_sect_type_prenoffs \
                          4 4 $eofftab_offset $eofftab_size
    f_incr_bcount $dof_secsize; f_align_bcount 8

    f_gen_dof_sect_header ARGTAB \
                          $dof_sect_type_prargs \
                          4 1 $argtab_offset 0
    f_incr_bcount $dof_secsize; f_align_bcount 8

    while read provider; do
        provider_offset=$(printf %s\\n "$providers_offsets" \
                          | $EGREP "^$provider " | $CUT -d' ' -f2)
        num_base_probes=$(printf %s\\n "$base_probes" | $WC -l)

        f_gen_dof_sect_header "$provider probes" \
                              $dof_sect_type_probes \
                              8 $dof_probesize $probes_offset \
                              $((num_base_probes * dof_probesize))
        f_incr_bcount $dof_secsize; f_align_bcount 8

        f_gen_dof_sect_header "$provider provider" \
                              $dof_sect_type_provider \
                              8 1 $provider_offset $dof_providersize
        f_incr_bcount $dof_secsize; f_align_bcount 8
    done <<EOF
$providers
EOF

    # Finally, cook the header.  It is generated last because it needs
    # the final values of BCOUNT, dof_secnum and dof_secoff, but it
    # must precede everything generated so far.
    asmbody="$asmprogram"
    asmprogram=""
    f_gen_asm "/* File generated by pdtrace.  */"
    f_gen_asm ""

    f_gen_asm ".section .SUNW_dof,\"a\",\"progbits\""
    f_gen_asm ".globl __SUNW_dof"
    f_gen_asm ".hidden __SUNW_dof"
    f_gen_asm ".size __SUNW_dof, ${BCOUNT}"
    f_gen_asm ".type __SUNW_dof, @object"
    f_gen_asm "__SUNW_dof:"

    f_gen_asm ""
    f_gen_asm "/* dtrace_dof_hdr */"
    f_gen_asm ".balign 8"
    f_gen_asm ".byte  0x7f, 'D, 'O, 'F\t/* dofh_ident[0..3] */"
    # NOTE(review): the model is always emitted as LP64, even when
    # generating 32-bit objects -- confirm this is intended.
    f_gen_asm ".byte  2\t\t/* model: 1=ILP32, 2=LP64 */"
    f_gen_asm ".byte  1\t\t/* encoding: 1: little-endian, 2: big-endian */"
    f_gen_asm ".byte  2\t\t/* DOF version: 1 or 2.  Latest is 2 */"
    f_gen_asm ".byte  2\t\t/* DIF version: 1 or 2.  Latest is 2 */"
    f_gen_asm ".byte  8\t\t/* number of DIF integer registers */"
    f_gen_asm ".byte  8\t\t/* number of DIF tuple registers */"
    f_gen_asm ".byte  0, 0\t\t/* dofh_ident[10..11] */"
    f_gen_asm ".4byte 0\t\t/* dofh_ident[12..15] */"
    f_gen_asm ".4byte 0\t/* uint32_t dofh_flags  */"  # See Limitations above.
    f_gen_asm ".4byte ${dof_hdrsize}\t/* uint32_t dofh_hdrsize  */"
    f_gen_asm ".4byte ${dof_secsize}\t/* uint32_t dofh_secsize */"
    f_gen_asm ".4byte ${dof_secnum}\t/* uint32_t dofh_secnum  */"
    f_gen_asm ".8byte ${dof_secoff}\t/* uint64_t dofh_secoff  */"
    f_gen_asm ".8byte ${BCOUNT}\t/* uint64_t dofh_loadsz  */"
    f_gen_asm ".8byte ${BCOUNT}\t/* uint64_t dofh_filesz  */"
    f_gen_asm ".8byte 0\t/* uint64_t dofh_pad  */"
    f_gen_asm ""

    # Ok, now assemble the program in OFILE.
    printf '%s\n' "$asmprogram$asmbody" | $AS "-$objbits" -o "$ofile" \
        || f_panic "could not assemble the DOF program into $ofile"

    # Next step is to change the sh_type of the ".SUNW_dof" section
    # headers to 0x6ffffff4 (SHT_SUNW_dof).
    #
    # Note that this code relies in the fact that readelf will list
    # the sections ordered in the same order than the section headers
    # in the section header table of the file.
    elfinfo=$($READELF -a "$ofile")

    # Mind the endianness.  The value is kept as a string of octal
    # escapes, which is expanded to the actual bytes by the `printf'
    # feeding `dd' below.
    if printf %s\\n "$elfinfo" | $EGREP -q "little endian"; then
        sht_sunw_dof=$(printf %s%s%s%s \\364 \\377 \\377 \\157)
    else
        sht_sunw_dof=$(printf %s%s%s%s \\157 \\377 \\377 \\364)
    fi

    shdr_start=$(printf %s\\n "$elfinfo" \
                | $EGREP "^[ \t]*Start of section headers:" \
                | $SED -E -e 's/.*headers:[ \t]*([0-9]+).*/\1/')
    test -n "$shdr_start" \
        || f_panic "could not extract the start of shdr from $ofile"

    shdr_num_entries=$(printf %s\\n "$elfinfo" \
                       | $EGREP "^[ \t]*Number of section headers:" \
                       | $SED -E -e 's/.*headers:[ \t]*([0-9]+).*/\1/')
    test -n "$shdr_num_entries" \
         || f_panic "could not extract the number of shdr entries from $ofile"

    shdr_entry_size=$(printf %s\\n "$elfinfo" \
                      | $EGREP "^[ \t]*Size of section headers:" \
                      | $SED -E -e 's/.*headers:[ \t]*([0-9]+).*/\1/')
    test -n "$shdr_entry_size" \
         || f_panic "could not fetch the size of section headers from $ofile"

    while read line; do
        data=$(printf %s "$line" \
               | $SED -E -e 's/.*\[(.*)\][ \t]+([a-zA-Z_.]+).*/\1:\2/')
        num=$(printf %s "$data" | $CUT -d: -f1)
        name=$(printf %s "$data" | $CUT -d: -f2)
        if test "$name" = ".SUNW_dof"; then
            # Patch the new sh_type in the proper entry of the section
            # header table.  The sh_type field lives 4 bytes into the
            # section header.
            printf "$sht_sunw_dof" \
                   | dd of="$ofile" conv=notrunc count=4 ibs=1 bs=1 \
                        seek=$((shdr_start + (shdr_entry_size * num) + 4)) \
                        2> /dev/null
            break
        fi
    done <<EOF
$(printf %s\\n "$elfinfo" | $EGREP "^[ \t]*\[[0-9 ]+\].*[A-Z]+.*PROGBITS")
EOF

}

# Patch the probed points in the given object file, replacing the
# function calls with NOPs.
#
# The probed points in the input object files are function calls.
# This function replaces these function calls by some other
# instruction sequences.  Which replacement to use depends on several
# factors, as documented below.
#
# Arguments:
#  $1 is the object file to patch.

f_patch_objfile()
{
    objfile=$1
    
    # Several x86_64 instruction opcodes, in octal.
    x86_op_nop=$(printf \\220)
    x86_op_ret=$(printf \\303)
    x86_op_call=$(printf \\350)
    x86_op_jmp32=$(printf \\351)
    x86_op_rex_rax=$(printf \\110)
    x86_op_xor_eax_0=$(printf \\063)
    x86_op_xor_eax_1=$(printf \\300)
    
    # Figure out the file offset of the text section in the object
    # file.
    text_off=0x$(objdump -j .text -h $objfile \
                 | grep \.text | $TR -s ' ' | $CUT -d' ' -f 7)

    while read type provider name offset base base_sym; do
        # Calculate the offset of the probed point in the object file.
        # Note that the `offset' of is-enabled probes is tweaked in
        # `f_collect_probes" to point ahead the patching point.
        probe_off=$((text_off + base + offset))
        if test "$type" = "e"; then
            if test "$objbits" -eq "32"; then
                probe_off=$((probe_off - 2))
            else # 64 bits
                probe_off=$((probe_off - 3))
            fi
        fi

        # The probed point can be either a CALL instruction or a JMP
        # instruction (a tail call).  This has an impact on the
        # patching sequence.  Fetch the first byte at the probed point
        # and do the right thing.
        nopret="$x86_op_nop"
        byte=$(dd if=$objfile count=1 ibs=1 bs=1 skip=$probe_off 2> /dev/null)
        test "$byte" = "$x86_op_jmp32" && nopret="$x86_op_ret"

        # Determine the patching sequence.  It depends on the type of
        # probe at hand (regular or is-enabled) and also if
        # manipulating a 32bit or 64bit binary.
        patchseq=
        case $type in
            p) patchseq=$(printf %s%s%s%s%s \
                                 "$nopret" \
                                 "$x86_op_nop" \
                                 "$x86_op_nop" \
                                 "$x86_op_nop" \
                                 "$x86_op_nop")
               ;;
            e) test "$objbits" -eq 64 && \
                 patchseq=$(printf %s%s%s%s%s \
                                   "$x86_op_rex_rax" \
                                   "$x86_op_xor_eax_0" \
                                   "$x86_op_xor_eax_1" \
                                   "$nopret" \
                                   "$x86_op_nop")
               test "$objbits" -eq 32 && \
                 patchseq=$(printf %s%s%s%s%s \
                                   "$x86_op_xor_eax_0" \
                                   "$x86_op_xor_eax_1" \
                                   "$nopret" \
                                   "$x86_op_nop" \
                                   "$x86_op_nop")
               ;;
            *) f_panic "internal error: wrong probe type $type";;
        esac

        # Patch!
        printf %s "$patchseq" \
               | dd of=$objfile conv=notrunc count=5 ibs=1 bs=1 seek=$probe_off 2> /dev/null
    done <<EOF
$probes
EOF
    
    # Finally, we have to remove the __dtrace_* and __dtraceenabled_*
    # symbols from the object file, along with their respective
    # relocations.
    #
    # Note that the most obvious call:
    #   strip -v -N whatever -w foo.o
    # will not work:
    #   strip: not stripping symbol `whatever' because it is named in a relocation
    #
    # Fortunately, using `-K !whatever' instead tricks strip into doing
    # the right thing, but this is black magic and may eventually stop
    # working...
    $STRIP -K '!__dtrace_*' -w $objfile
    $STRIP -K '!__dtraceenabled_*' -w $objfile
}

# Read the input .d file and print a header file with macros to
# invoke the probes defined in it.
#
# Globals read:
#   ofile - name of the output header; only its basename is used, to
#           build the include guard.
#   dfile - path of the d-script to parse.
#   TR, CUT, EGREP, SED - commands used for the text processing.
#
# The header is written to the standard output.  f_panic is called
# if no provider name can be extracted from $dfile.

f_gen_header_file()
{
    # Include guard: the basename of the output file with every `-'
    # turned into `_', cut at the first `.', and upper-cased.
    guard=$(basename "$ofile" | $TR - _ | $CUT -d. -f1 | $TR a-z A-Z)
    printf "/*\n * Generated by pdtrace.\n */\n\n"

    # The guard is passed as an argument, never as part of the format
    # string, so that a `%' or `\' in the file name is printed verbatim.
    printf '#ifndef _%s_H\n' "$guard"
    printf '#define _%s_H\n\n' "$guard"

    printf "#include <unistd.h>\n"
    printf "#include <inttypes.h>\n"
    printf '\n\n'

    printf "#ifdef __cplusplus\nextern \"C\" {\n#endif\n"

    printf "#define _DTRACE_VERSION 1\n\n"

    # Extract the provider name.  The character class used to capture
    # the name must accept `_', like the one used to select the line;
    # otherwise a provider such as `my_prov' is truncated to `my'.
    provider=$($EGREP "^ *provider +([a-zA-Z_]+)" < "$dfile" \
               | $SED -E -e 's/^ *provider +([a-zA-Z_]+).*/\1/')
    if test -z "$provider"; then
        f_panic "unable to parse the provider name from $dfile."
    fi
    # Upper-case name used in the macros; runs of `_' are squeezed
    # into a single one.
    u_provider=$(printf %s "$provider" | $TR a-z A-Z | $TR -s _)

    # Process every one-line probe declaration of the form
    # `probe NAME (TYPES);'.  The loop runs in a pipeline, so the
    # variables set in it are not relied upon afterwards.
    $EGREP "^ *probe +[a-zA-Z_]+ *\(.*\);" < "$dfile" | \
        while read -r line; do
            # Extract the probe name.
            name=$(printf %s "$line" \
                   | $SED -E -e 's/^ *probe +([a-zA-Z_]+).*/\1/')
            u_name=$(printf %s "$name" | $TR a-z A-Z | $TR -s _)

            # Generate an arg0,arg1,...,argN line for the probe: one
            # argN per comma-separated field between the parentheses.
            # Globbing is disabled while splitting so that a `*' in a
            # type (e.g. `char *') is not expanded.
            args=""; nargs=0
            aline=$(printf %s "$line" | $SED -e 's/.*(\(.*\)).*/\1/')
            set -f; IFS=,
            for arg in $aline; do
                args="${args}arg${nargs},"
                nargs=$((nargs + 1))
            done
            set +f; unset IFS
            args=${args%,}

            printf '%s\n' "#if _DTRACE_VERSION"
            printf '\n'

            # Emit the macros for the probe.  printf is used rather
            # than echo because the lines end in a backslash, which
            # some echo implementations treat specially.
            printf '%s\n' "#define ${u_provider}_${u_name}($args) \\"
            printf '%s\n' "   __dtrace_${provider}___${name}($args)"
            printf '%s\n' "#define ${u_provider}_${u_name}_ENABLED() \\"
            printf '%s\n' "   __dtraceenabled_${provider}___${name}()"

            # Emit the extern definitions for the probe dummy
            # functions.  The first one reuses the argument types of
            # the probe declaration as found in the d-script.
            printf '\n'
            printf '%s\n' "$line" \
                | $SED -E -e "s/^ *probe +/extern void __dtrace_${provider}___/"
            printf '%s\n' "extern int __dtraceenabled_${provider}___${name}(void);"


            printf "\n#else\n"

            # Emit empty macros for the probe
            printf '%s\n' "#define ${u_provider}_${u_name}($args)"
            printf '%s\n' "#define ${u_provider}_${u_name}_ENABLED() (0)"

            printf "\n#endif /* _DTRACE_VERSION */\n"
        done

    printf "#ifdef __cplusplus\n}\n#endif\n\n"
    printf '#endif /* _%s_H */\n' "$guard"
}

### Main program.
#
# Parse and validate the command line, then run the requested mode:
#   -G  inspect the object file given as the single remaining
#       argument, generate the DOF program and patch the object file.
#   -h  write a header with the probe macros to the file given with -o.
# Requesting both modes at once is reported through f_usage.

# Process command line arguments.

test "$#" -eq "0" && f_usage

genelf=0
genheader=0
objbits=64
ofile=
dfile=
while getopts VG3264hs:o: name; do
    case $name in
        V) f_version;;
        s) dfile="$OPTARG";
           test -f "$dfile" || f_panic "cannot read $dfile";;
        o) ofile="$OPTARG";;
        G) genelf=1;;
        h) genheader=1;;
        # Note the trick to support -32: getopts sees it as -3
        # followed by -2.  666 is a marker meaning "a 3 was seen";
        # a 2 is only accepted right after it.
        3) objbits=666;;
        2) test "$objbits" -eq 666 || f_usage; objbits=32;;
        # Likewise for -64, with 777 as the marker.
        6) objbits=777;;
        4) test "$objbits" -eq 777 || f_usage; objbits=64;;
        ?) f_usage;;
    esac
done
shift $((OPTIND - 1))

# A dangling -3 or -6 leaves one of the markers in objbits.
test "$objbits" -eq "32" || test "$objbits" -eq "64" \
    || f_usage

test $((genelf + genheader)) -gt 1 && \
    { echo "Please use either -G or -h."; f_usage; }

test -n "$dfile" || { echo "Please specify a .d file with -s."; exit 2; }

if test "$genelf" -gt 0; then
    # In this mode there must be a remaining argument: the name of the
    # object file to inspect for probed points.
    test "$#" -ne "1" && f_usage
    test -f "$1" || f_panic "cannot read $1"
    objfile=$1

    # Collect probe information from the input object file and the
    # d-script.  The file names are quoted so that a path containing
    # blanks or glob characters reaches the helpers as one argument.
    f_collect_probes "$objfile"
    f_collect_probes_args "$dfile"

    # Generate the assembly code and assemble the DOF program in
    # OFILE.  Then patch OBJFILE to remove the dummy probe calls.
    f_gen_dof_program
    f_patch_objfile "$objfile"
fi

if test "$genheader" -gt 0; then
    test -n "$ofile" || { echo "Please specify an output file with -o."; exit 2; }

    # In this mode no extra arguments shall be present.
    test "$#" -ne "0" && f_usage

    # Quote the redirection target: some shells split or glob an
    # unquoted word here and fail with an "ambiguous redirect" error.
    f_gen_header_file > "$ofile"
fi

# pdtrace ends here.