;;; hostinfo v1.0 extension to Chicken Scheme
;;; Description: Look up host, service, and protocol information

;; $Id: hostinfo.scm,v 1.22 2005/06/08 21:19:57 zb Exp zb $

;; Copyright (c) 2005, Zbigniew Szadkowski.  All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are met:
;;
;;   Redistributions of source code must retain the above copyright notice,
;;   this list of conditions and the following disclaimer. Redistributions in
;;   binary form must reproduce the above copyright notice, this list of
;;   conditions and the following disclaimer in the documentation and/or
;;   other materials provided with the distribution. Neither the name of the
;;   author nor the names of its contributors may be used to endorse or
;;   promote products derived from this software without specific prior
;;   written permission.
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
;; THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
;; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
;; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

;;;

;; This extension performs host, protocol and service information lookups
;; via underlying calls to gethostbyname(3), getprotobyname(3), and
;; getservbyname(3).  Depending on your system, this may consult DNS,
;; NIS, /etc/hosts, /etc/services, /etc/protocols, and so on.

;; A simple interface is provided for the most commmon queries. Also
;; provided is a more comprehensive interface using records, which
;; contain all data available in a lookup.

;; IP addresses are represented by 4 (IPv4) or 16 (IPv6) byte
;; u8vectors. The interface requires, and returns, addresses in this
;; format; functions are provided to convert between the string and
;; u8vector representations. However, the "do what I want" procedures
;; (e.g. host-information) will do the conversion for you.

;; Caveats:
;; - IPv6 addresses can be converted to and from strings, and the underlying structure
;;   supports IPv6, but lookup of IPv6 addresses and records is not currently implemented.
;; - array0->string-vector and array0->bytevector-vector contain redundant code.
;; - host, services, and protocol-information check their argument types, even
;;   though the underlying code already does it.

(declare
 (uses srfi-4 lolevel)
 (fixnum)
 (export
;;; Short and sweet lookups
  hostname->ip ip->hostname 
  protocol-name->number protocol-number->name  
  service-port->name service-name->port
;;; Entire host, protocol or service record lookup
  hostname->hostinfo ip->hostinfo
  protocol-name->protoinfo protocol-number->protoinfo 
  service-port->servinfo service-name->servinfo
;;; Record accessors and predicates
  hostinfo? hostinfo-name hostinfo-aliases hostinfo-addresses 
            hostinfo-address hostinfo-type hostinfo-length 
  protoinfo? protoinfo-name protoinfo-aliases protoinfo-number 
  servinfo? servinfo-name servinfo-aliases servinfo-port servinfo-protocol 
;;; One-stop shops -- does what you want
  host-information protocol-information service-information
;;; Utilities
  string->ip ip->string))

(require-extension vector-lib) ;; hostent*-addresses uses vector-map.

(cond-expand [paranoia]
             [else
              (declare (no-bound-checks))])

(cond-expand [unsafe
              (eval-when (compile)
                (define-macro (##sys#check-string . _) '(##core#undefined)))]
             [else])

#>
# include <netinet/in.h>  /* in_addr */
# include <arpa/inet.h>   /* inet_ntop, ... */
# include <netdb.h>       /* hostent, gethostby* */
<#

;;; C data structure conversions

(define (c-pointer->byte-vector ptr len)
  (let ((bv (make-byte-vector len))
        (memcpy (foreign-lambda bool "C_memcpy" byte-vector c-pointer integer)))
    (memcpy bv ptr len)
    bv))

;; Convert from null-terminated array of c-strings to vector of strings.
;; These functions use C_alloc and so are not suitable for large datasets.
;; Note: get_argv_2 of runtime.c shows how to build a list instead of a vector (in reverse).
(define array0->string-vector
  (foreign-primitive scheme-object (((pointer "char *") list)) #<<EOF
    char **p; int len = 0;
    C_word *a, vec, *elt;

    for (p = list; *p; ++p, ++len);

    a = C_alloc(C_SIZEOF_VECTOR(len));
    vec = (C_word)a;
    *a++ = C_make_header(C_VECTOR_TYPE, len);
    
    for (p = list; *p; ++p) {
       len = strlen(*p);
       elt = C_alloc(C_SIZEOF_STRING(len));
       /* Both C_mutate and *a++ =  seem to work fine here. */
       C_mutate(a++, C_string(&elt, len, *p));
    }
    return(vec);
EOF
))
  
;; Convert from null-terminated array of IP addresses to vector of strings.
(define array0->bytevector-vector
  (foreign-primitive scheme-object (((pointer "char *") list) (integer addrlen)) #<<EOF
    char **p; int len = 0;
    C_word *a, vec, *elt;

    for (p = list; *p; ++p, ++len);

    a = C_alloc(C_SIZEOF_VECTOR(len));
    vec = (C_word)a;
    *a++ = C_make_header(C_VECTOR_TYPE, len);
    
    for (p = list; *p; ++p) {
       elt = C_alloc(C_SIZEOF_STRING(addrlen));
       C_mutate(a++, C_bytevector(&elt, addrlen, *p));
    }
    return(vec);
EOF
))
  
#| Not currently used.  Could make the array0-> stuff somewhat cleaner.
(define array0-length
  (foreign-lambda* integer (((pointer "void *") list)) #<<EOF
    void **p; int len = 0;
    for (p = list; *p; ++p, ++len);
    return(len);
EOF
))
|#

;;; string->ip conversion

;; inet_pton does not like "127.1", nor "0", nor any other non-standard
;; representation of IP addresses.  This is specified by RFC2553.
;; inet_aton resolves these addresses.  We use inet_pton here.

(define-foreign-variable inet4-addrstrlen integer "INET_ADDRSTRLEN")
(define-foreign-variable inet6-addrstrlen integer "INET6_ADDRSTRLEN")
(define-foreign-variable af-inet  integer "AF_INET")
(define-foreign-variable af-inet6 integer "AF_INET6")

(define inet-ntop (foreign-lambda c-string "inet_ntop" integer u8vector c-string integer))
(define inet-pton (foreign-lambda* bool ((integer type) (c-string src) (byte-vector dest))
                    "return(inet_pton(type, src, dest) == 1);"))

(define (string->ip4 str)
  (##sys#check-string str 'string->ip4)
  (let ((bv (make-byte-vector 4)))
    (and (inet-pton af-inet str bv)
         (byte-vector->u8vector bv))))

(define (string->ip6 str)
  (##sys#check-string str 'string->ip6)
  (let ((bv (make-byte-vector 16)))
    (and (inet-pton af-inet6 str bv)
         (byte-vector->u8vector bv))))

(define (string->ip str)
  (or (string->ip4 str)
      (string->ip6 str)))

;;; ip->string conversion

(define (ip4->string addr)
  (let ((len inet4-addrstrlen))
    (inet-ntop af-inet addr (make-string len) len)))

(define (ip6->string addr)
  (let ((len inet6-addrstrlen))
    (inet-ntop af-inet6 addr (make-string len) len)))
  
;; Take an IPv4 or IPv6 u8vector and convert it into the
;; appropriate string representation, via inet_ntop.
(define (ip->string addr)
  (let ((len (u8vector-length addr)))
    (cond ((fx= len 4)  (ip4->string addr))
          ((fx= len 16) (ip6->string addr))
          (else
           (error "Invalid IP address length" addr)))))

;;; hostent* raw structure

(define-foreign-record (hostent* "struct hostent")
  (c-string h_name)        ;; Using const here generates incorrect code; bug?
  (c-pointer h_aliases)
  (integer h_addrtype)
  (integer h_length)
  (c-pointer h_addr_list))

;; The IPv6 equivalents of these are getipnodebyname and
;; getipnodebyaddr.
(define gethostent/name (foreign-lambda hostent* "gethostbyname" c-string))

(define (gethostent/addr addr)
  (if (fx= (u8vector-length addr) 4)
      (gethostent/addr/bv (u8vector->byte-vector addr))
      (error "invalid IP address length; only IPv4 supported" addr)))

;; Warning: handle IPv6!!
(define gethostent/addr/bv (foreign-lambda* hostent* ((byte-vector addr))
        "return(gethostbyaddr((const char *)addr, 4, AF_INET));"))

;; Some convenient accessors for the raw hostent* structure--with raw c pointers
;; converted to the appropriate scheme objects.  We only use these once or twice
;; below, so their main advantage is clarity.
(define hostent*-name hostent*-h_name)
(define hostent*-type hostent*-h_addrtype)
(define hostent*-length hostent*-h_length)
(define (hostent*-addresses h)
  (vector-map (lambda (i x) (byte-vector->u8vector x))
              (array0->bytevector-vector (hostent*-h_addr_list h)
                                         (hostent*-length h))))
(define (hostent*-aliases h)
  (array0->string-vector (hostent*-h_aliases h)))

(define (hostent*-address h)
  (let* ((get-addr (foreign-lambda* c-pointer ((hostent* h)) "return(h->h_addr_list[0]);"))
         (addr (get-addr h)))
    (byte-vector->u8vector
     (c-pointer->byte-vector addr (hostent*-h_length h)))))

;; This was originally made a macro so we could easily return multiple
;; values -- but we're now returning a hostinfo structure.  Eh.
(define-macro (hostent*->hostinfo h)
  `(make-hostinfo (hostent*-name ,h)
                 (hostent*-addresses ,h)
                 (hostent*-aliases ,h)))

;;; hostinfo and host information

;; Structure accessors created by define-foreign-record do not intercept
;; NULL pointer input, including #f.
(define (hostname->ip host)
  (and-let* ((h (gethostent/name host)))
    (hostent*-address h)))

(define (hostname->hostinfo host)
  (and-let* ((h (gethostent/name host)))
    (hostent*->hostinfo h)))

(define (ip->hostname addr)
  (and-let* ((h (gethostent/addr addr)))
    (hostent*-name h)))

(define (ip->hostinfo addr)
  (and-let* ((h (gethostent/addr addr)))
    (hostent*->hostinfo h)))

;; A simple hostinfo structure.
(define-record hostinfo name addresses aliases)

;; "Accessors" for phantom fields.
;; We don't need to store length or type, as these are artifacts
;; of the C implementation, and can be derived from the address itself.
(define (hostinfo-address h) (vector-ref (hostinfo-addresses h) 0))
(define (hostinfo-length h) (u8vector-length (hostinfo-address h)))
(define (hostinfo-type h)
  (let ((len (u8vector-length (hostinfo-address h))))
    (cond ((fx= len 4)  'AF_INET)   ;; Kind of a dummy implementation--
          ((fx= len 16) 'AF_INET6)  ;; not sure what value would be appropriate
          (else
           (error "Invalid IP address length" (hostinfo-address h))))))

;; Format the structure for easy interactive viewing--should be possible to
;; add a ctor for this representation, though it's not clear why you'd want to.
(define-record-printer (hostinfo h port)
  (fprintf port "#,(hostinfo name: ~S addresses: ~S aliases: ~S)"
           (hostinfo-name h) (hostinfo-addresses h) (hostinfo-aliases h)))

;; Warning:  lookup of an IP address which is invalid yet numeric will
;; return a false positive.  Bug in gethostbyname?
;; E.g.  (hostname->hostinfo "1") => #,(hostinfo name: "1" addresses: (#u8(0 0 0 1)))
;; ** If we used inet_aton for string->ip, then these cases would
;; be transformed into u8vector IPs, and the lookup would correctly fail.

;; Return a hostinfo record.  HOST is a u8vector IP address, a string
;; hostname, or a string numeric IP address.
(define (host-information host)
  (if (u8vector? host)
      (ip->hostinfo host)
      (begin
        (##sys#check-string host 'host-information)
        (cond ((string->ip host) => ip->hostinfo)
              (else (hostname->hostinfo host))))))

;;; protocols

(define-foreign-record (protoent* "struct protoent")
  (c-string p_name)
  (c-pointer p_aliases)
  (integer p_proto))

(define getprotoent/name   (foreign-lambda protoent* "getprotobyname" c-string))
(define getprotoent/number (foreign-lambda protoent* "getprotobynumber" integer))

;; Raw structure -> scheme-object accessors
(define protoent*-name protoent*-p_name)
(define protoent*-proto protoent*-p_proto)
(define (protoent*-aliases p)
  (array0->string-vector (protoent*-p_aliases p)))

(define-record protoinfo name number aliases)
(define-record-printer (protoinfo p port)
  (fprintf port "#,(protoinfo name: ~S number: ~S aliases: ~S)"
           (protoinfo-name p) (protoinfo-number p) (protoinfo-aliases p)))

(define (protocol-name->number name)
  (and-let* ((p (getprotoent/name name)))
    (protoent*-proto p)))
(define (protocol-number->name nr)
  (and-let* ((p (getprotoent/number nr)))
    (protoent*-name p)))

(define-macro (protoent*-values p)
  `(make-protoinfo (protoent*-name ,p)
                  (protoent*-proto ,p)
                  (protoent*-aliases ,p)))

(define (protocol-name->protoinfo name)
  (and-let* ((p (getprotoent/name name)))
    (protoent*-values p)))
(define (protocol-number->protoinfo nr)
  (and-let* ((p (getprotoent/number nr)))
    (protoent*-values p)))

(define (protocol-information proto)
  (if (fixnum? proto)
      (protocol-number->protoinfo proto)
      (begin
        (##sys#check-string proto 'protocol-information)
        (protocol-name->protoinfo proto))))

;;; services

(define-foreign-type port-number int
  (foreign-lambda int "htons" int)
  (foreign-lambda int "ntohs" int) )

(define-foreign-record (servent* "struct servent")
  (c-string s_name)
  (c-pointer s_aliases)
  (port-number s_port)
  (c-string s_proto))

(define-macro (servent*-values s)
  `(make-servinfo (servent*-s_name ,s)
                 (servent*-s_port ,s)
                 (array0->string-vector (servent*-s_aliases ,s))
                 (servent*-s_proto ,s)))

(define getservent/name (foreign-lambda servent* "getservbyname" c-string c-string))
(define getservent/port (foreign-lambda servent* "getservbyport" port-number c-string))

(define-record servinfo name port aliases protocol)
(define-record-printer (servinfo s port)
  (fprintf port "#,(servinfo name: ~S port: ~S aliases: ~S protocol: ~S)"
           (servinfo-name s) (servinfo-port s) (servinfo-aliases s) (servinfo-protocol s)))

;; If provided with the optional protocol argument (a string), these will
;; restrict their search to that protocol.
(define (service-name->port name . pr)
  (let-optionals pr ((proto #f))
    (and-let* ((s (getservent/name name proto)))
      (servent*-s_port s))))
(define (service-port->name port . pr)
  (let-optionals pr ((proto #f))
    (and-let* ((s (getservent/port port proto)))
      (servent*-s_name s))))
(define (service-name->servinfo name . pr)
  (let-optionals pr ((proto #f))
    (and-let* ((s (getservent/name name proto)))
      (servent*-values s))))
(define (service-port->servinfo port . pr)
  (let-optionals pr ((proto #f))
    (and-let* ((s (getservent/port port proto)))
      (servent*-values s))))

;; Return service information given a service name or port, and an
;; optional protocol name or number to restrict the search to.
;; Note: if the protocol-number->name lookup fails,
;; an error is thrown, as this was probably not intended.
(define (service-information service . pr)
  (let-optionals pr ((proto #f))
    (let ((proto (if (fixnum? proto)
                     (or (protocol-number->name proto)
                         (error 'service-information "illegal protocol number" proto))
                     proto)))
      (if (fixnum? service)
          (service-port->servinfo service proto)
          (begin
            (##sys#check-string service 'service-information)
            (service-name->servinfo service proto))))))

;;; Tests

(when #f
  (host-information "www.call-with-current-continuation.org")
  (host-information '#u8(194 97 107 133))
  (host-information "194.97.107.133")
      ; =>  #,(hostinfo name: "www003.lifemedien.de" addresses: #(#u8(194 97 107 133))
      ;        aliases: #("www.call-with-current-continuation.org"))
  (ip->hostname '#u8(194 97 107 133))   ; "www003.lifemedien.de"
  (string->ip "0708::0901")             ;  #u8(7 8 0 0 0 0 0 0 0 0 0 0 0 0 9 1)
  (ip->string '#u8(127 0 0 1))          ;  "127.0.0.1"
  (hostinfo-aliases
   (hostname->hostinfo
    (ip->hostname (hostname->ip
                   (hostinfo-name
                    (host-information "www.call-with-current-continuation.org"))))))
                                        ; => #("www.call-with-current-continuation.org")

  (protocol-information 17)             ; => #,(protoinfo name: "udp" number: 17 aliases: #("UDP"))
  (protoinfo-name (protocol-information 2))   ; => "igmp"
  (protoinfo-aliases (protocol-name->protoinfo
                      (protocol-number->name
                       (protoinfo-number
                        (protocol-information "ospf")))))  ; => #("OSPFIGP")
  (protocol-name->number "OSPFIGP")     ; 89 (you can look up aliases, too)
  
  (servinfo-protocol (service-name->servinfo
                      (service-port->name
                       (servinfo-port (service-information "ssh")))))  ; => "udp" (yes, really)
  (service-information "ssh" "tcp") ; => #,(servinfo name: "ssh" port: 22 aliases: #() protocol: "tcp")
  (service-information "ssh" "tco") ; => #f
  (service-information 512 "tcp")   ; #,(servinfo name: "exec" port: 512 aliases: #() protocol: "tcp")
  (service-information 512 "udp")   ; #,(servinfo name: "comsat" port: 512 aliases: #("biff") protocol: "udp")
  (service-information 512 17)      ; same as previous
  (service-information 512 170000)  ; Error: (service-information) illegal protocol number: 170000
)
