# Tcl support for netstrings http://cr.yp.to/proto/netstrings.txt #JMN 2008-03 Released under a BSD style license. #Please send comments & bug-reports to julian@precisium.com # Contributions also welcome if accompanied by a statement indicating the contribution also falls under a BSD style license. (ie same license as Tcl) #2008-06 - Ver0.1.1 - fixed echo_server package provide netstring [namespace eval netstring { variable version set maxlengthchars 0 ;#default to no limit to comply with spec. #in practice - a dos/resource-hogging attack is possible if the netstring::gets mechanism is being used to set version 0.1.1 }] proc ::netstring::encode {args} { set netstring_sequence "" foreach string $args { append netstring_sequence [string length $string]:$string, } return $netstring_sequence } #::netstring::decode_to_list - main decode function # #This is not named simply 'decode' because of the extra brackets and non-invertability of encode/decode_to_list caused when encoding a single string containing spaces. # #decodes an outer sequence of one or more netstrings, producing a list of payloads as the result. #note that decode will not recursively decode any nested netstrings. #This is intentional - it is the job of the caller to decide how to handle the contents. proc ::netstring::decode_to_list {netstring_sequence} { set tail $netstring_sequence set decoded [list] set posn 0 ;#position of current netstring in original sequence. while {[string length $tail]} { set colon [string first : $tail] #::puts "->tail: $tail colon @ $colon" if {$colon < 1} { error "invalid netstring in sequence at position $posn - expected length followed by colon" } set length [string range $tail 0 $colon-1] if {![string is integer $length]} { error "invalid netstring in sequence at position $posn - invalid length '$length'" } #leading zeroes are prohibited in the 'length' #we could detect that here and raise an error - but it hardly seems worthwhile(?) set pay [string range $tail $colon+1 $colon+$length] ;#an outer netstring payload if {[string length $pay] != $length} { error "invalid netstring in sequence at position $posn - payload only [string length $pay] bytes, expected $length" } lappend decoded $pay set next [expr {$colon + $length + 2}] if {[string range $tail $next-1 $next-1] ne ","} { error "invalid netstring in sequence at position $posn - missing trailing comma" } incr posn $next set tail [string range $tail $next end] } #we always return a list - even if only a single netstring was in the sequence. #This allows the use of llength on the result to determine how many netstrings were in the input sequence. return $decoded } #netstring::split_partial - used to determine whether or not a string holds a complete sequence of netstrings, # and to split at the boundary of such sequence and the trailing partial netstring. # # combined with 'chan read' - this is the more efficient way to retrieve sequences of netstrings from a channel. (netstring::gets is convenient but not optimal) # #return a 2 element list containing 1) a sequence of complete netstrings 2) a trailing partial netstring. # raise an error if the data following the last complete netstring can't possibly form the beginnings of a valid netstring. # may return empty string for either or both elements. (will return empty string for both if & only if input string is empty) proc ::netstring::split_partial {possibly_incomplete_sequence} { set tail $possibly_incomplete_sequence set complete "" set incomplete "" set posn 0 ;#position of current netstring in original sequence. while {[string length $tail]} { set colon [string first : $tail] if {$colon < 1} { if {![string is integer $tail]} { error "invalid netstring in sequence at position $posn - cannot form part of valid netstring header" } #we seem to have only the length (or part thereof) for the next netstring return [list $complete $tail] } set length [string range $tail 0 $colon-1] if {![string is integer $length]} { error "invalid netstring in sequence at position $posn - invalid length '$length'" } set next [expr {$colon + $length + 2}] if {[string length $tail] < $next} { #partial return [list $complete $tail] } else { if {[string range $tail $next-1 $next-1] eq {,}} { append complete [string range $tail 0 $next-1] } else { error "invalid netstring in sequence at position $posn - missing trailing comma" } } incr posn $next set tail [string range $tail $next end] } return [list $complete $tail] } #::netstring::decode #decode_to_list is the preferred way to decode a sequence of netstrings, even of length 1. #decode is provided to ensure the invertability of simple encode/decode operations # in order to satisfy the principle of least surprise for users who may not expect the extra braces around a decoded item that was originally a single string containing spaces. # proc ::netstring::decode {single_netstring} { set tail $single_netstring set colon [string first : $tail] #::puts "->tail: $tail colon @ $colon" if {$colon < 1} { error "invalid netstring - expected length followed by colon" } set length [string range $tail 0 $colon-1] if {![string is integer $length]} { error "invalid netstring in sequence at position $posn - invalid length '$length'" } set pay [string range $tail $colon+1 $colon+$length] ;#an outer netstring payload if {[string length $pay] != $length} { error "invalid netstring in sequence at position $posn - payload only [string length $pay] bytes, expected $length" } set next [expr {$colon + $length + 2}] if {[string range $tail $next-1 $next-1] ne ","} { error "invalid netstring in sequence at position $posn - missing trailing comma" } set tail [string range $tail $next end] if {[string length $tail]} { error "'decode' failed because there is data following the supplied netstring. If this is a sequence of netstrings, use 'decode_to_list' instead." } return $pay } #- for small inputs at least, appears to be slower than even the standard sequence decoder (decode_to_list) above. #proc ::netstring::decode_single {data} { # if {![regexp {\A(0|[1-9]\d*):(.*),} $data _match header payload]} { # error "Invalid netstring" # } else { # if {$header ne [string length $payload]} { # error "netstring bytecount mismatch. Invalid netstring, or data is actually a sequence of netstrings. try 'netstring::decode'" # } # return $payload # } #} #use filevent readable to get the next complete netstring from the channel? #netstring::gets #blocking(*) read of the next complete netstring from a channel - returns *payload* only. #Note that netstring::gets is not optimally efficient in that it performs more read operations than strictly necessary. # (it is designed not to consume data beyond the netstring which is assumed to start at the current point in the channel) # # The preferred mechanism for reading netstrings from a channel is to use the standard 'chan read' function, and # #raise an error if eof occurs before the end of a netstring is reached. #raise an error if the data is not a valid netstring # # * !note! Use of netstring::gets on non-blocking channels is unreliable. # In non-blocking mode we don't really want to consume data from the channel if a complete netstring is not available. # currently - the partial netstring is removed - this means gets will fail intermittently.. ie whenever we don't have a complete netstring in the buffer. # !todo - fix (but how?) (also, certain channels e.g Memchan's fifo don't seem to be able to go into blocking mode.) # we could fake it by buffering the partial data in a global array keyed on socket # - but this makes the assumption that netstring::gets is the only mechanism being used to read this channel. # proc ::netstring::gets {chan {varname {}}} { variable maxlengthchars ;#default 0 follows spec - but allows more resource-intensive dos attacks. set length "" set pay "" set bytes_read 0 while {true} { set char [read $chan 1] if {![string length $char]} { error "data unavailable on channel $chan. eof:[eof $chan]" } incr bytes_read if {$char in {0 1 2 3 4 5 6 7 8 9}} { append length $char #guard against leading zeroes? But why spend resources doing this ... # If evil client wants to stall/resource-hog us and keep us in single-byte read mode they could just send a continuous sequence of any numeral anyway. # According to the spec, "there are no restricitons on length". # #if {[string length $length] > 1) && [string match "0*" $length]} { # error "bad netstring header on channel $chan. Leading zeroes are not permitted" #} if {$maxlengthchars > 0} { #maxlength is the maximum number of chars in the length field (so we need only check string length for each new numeral - not shimmer to integer each time) if {[string length $length] > $maxlengthchars} { error "Excessive number of characters in the length field. (netstring::maxlengthchars configured as: $maxlengthchars )" } } } elseif {$char eq {:}} { #end of netstring header. set got [read $chan [expr {$length + 1}]] ;#read in trailing comma too. incr bytes_read [string length $got] set pay [string range $got 0 end-1] ;#actual payload if {([string length $pay]) != $length} { error "failed to retrieve $length bytes from channel. Received only [string length $pay]" } if {[string range $got end end] ne {,}} { error "Invalid netstring. Not terminated with comma. Found '[string range $got end end]' instead" } #!note - we don't support a varname that happens to be the empty string! if {[string length $varname]} { upvar 1 $varname output set output $pay return [string length $pay] ;#we return count of payload bytes read - not count of bytes read from the channel. } else { return $pay } } else { #bad netstring header #consume all data and include in error.. # - otherwise we'd be raising an error for every char whilst out of sync. # - there's no guarantee that even if the next chunk of data is a valid netstring that we are really back in sync. # - the prudent thing to do would probably be for the caller of netstring::gets to close the connection. set blocking [fconfigure $chan -blocking] if {$blocking} { fconfigure $chan -blocking 0 set junk [read $chan] fconfigure $chan -blocking 1 } else { set junk [read $chan] } error "Invalid netstring header. received character: '$char' (followed by [string length $junk] bytes discarded)" } } #error "eof on channel before end of netstring" } #wrap the supplied data as a netstring and pump it over the channel immediately. proc ::netstring::puts {chan rawdata} { ::puts -nonewline $chan [string length $rawdata]:$rawdata, flush $chan } #same as netstring::puts - but without implicit flush. will buffer as per the configured buffering for the channel. proc ::netstring::putsbuffered {chan rawdata} { ::puts -nonewline $chan [string length $rawdata]:$rawdata, } #!todo? copy from chan to chan.. breaking into netstrings at some reasonable chunk size? #----------------------------------------------------------------------------- namespace eval ::netstring::int { #non-api functions } proc ::netstring::int::connection {readtype socket host port} { ::puts stdout "Got connection -> $socket $host $port" fconfigure $socket -buffering none -blocking 0 -buffersize 0 fileevent $socket readable [list ::netstring::int::handle_request_with_$readtype $socket {}] } proc ::netstring::int::handle_request_with_gets {socket prevdata} { chan configure $socket -blocking 1 if {![catch {::netstring::gets $socket} result]} { ::puts stdout "--> got $result" if {$result eq "shutdown"} { ::puts stdout ">>CLOSING SOCKET $socket" netstring::puts $socket [list ok server_shutdown] close $socket set ::netstring::int::serving 0 return } else { netstring::puts $socket [list ok received_netstring [netstring::encode $result]] } } else { ::puts stderr "!!! error $result" netstring::puts $socket [list err received_junk $result] } flush $socket } proc ::netstring::int::handle_request_with_read {socket prevdata} { #should attempt to read in some chunk size related to the network buffer size # rather than just whatever length netstrings are thrown at us (better interleaving if handling multiple sockets concurrently?) set readchunk_size 8192 if {[catch {set got [read $socket $readchunk_size] } errMsg]} { ::puts stdout "error reading socket: $errMsg" catch {close socket} } ::puts "--- read [string length $got] bytes ---" if {([string length $got] == 0) && [eof $socket]} { ::puts stdout "socket '$socket' disconnected" close $socket return } if {![catch {lassign [netstring::split_partial $prevdata$got] complete_sequence partial} errMsg]} { #successfully split # respond to the complete netstrings set response_sequence "" set seq_len 0 foreach ns [::netstring::decode_to_list $complete_sequence] { ::puts stdout "--> got $ns" incr seq_len if {$ns eq "shutdown"} { ::puts stdout ">>CLOSING SOCKET $socket" netstring::puts $socket [list ok server_shutdown] close $socket set ::netstring::int::serving 0 return } else { append response_sequence [netstring::encode [list ok received_netstring [netstring::encode $ns]]] } } puts stdout ">> writing back $response_sequence ($seq_len items)" puts -nonewline $socket $response_sequence flush $socket if {[string length $partial]} { #partial will be prepended to the next read. fileevent $socket readable [list ::netstring::int::handle_request_with_read $socket $partial] } else { fileevent $socket readable [list ::netstring::int::handle_request_with_read $socket {}] } } else { #!todo - determine if there were any complete netstrings prior to the error - and respond to those ones. ::puts stderr "!!! error $errMsg" netstring::puts $socket [list err received_junk $errMsg] fileevent $socket readable [list ::netstring::int::handle_request_with_read $socket {}] } flush $socket } #simple netstring echo test server # return XX:ok received_netstring:, # or # return XX:err received_junk:, # # usage e.g # In shell 1: # >package require netstring # >netstring::echo_server # # In shell 2: # >package require netstring # >set sock [netstring::echo_client] # >netstring::puts $sock test1 # >netstring::puts $sock test2 # >netstring::puts $sock test3 # >netstring::puts $sock test4 # # >netstring::gets $sock # ok received_netstring 5:test1, # >netstring::gets $sock # ok received_netstring 5:test2, # # #or read all the available data in the standard way to see the raw netstrings sent by the server. # >read $sock # 30:ok received_netstring 5:test3,,30:ok received_netstring 5:test4,, # # proc ::netstring::echo_server {args} { #dict set default -port 0 ;#auto-pick a free port dict set default -port 4242 dict set default -readtype read ;#read|gets set opts [dict merge $default $args] set s [socket -server [list ::netstring::int::connection [dict get $opts -readtype]] [dict get $opts -port]] set ::netstring::int::serving [lindex [fconfigure $s -sockname] 2] ::puts stdout "listening on port $::netstring::int::serving" vwait ::netstring::int::serving } proc ::netstring::echo_client {args} { dict set default -port 4242 dict set default -host localhost set opts [dict merge $default $args] set con [socket [dict get $opts -host] [dict get $opts -port]] fconfigure $con -blocking 0 return $con } #A convenient way to test with larger chunks of data. proc ::netstring::echo_file {socket fname} { set size [file size $fname] set fd [open $fname r] chan configure $fd -translation binary chan puts -nonewline $socket ${size}: chan copy $fd $socket chan puts -nonewline $socket , flush $socket }