mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			318 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			Raku
		
	
	
	
	
	
			
		
		
	
	
			318 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			Raku
		
	
	
	
	
	
| # ----------------------
 | |
| # LWP::Simple for Perl 6
 | |
| # ----------------------
 | |
| use v6;
 | |
| use MIME::Base64;
 | |
| use URI;
 | |
| 
 | |
| class LWP::Simple:auth<cosimo>:ver<0.085>;
 | |
| 
 | |
| our $VERSION = '0.085';
 | |
| 
 | |
| enum RequestType <GET POST>;
 | |
| 
 | |
| has Str $.default_encoding = 'utf-8';
 | |
| our $.class_default_encoding = 'utf-8';
 | |
| 
 | |
| # these were intended to be constant but that hit pre-compilation issue
 | |
| my Buf $crlf = Buf.new(13, 10);
 | |
| my Buf $http_header_end_marker = Buf.new(13, 10, 13, 10);
 | |
| my Int constant $default_stream_read_len = 2 * 1024;
 | |
| 
 | |
| method base64encode ($user, $pass) {
 | |
|     my MIME::Base64 $mime .= new();
 | |
|     my $encoded = $mime.encode_base64($user ~ ':' ~ $pass);
 | |
|     return $encoded;
 | |
| }
 | |
| 
 | |
| method get (Str $url) {
 | |
|     self.request_shell(RequestType::GET, $url)
 | |
| }
 | |
| 
 | |
| method post (Str $url, %headers = {}, Any $content?) {
 | |
|     self.request_shell(RequestType::POST, $url, %headers, $content)
 | |
| }
 | |
| 
 | |
| method request_shell (RequestType $rt, Str $url, %headers = {}, Any $content?) {
 | |
| 
 | |
|     return unless $url;
 | |
| 
 | |
|     my ($scheme, $hostname, $port, $path, $auth) = self.parse_url($url);
 | |
| 
 | |
|     %headers{'Connection'} = 'close';
 | |
|     %headers{'User-Agent'} //= "LWP::Simple/$VERSION Perl6/$*PERL<compiler><name>";
 | |
| 
 | |
|     if $auth {
 | |
|         $hostname = $auth<host>;
 | |
|         my $user = $auth<user>;
 | |
|         my $pass = $auth<password>;
 | |
|         my $base64enc = self.base64encode($user, $pass);
 | |
|         %headers<Authorization> = "Basic $base64enc";
 | |
|     }
 | |
| 
 | |
|     %headers<Host> = $hostname;
 | |
| 
 | |
|     if ($rt ~~ RequestType::POST && $content.defined) {
 | |
|         # Attach Content-Length header
 | |
|         # as recommended in RFC2616 section 14.3.
 | |
|         # Note: Empty content is also a content,
 | |
|         # header value equals to zero is valid.
 | |
|         %headers{'Content-Length'} = $content.encode.bytes;
 | |
|     }
 | |
| 
 | |
|     my ($status, $resp_headers, $resp_content) =
 | |
|         self.make_request($rt, $hostname, $port, $path, %headers, $content);
 | |
| 
 | |
|     given $status {
 | |
| 
 | |
|         when / 30 <[12]> / {
 | |
|             my %resp_headers = $resp_headers.hash;
 | |
|             my $new_url = %resp_headers<Location>;
 | |
|             if ! $new_url {
 | |
|                 die "Redirect $status without a new URL?";
 | |
|             }
 | |
| 
 | |
|             # Watch out for too many redirects.
 | |
|             # Need to find a way to store a class member
 | |
|             #if $redirects++ > 10 {
 | |
|             #    say "Too many redirects!";
 | |
|             #    return;
 | |
|             #}
 | |
| 
 | |
|             return self.request_shell($rt, $new_url, %headers, $content);
 | |
|         }
 | |
| 
 | |
|         when /200/ {
 | |
|             # should be fancier about charset decoding application - someday
 | |
|             if  $resp_headers<Content-Type> &&
 | |
|                 $resp_headers<Content-Type> ~~
 | |
|                     /   $<media-type>=[<-[/;]>+]
 | |
|                         [ <[/]> $<media-subtype>=[<-[;]>+] ]? /  &&
 | |
|                 (   $<media-type> eq 'text' ||
 | |
|                     (   $<media-type> eq 'application' &&
 | |
|                         $<media-subtype> ~~ /[ ecma | java ]script | json/
 | |
|                     )
 | |
|                 )
 | |
|             {
 | |
|                 my $charset = 
 | |
|                     ($resp_headers<Content-Type> ~~ /charset\=(<-[;]>*)/)[0];
 | |
|                 $charset = $charset ?? $charset.Str !!
 | |
|                     self ?? $.default_encoding !! $.class_default_encoding;
 | |
|                 return $resp_content.decode($charset);
 | |
|             }
 | |
|             else {
 | |
|                 return $resp_content;
 | |
|             }
 | |
|             
 | |
|         }
 | |
| 
 | |
|         # Response failed
 | |
|         default {
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
| }
 | |
| 
 | |
| method parse_chunks(Blob $b is rw, IO::Socket::INET $sock) {
 | |
|     my Int ($line_end_pos, $chunk_len, $chunk_start) = (0) xx 3;
 | |
|     my Blob $content = Blob.new();
 | |
| 
 | |
|     # smallest valid chunked line is 0CRLFCRLF (ascii or other 8bit like EBCDIC)
 | |
|     while ($line_end_pos + 5 <= $b.bytes) {
 | |
|         while ( $line_end_pos +4 <= $b.bytes  &&
 | |
|                 $b.subbuf($line_end_pos, 2) ne $crlf
 | |
|         ) {
 | |
|             $line_end_pos++
 | |
|         }
 | |
| #       say "got here x0x pos ", $line_end_pos, ' bytes ', $b.bytes, ' start ', $chunk_start, ' some data ', $b.subbuf($chunk_start, $line_end_pos +2 - $chunk_start).decode('ascii');
 | |
|         if  $line_end_pos +4 <= $b.bytes &&
 | |
|             $b.subbuf(
 | |
|                 $chunk_start, $line_end_pos + 2 - $chunk_start
 | |
|             ).decode('ascii') ~~ /^(<.xdigit>+)[";"|"\r\n"]/ 
 | |
|         {
 | |
| 
 | |
|             # deal with case of chunk_len is 0
 | |
| 
 | |
|             $chunk_len = :16($/[0].Str);
 | |
| #            say 'got chunk len ', $/[0].Str;
 | |
| 
 | |
|             # test if at end of buf??
 | |
|             if $chunk_len == 0 {
 | |
|                 # this is a "normal" exit from the routine
 | |
|                 return True, $content;
 | |
|             }
 | |
| 
 | |
|             # think 1CRLFxCRLF
 | |
|             if $line_end_pos + $chunk_len + 4 <= $b.bytes {
 | |
| #                say 'inner chunk';
 | |
|                 $content ~= $b.subbuf($line_end_pos +2, $chunk_len);
 | |
|                 $line_end_pos = $chunk_start = $line_end_pos + $chunk_len +4;
 | |
|             }
 | |
|             else {
 | |
| #                say 'last chunk';
 | |
|                 # remaining chunk part len is chunk_len with CRLF
 | |
|                 # minus the length of the chunk piece at end of buffer
 | |
|                 my $last_chunk_end_len = 
 | |
|                     $chunk_len +2 - ($b.bytes - $line_end_pos -2);
 | |
|                 $content ~= $b.subbuf($line_end_pos +2);
 | |
|                 if $last_chunk_end_len > 2  {
 | |
|                     $content ~= $sock.read($last_chunk_end_len -2);
 | |
|                 }
 | |
|                 # clean up CRLF after chunk
 | |
|                 $sock.read(min($last_chunk_end_len, 2));
 | |
| 
 | |
|                 # this is a` "normal" exit from the routine
 | |
|                 return False, $content;
 | |
|             }
 | |
|         }
 | |
|         else {
 | |
| #            say 'extend bytes ', $b.bytes, ' start ', $chunk_start, ' data ', $b.subbuf($chunk_start).decode('ascii');
 | |
|             # maybe odd case of buffer has just part of header at end
 | |
|             $b ~= $sock.read(20);
 | |
|         }
 | |
|     }
 | |
| 
 | |
| #    say join ' ', $b[0 .. 100];
 | |
| #    say $b.subbuf(0, 100).decode('utf-8');
 | |
|     die "Could not parse chunk header";
 | |
| }
 | |
| 
 | |
| method make_request (
 | |
|     RequestType $rt, $host, $port as Int, $path, %headers, $content?
 | |
| ) {
 | |
| 
 | |
|     my $headers = self.stringify_headers(%headers);
 | |
| 
 | |
|     my IO::Socket::INET $sock .= new(:$host, :$port);
 | |
|     my Str $req_str = $rt.Stringy ~ " {$path} HTTP/1.1\r\n"
 | |
|         ~ $headers
 | |
|         ~ "\r\n";
 | |
| 
 | |
|     # attach $content if given
 | |
|     # (string context is forced by concatenation)
 | |
|     $req_str ~= $content if $content.defined;
 | |
| 
 | |
|     $sock.send($req_str);
 | |
| 
 | |
|     my Blob $resp = $sock.read($default_stream_read_len);
 | |
| 
 | |
|     my ($status, $resp_headers, $resp_content) = self.parse_response($resp);
 | |
| 
 | |
| 
 | |
|     if (($resp_headers<Transfer-Encoding> || '') eq 'chunked') {
 | |
|         my Bool $is_last_chunk;
 | |
|         my Blob $resp_content_chunk;
 | |
| 
 | |
|         ($is_last_chunk, $resp_content) =
 | |
|             self.parse_chunks($resp_content, $sock);
 | |
|         while (not $is_last_chunk) {
 | |
|             ($is_last_chunk, $resp_content_chunk) =
 | |
|                 self.parse_chunks(
 | |
|                     my Blob $next_chunk_start = $sock.read(1024),
 | |
|                     $sock
 | |
|             );
 | |
|             $resp_content ~= $resp_content_chunk;
 | |
|         }
 | |
|     }
 | |
|     elsif ( $resp_headers<Content-Length>   &&
 | |
|             $resp_content.bytes < $resp_headers<Content-Length>
 | |
|     ) {
 | |
|         $resp_content ~= $sock.read(
 | |
|             $resp_headers<Content-Length> - $resp_content.bytes
 | |
|         );
 | |
|     }
 | |
|     else { # a bit hacky for now but should be ok
 | |
|         while ($resp.bytes > 0) {
 | |
|             $resp = $sock.read($default_stream_read_len);
 | |
|             $resp_content ~= $resp;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     $sock.close();
 | |
| 
 | |
|     return ($status, $resp_headers, $resp_content);
 | |
| }
 | |
| 
 | |
| method parse_response (Blob $resp) {
 | |
| 
 | |
|     my %header;
 | |
| 
 | |
|     my Int $header_end_pos = 0;
 | |
|     while ( $header_end_pos < $resp.bytes &&
 | |
|             $http_header_end_marker ne $resp.subbuf($header_end_pos, 4)  ) {
 | |
|         $header_end_pos++;
 | |
|     }
 | |
| 
 | |
|     if ($header_end_pos < $resp.bytes) {
 | |
|         my @header_lines = $resp.subbuf(
 | |
|             0, $header_end_pos
 | |
|         ).decode('ascii').split(/\r\n/);
 | |
|         my Str $status_line = @header_lines.shift;
 | |
| 
 | |
|         for @header_lines {
 | |
|             my ($name, $value) = .split(': ');
 | |
|             %header{$name} = $value;
 | |
|         }
 | |
|         return $status_line, %header.item, $resp.subbuf($header_end_pos +4).item;
 | |
|     }
 | |
| 
 | |
|     die "could not parse headers";
 | |
| #    if %header.exists('Transfer-Encoding') && %header<Transfer-Encoding> ~~ m/:i chunked/ {
 | |
| #        @content = self.decode_chunked(@content);
 | |
| #    }
 | |
| 
 | |
| }
 | |
| 
 | |
| method getprint (Str $url) {
 | |
|     my $out = self.get($url);
 | |
|     if $out ~~ Buf { $*OUT.write($out) } else { say $out }
 | |
| }
 | |
| 
 | |
| method getstore (Str $url, Str $filename) {
 | |
|     return unless defined $url;
 | |
| 
 | |
|     my $content = self.get($url);
 | |
|     if ! $content {
 | |
|         return
 | |
|     }
 | |
| 
 | |
|     my $fh = open($filename, :bin, :w);
 | |
|     if $content ~~ Buf {
 | |
|         $fh.write($content)
 | |
|     }
 | |
|     else {
 | |
|         $fh.print($content)
 | |
|     }
 | |
| 
 | |
|     $fh.close; 
 | |
| }
 | |
| 
 | |
| method parse_url (Str $url) {
 | |
|     my URI $u .= new($url);
 | |
|     my $path = $u.path_query;
 | |
|     
 | |
|     my $user_info = $u.grammar.parse_result<URI_reference><URI><hier_part><authority><userinfo>;
 | |
|     
 | |
|     return (
 | |
|         $u.scheme, 
 | |
|         $user_info ?? "{$user_info}@{$u.host}" !! $u.host, 
 | |
|         $u.port, 
 | |
|         $path eq '' ?? '/' !! $path,
 | |
|         $user_info ?? {
 | |
|             host => $u.host,
 | |
|             user => ~ $user_info<likely_userinfo_component>[0],
 | |
|             password => ~ $user_info<likely_userinfo_component>[1]
 | |
|         } !! Nil
 | |
|     );    
 | |
| }
 | |
| 
 | |
| method stringify_headers (%headers) {
 | |
|     my Str $str = '';
 | |
|     for sort %headers.keys {
 | |
|         $str ~= $_ ~ ': ' ~ %headers{$_} ~ "\r\n";
 | |
|     }
 | |
|     return $str;
 | |
| }
 | |
| 
 |