Show currency symbols in convert menu

4 years ago · bea2b1257c
parent d0fb19086f
commit bea2b1257c
310 changed files with 24159 additions and 89522 deletions
--- a/cointop/conversion.go
+++ b/cointop/conversion.go
@ -166,6 +166,7 @@ func (ct *Cointop) UpdateConvertMenu() {
 	currencies := ct.SupportedCurrencyConversions()
 	for i, key := range keys {
 		currency := currencies[key]
+		symbol := CurrencySymbol(key)
 		if cnt%percol == 0 {
 			cnt = 0
 		}
@ -178,7 +179,8 @@ func (ct *Cointop) UpdateConvertMenu() {
 			key = ct.colorscheme.Menu(key)
 			currency = ct.colorscheme.MenuLabel(currency)
 		}
-		item := fmt.Sprintf(" [ %1s ] %4s %-34s", shortcut, key, currency)
+
+		item := fmt.Sprintf(" [ %1s ] %4s %-36s", shortcut, key, fmt.Sprintf("%s %s", currency, symbol))
 		cols[cnt] = append(cols[cnt], item)
 		cnt = cnt + 1
 	}
@ -231,12 +233,7 @@ func (ct *Cointop) SetCurrencyConverstionFn(convert string) func() error {
 // CurrencySymbol returns the symbol for the currency conversion
 func (ct *Cointop) CurrencySymbol() string {
 	ct.debuglog("currencySymbol()")
-	symbol, ok := CurrencySymbolMap[strings.ToUpper(ct.State.currencyConversion)]
-	if ok {
-		return symbol
-	}
-
-	return "$"
+	return CurrencySymbol(ct.State.currencyConversion)
 }

 // ShowConvertMenu shows the convert menu view
@ -284,5 +281,5 @@ func CurrencySymbol(currency string) string {
 		return symbol
 	}

-	return "$"
+	return "?"
 }
--- a/go.mod
+++ b/go.mod
@ -4,37 +4,25 @@ require (
 	github.com/BurntSushi/toml v0.3.1
 	github.com/anaskhan96/soup v1.1.1 // indirect
 	github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect
-	github.com/coreos/go-etcd v2.0.0+incompatible // indirect
-	github.com/cpuguy83/go-md2man v1.0.10 // indirect
 	github.com/creack/pty v1.1.11
 	github.com/fatih/color v1.7.0
-	github.com/fsnotify/fsnotify v1.4.9 // indirect
-	github.com/gizak/termui v2.3.0+incompatible // indirect
 	github.com/gliderlabs/ssh v0.3.0
-	github.com/google/pprof v0.0.0-20190502144155-8358a9778bd1 // indirect
-	github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 // indirect
 	github.com/maruel/panicparse v1.1.2-0.20180806203336-f20d4c4d746f
 	github.com/mattn/go-colorable v0.1.1 // indirect
 	github.com/mattn/go-isatty v0.0.8 // indirect
 	github.com/mattn/go-runewidth v0.0.7
-	github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b // indirect
 	github.com/miguelmota/go-coinmarketcap v0.1.5
 	github.com/miguelmota/gocui v0.4.2
 	github.com/miguelmota/termbox-go v0.0.0-20191229070316-58d4fcbce2a7
 	github.com/mitchellh/go-wordwrap v1.0.0
-	github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86 // indirect
-	github.com/neelance/sourcemap v0.0.0-20200213170602-2833bce08e4c // indirect
 	github.com/patrickmn/go-cache v2.1.0+incompatible
-	github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect
-	github.com/sirupsen/logrus v1.4.1 // indirect
 	github.com/spf13/cobra v1.0.0
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/tomnomnom/xtermcolor v0.0.0-20160428124646-b78803f00a7e
-	github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8 // indirect
 	golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de
+	golang.org/x/net v0.0.0-20200625001655-4c5254603344 // indirect
 	golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1 // indirect
 	golang.org/x/text v0.3.2
-	golang.org/x/tools v0.0.0-20200728235236-e8769ccb4337 // indirect
 )

 go 1.13
--- a/go.sum
+++ b/go.sum
@ -16,14 +16,13 @@ github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghf
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
 github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
-github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
 github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
 github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
 github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
-github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
 github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
 github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
@ -31,10 +30,7 @@ github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
 github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
 github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
-github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
-github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
-github.com/gizak/termui v2.3.0+incompatible/go.mod h1:PkJoWUt/zacQKysNfQtcw1RW+eK2SxkieVBtl+4ovLA=
 github.com/gliderlabs/ssh v0.3.0 h1:7GcKy4erEljCE/QeQ2jTVpu+3f3zkpZOxOJjFYkMqYU=
 github.com/gliderlabs/ssh v0.3.0/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
 github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
@ -50,9 +46,6 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
 github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
-github.com/google/pprof v0.0.0-20190502144155-8358a9778bd1/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
-github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 h1:l5lAOZEym3oK3SQ2HBHWsJUfbNBiTXJDeW2QDxw9AQ0=
-github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
 github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
 github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
@ -81,7 +74,6 @@ github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd
 github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
 github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
-github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
 github.com/miguelmota/go-coinmarketcap v0.1.5 h1:NwAqQG+ClGNsYlReM06ERK8CqEkW1tF8BQsTLNp6TyU=
 github.com/miguelmota/go-coinmarketcap v0.1.5/go.mod h1:Jdv/kqtKclIElmoNAZMMJn0DSQv+j7p/H1te/GGnxhA=
 github.com/miguelmota/gocui v0.4.2 h1:nMYnYn3RjV7FlWFcidQa9eAkX3kT7XMI6yJMxEkAz6s=
@ -93,10 +85,6 @@ github.com/mitchellh/go-wordwrap v1.0.0 h1:6GlHJ/LTGMrIJbwgdqdl2eEH8o+Exx/0m8ir9
 github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=
 github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
 github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
-github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86 h1:D6paGObi5Wud7xg83MaEFyjxQB1W5bz5d0IFppr+ymk=
-github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo=
-github.com/neelance/sourcemap v0.0.0-20200213170602-2833bce08e4c h1:bY6ktFuJkt+ZXkX0RChQch2FtHpWQLVS8Qo1YasiIVk=
-github.com/neelance/sourcemap v0.0.0-20200213170602-2833bce08e4c/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
 github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
 github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
 github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
@ -113,19 +101,13 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R
 github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
 github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
 github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
-github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 h1:bUGsEnyNbVPw06Bs80sCeARAlK8lhwqGyi6UT8ymuGk=
-github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg=
 github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
 github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
-github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
 github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
 github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
 github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
 github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
-github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
-github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
 github.com/spf13/cobra v1.0.0 h1:6m/oheQuQ13N9ks4hubMG6BnvwOeaJrqSPLahSnczz8=
 github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE=
 github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
@ -133,7 +115,6 @@ github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
 github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
 github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
-github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
 github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE=
 github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
@ -141,26 +122,19 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1
 github.com/tomnomnom/xtermcolor v0.0.0-20160428124646-b78803f00a7e h1:Ee+VZw13r9NTOMnwTPs6O5KZ0MJU54hsxu9FpZ4pQ10=
 github.com/tomnomnom/xtermcolor v0.0.0-20160428124646-b78803f00a7e/go.mod h1:fSIW/szJHsRts/4U8wlMPhs+YqJC+7NYR+Qqb1uJVpA=
 github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
-github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
 github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
-github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
 go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
 go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
 go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
 golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
-golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc=
-golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de h1:ikNHVSjEfnvz6sxdSPCaPt572qowuyMDMJLLm3Db3ig=
 golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
-golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/net v0.0.0-20180215212450-dc948dff8834/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -168,28 +142,19 @@ golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73r
 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU=
-golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200625001655-4c5254603344 h1:vGXIOMxbNfDTk/aXCmfdLgkrSV+Z2tcbze+pEc3v5W4=
 golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M=
-golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1 h1:sIky/MyNRSHTrdxfsiUSS4WIAMvInbeXljJz+jDjeYE=
 golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -201,14 +166,6 @@ golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGm
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
-golang.org/x/tools v0.0.0-20190701194522-38ae2c8f6412 h1:6DGEujAtONoB05AXgpHUUP6nGNB4kB9Zp9qenj9Q+tg=
-golang.org/x/tools v0.0.0-20190701194522-38ae2c8f6412/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI=
-golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20200728235236-e8769ccb4337 h1:UouaHYVMLabq3zejCFgSA1hgrfVoH3t3yw81kjCVVG4=
-golang.org/x/tools v0.0.0-20200728235236-e8769ccb4337/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
-golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
--- a/vendor/github.com/spf13/cobra/.gitignore
+++ b/vendor/github.com/spf13/cobra/.gitignore
@ -32,7 +32,8 @@ Session.vim
 tags

 *.exe
-
 cobra.test
+bin

-.idea/*
+.idea/
+*.iml
--- a/vendor/github.com/spf13/cobra/.travis.yml
+++ b/vendor/github.com/spf13/cobra/.travis.yml
@ -3,29 +3,27 @@ language: go
 stages:
  - diff
  - test
+  - build

 go:
-  - 1.10.x
-  - 1.11.x
  - 1.12.x
+  - 1.13.x
  - tip

+before_install:
+  - go get -u github.com/kyoh86/richgo
+  - go get -u github.com/mitchellh/gox
+
 matrix:
  allow_failures:
    - go: tip
  include:
    - stage: diff
-      go: 1.12.x
-      script: diff -u <(echo -n) <(gofmt -d -s .)
+      go: 1.13.x
+      script: make fmt
+    - stage: build
+      go: 1.13.x
+      script: make cobra_generator

-before_install:
-  - mkdir -p bin
-  - curl -Lso bin/shellcheck https://github.com/caarlos0/shellcheck-docker/releases/download/v0.6.0/shellcheck
-  - chmod +x bin/shellcheck
-  - go get -u github.com/kyoh86/richgo
-script:
-  - PATH=$PATH:$PWD/bin richgo test -v ./...
-  - go build
-  - if [ -z $NOVET ]; then
-      diff -u <(echo -n) <(go vet . 2>&1 | grep -vE 'ExampleCommand|bash_completions.*Fprint');
-    fi
+script: 
+ - make test
--- a/vendor/github.com/spf13/cobra/Makefile
+++ b/vendor/github.com/spf13/cobra/Makefile
@ -0,0 +1,36 @@
+BIN="./bin"
+SRC=$(shell find . -name "*.go")
+
+ifeq (, $(shell which richgo))
+$(warning "could not find richgo in $(PATH), run: go get github.com/kyoh86/richgo")
+endif
+
+.PHONY: fmt vet test cobra_generator install_deps clean
+
+default: all
+
+all: fmt vet test cobra_generator	
+
+fmt:
+	$(info ******************** checking formatting ********************)
+	@test -z $(shell gofmt -l $(SRC)) || (gofmt -d $(SRC); exit 1)
+
+test: install_deps vet
+	$(info ******************** running tests ********************)
+	richgo test -v ./...
+
+cobra_generator: install_deps
+	$(info ******************** building generator ********************)
+	mkdir -p $(BIN)
+	make -C cobra all
+
+install_deps:
+	$(info ******************** downloading dependencies ********************)
+	go get -v ./...
+
+vet:
+	$(info ******************** vetting ********************)
+	go vet ./...
+
+clean:
+	rm -rf $(BIN)
--- a/vendor/github.com/spf13/cobra/README.md
+++ b/vendor/github.com/spf13/cobra/README.md
@ -24,11 +24,13 @@ Many of the most widely used Go projects are built using Cobra, such as:
 [Prototool](https://github.com/uber/prototool),
 [mattermost-server](https://github.com/mattermost/mattermost-server),
 [Gardener](https://github.com/gardener/gardenctl),
+[Linkerd](https://linkerd.io/),
+[Github CLI](https://github.com/cli/cli)
 etc.

 [![Build Status](https://travis-ci.org/spf13/cobra.svg "Travis CI status")](https://travis-ci.org/spf13/cobra)
-[![CircleCI status](https://circleci.com/gh/spf13/cobra.png?circle-token=:circle-token "CircleCI status")](https://circleci.com/gh/spf13/cobra)
 [![GoDoc](https://godoc.org/github.com/spf13/cobra?status.svg)](https://godoc.org/github.com/spf13/cobra)
+[![Go Report Card](https://goreportcard.com/badge/github.com/spf13/cobra)](https://goreportcard.com/report/github.com/spf13/cobra)

 # Table of Contents

@ -208,51 +210,78 @@ You will additionally define flags and handle configuration in your init() funct
 For example cmd/root.go:

 ```go
+package cmd
+
 import (
-  "fmt"
-  "os"
+	"fmt"
+	"os"

-  homedir "github.com/mitchellh/go-homedir"
-  "github.com/spf13/cobra"
-  "github.com/spf13/viper"
+	homedir "github.com/mitchellh/go-homedir"
+	"github.com/spf13/cobra"
+	"github.com/spf13/viper"
 )

-func init() {
-  cobra.OnInitialize(initConfig)
-  rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.cobra.yaml)")
-  rootCmd.PersistentFlags().StringVarP(&projectBase, "projectbase", "b", "", "base project directory eg. github.com/spf13/")
-  rootCmd.PersistentFlags().StringP("author", "a", "YOUR NAME", "Author name for copyright attribution")
-  rootCmd.PersistentFlags().StringVarP(&userLicense, "license", "l", "", "Name of license for the project (can provide `licensetext` in config)")
-  rootCmd.PersistentFlags().Bool("viper", true, "Use Viper for configuration")
-  viper.BindPFlag("author", rootCmd.PersistentFlags().Lookup("author"))
-  viper.BindPFlag("projectbase", rootCmd.PersistentFlags().Lookup("projectbase"))
-  viper.BindPFlag("useViper", rootCmd.PersistentFlags().Lookup("viper"))
-  viper.SetDefault("author", "NAME HERE <EMAIL ADDRESS>")
-  viper.SetDefault("license", "apache")
+var (
+	// Used for flags.
+	cfgFile     string
+	userLicense string
+
+	rootCmd = &cobra.Command{
+		Use:   "cobra",
+		Short: "A generator for Cobra based Applications",
+		Long: `Cobra is a CLI library for Go that empowers applications.
+This application is a tool to generate the needed files
+to quickly create a Cobra application.`,
+	}
+)
+
+// Execute executes the root command.
+func Execute() error {
+	return rootCmd.Execute()
 }

-func initConfig() {
-  // Don't forget to read config either from cfgFile or from home directory!
-  if cfgFile != "" {
-    // Use config file from the flag.
-    viper.SetConfigFile(cfgFile)
-  } else {
-    // Find home directory.
-    home, err := homedir.Dir()
-    if err != nil {
-      fmt.Println(err)
-      os.Exit(1)
-    }
+func init() {
+	cobra.OnInitialize(initConfig)
+
+	rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.cobra.yaml)")
+	rootCmd.PersistentFlags().StringP("author", "a", "YOUR NAME", "author name for copyright attribution")
+	rootCmd.PersistentFlags().StringVarP(&userLicense, "license", "l", "", "name of license for the project")
+	rootCmd.PersistentFlags().Bool("viper", true, "use Viper for configuration")
+	viper.BindPFlag("author", rootCmd.PersistentFlags().Lookup("author"))
+	viper.BindPFlag("useViper", rootCmd.PersistentFlags().Lookup("viper"))
+	viper.SetDefault("author", "NAME HERE <EMAIL ADDRESS>")
+	viper.SetDefault("license", "apache")
+
+	rootCmd.AddCommand(addCmd)
+	rootCmd.AddCommand(initCmd)
+}

-    // Search config in home directory with name ".cobra" (without extension).
-    viper.AddConfigPath(home)
-    viper.SetConfigName(".cobra")
-  }
+func er(msg interface{}) {
+	fmt.Println("Error:", msg)
+	os.Exit(1)
+}

-  if err := viper.ReadInConfig(); err != nil {
-    fmt.Println("Can't read config:", err)
-    os.Exit(1)
-  }
+func initConfig() {
+	if cfgFile != "" {
+		// Use config file from the flag.
+		viper.SetConfigFile(cfgFile)
+	} else {
+		// Find home directory.
+		home, err := homedir.Dir()
+		if err != nil {
+			er(err)
+		}
+
+		// Search config in home directory with name ".cobra" (without extension).
+		viper.AddConfigPath(home)
+		viper.SetConfigName(".cobra")
+	}
+
+	viper.AutomaticEnv()
+
+	if err := viper.ReadInConfig(); err == nil {
+		fmt.Println("Using config file:", viper.ConfigFileUsed())
+	}
 }
 ```

@ -459,7 +488,7 @@ For many years people have printed back to the screen.`,
 Echo works a lot like print, except it has a child command.`,
    Args: cobra.MinimumNArgs(1),
    Run: func(cmd *cobra.Command, args []string) {
-      fmt.Println("Print: " + strings.Join(args, " "))
+      fmt.Println("Echo: " + strings.Join(args, " "))
    },
  }

--- a/vendor/github.com/spf13/cobra/args.go
+++ b/vendor/github.com/spf13/cobra/args.go
@ -2,6 +2,7 @@ package cobra

 import (
 	"fmt"
+	"strings"
 )

 type PositionalArgs func(cmd *Command, args []string) error
@ -34,8 +35,15 @@ func NoArgs(cmd *Command, args []string) error {
 // OnlyValidArgs returns an error if any args are not in the list of ValidArgs.
 func OnlyValidArgs(cmd *Command, args []string) error {
 	if len(cmd.ValidArgs) > 0 {
+		// Remove any description that may be included in ValidArgs.
+		// A description is following a tab character.
+		var validArgs []string
+		for _, v := range cmd.ValidArgs {
+			validArgs = append(validArgs, strings.Split(v, "\t")[0])
+		}
+
 		for _, v := range args {
-			if !stringInSlice(v, cmd.ValidArgs) {
+			if !stringInSlice(v, validArgs) {
 				return fmt.Errorf("invalid argument %q for %q%s", v, cmd.CommandPath(), cmd.findSuggestions(args[0]))
 			}
 		}
--- a/vendor/github.com/spf13/cobra/bash_completions.go
+++ b/vendor/github.com/spf13/cobra/bash_completions.go
@ -58,9 +58,71 @@ __%[1]s_contains_word()
    return 1
 }

+__%[1]s_handle_go_custom_completion()
+{
+    __%[1]s_debug "${FUNCNAME[0]}: cur is ${cur}, words[*] is ${words[*]}, #words[@] is ${#words[@]}"
+
+    local out requestComp lastParam lastChar comp directive args
+
+    # Prepare the command to request completions for the program.
+    # Calling ${words[0]} instead of directly %[1]s allows to handle aliases
+    args=("${words[@]:1}")
+    requestComp="${words[0]} %[2]s ${args[*]}"
+
+    lastParam=${words[$((${#words[@]}-1))]}
+    lastChar=${lastParam:$((${#lastParam}-1)):1}
+    __%[1]s_debug "${FUNCNAME[0]}: lastParam ${lastParam}, lastChar ${lastChar}"
+
+    if [ -z "${cur}" ] && [ "${lastChar}" != "=" ]; then
+        # If the last parameter is complete (there is a space following it)
+        # We add an extra empty parameter so we can indicate this to the go method.
+        __%[1]s_debug "${FUNCNAME[0]}: Adding extra empty parameter"
+        requestComp="${requestComp} \"\""
+    fi
+
+    __%[1]s_debug "${FUNCNAME[0]}: calling ${requestComp}"
+    # Use eval to handle any environment variables and such
+    out=$(eval "${requestComp}" 2>/dev/null)
+
+    # Extract the directive integer at the very end of the output following a colon (:)
+    directive=${out##*:}
+    # Remove the directive
+    out=${out%%:*}
+    if [ "${directive}" = "${out}" ]; then
+        # There is not directive specified
+        directive=0
+    fi
+    __%[1]s_debug "${FUNCNAME[0]}: the completion directive is: ${directive}"
+    __%[1]s_debug "${FUNCNAME[0]}: the completions are: ${out[*]}"
+
+    if [ $((directive & %[3]d)) -ne 0 ]; then
+        # Error code.  No completion.
+        __%[1]s_debug "${FUNCNAME[0]}: received error from custom completion go code"
+        return
+    else
+        if [ $((directive & %[4]d)) -ne 0 ]; then
+            if [[ $(type -t compopt) = "builtin" ]]; then
+                __%[1]s_debug "${FUNCNAME[0]}: activating no space"
+                compopt -o nospace
+            fi
+        fi
+        if [ $((directive & %[5]d)) -ne 0 ]; then
+            if [[ $(type -t compopt) = "builtin" ]]; then
+                __%[1]s_debug "${FUNCNAME[0]}: activating no file completion"
+                compopt +o default
+            fi
+        fi
+
+        while IFS='' read -r comp; do
+            COMPREPLY+=("$comp")
+        done < <(compgen -W "${out[*]}" -- "$cur")
+    fi
+}
+
 __%[1]s_handle_reply()
 {
    __%[1]s_debug "${FUNCNAME[0]}"
+    local comp
    case $cur in
        -*)
            if [[ $(type -t compopt) = "builtin" ]]; then
@ -72,7 +134,9 @@ __%[1]s_handle_reply()
            else
                allflags=("${flags[*]} ${two_word_flags[*]}")
            fi
-            COMPREPLY=( $(compgen -W "${allflags[*]}" -- "$cur") )
+            while IFS='' read -r comp; do
+                COMPREPLY+=("$comp")
+            done < <(compgen -W "${allflags[*]}" -- "$cur")
            if [[ $(type -t compopt) = "builtin" ]]; then
                [[ "${COMPREPLY[0]}" == *= ]] || compopt +o nospace
            fi
@ -118,14 +182,22 @@ __%[1]s_handle_reply()
    completions=("${commands[@]}")
    if [[ ${#must_have_one_noun[@]} -ne 0 ]]; then
        completions=("${must_have_one_noun[@]}")
+    elif [[ -n "${has_completion_function}" ]]; then
+        # if a go completion function is provided, defer to that function
+        completions=()
+        __%[1]s_handle_go_custom_completion
    fi
    if [[ ${#must_have_one_flag[@]} -ne 0 ]]; then
        completions+=("${must_have_one_flag[@]}")
    fi
-    COMPREPLY=( $(compgen -W "${completions[*]}" -- "$cur") )
+    while IFS='' read -r comp; do
+        COMPREPLY+=("$comp")
+    done < <(compgen -W "${completions[*]}" -- "$cur")

    if [[ ${#COMPREPLY[@]} -eq 0 && ${#noun_aliases[@]} -gt 0 && ${#must_have_one_noun[@]} -ne 0 ]]; then
-        COMPREPLY=( $(compgen -W "${noun_aliases[*]}" -- "$cur") )
+        while IFS='' read -r comp; do
+            COMPREPLY+=("$comp")
+        done < <(compgen -W "${noun_aliases[*]}" -- "$cur")
    fi

    if [[ ${#COMPREPLY[@]} -eq 0 ]]; then
@ -160,7 +232,7 @@ __%[1]s_handle_filename_extension_flag()
 __%[1]s_handle_subdirs_in_dir_flag()
 {
    local dir="$1"
-    pushd "${dir}" >/dev/null 2>&1 && _filedir -d && popd >/dev/null 2>&1
+    pushd "${dir}" >/dev/null 2>&1 && _filedir -d && popd >/dev/null 2>&1 || return
 }

 __%[1]s_handle_flag()
@ -272,7 +344,7 @@ __%[1]s_handle_word()
    __%[1]s_handle_word
 }

-`, name))
+`, name, ShellCompNoDescRequestCmd, ShellCompDirectiveError, ShellCompDirectiveNoSpace, ShellCompDirectiveNoFileComp))
 }

 func writePostscript(buf *bytes.Buffer, name string) {
@ -297,6 +369,7 @@ func writePostscript(buf *bytes.Buffer, name string) {
    local commands=("%[1]s")
    local must_have_one_flag=()
    local must_have_one_noun=()
+    local has_completion_function
    local last_command
    local nouns=()

@ -397,7 +470,22 @@ func writeLocalNonPersistentFlag(buf *bytes.Buffer, flag *pflag.Flag) {
 	buf.WriteString(fmt.Sprintf(format, name))
 }

+// Setup annotations for go completions for registered flags
+func prepareCustomAnnotationsForFlags(cmd *Command) {
+	for flag := range flagCompletionFunctions {
+		// Make sure the completion script calls the __*_go_custom_completion function for
+		// every registered flag.  We need to do this here (and not when the flag was registered
+		// for completion) so that we can know the root command name for the prefix
+		// of __<prefix>_go_custom_completion
+		if flag.Annotations == nil {
+			flag.Annotations = map[string][]string{}
+		}
+		flag.Annotations[BashCompCustom] = []string{fmt.Sprintf("__%[1]s_handle_go_custom_completion", cmd.Root().Name())}
+	}
+}
+
 func writeFlags(buf *bytes.Buffer, cmd *Command) {
+	prepareCustomAnnotationsForFlags(cmd)
 	buf.WriteString(`    flags=()
    two_word_flags=()
    local_nonpersistent_flags=()
@ -460,8 +548,14 @@ func writeRequiredNouns(buf *bytes.Buffer, cmd *Command) {
 	buf.WriteString("    must_have_one_noun=()\n")
 	sort.Sort(sort.StringSlice(cmd.ValidArgs))
 	for _, value := range cmd.ValidArgs {
+		// Remove any description that may be included following a tab character.
+		// Descriptions are not supported by bash completion.
+		value = strings.Split(value, "\t")[0]
 		buf.WriteString(fmt.Sprintf("    must_have_one_noun+=(%q)\n", value))
 	}
+	if cmd.ValidArgsFunction != nil {
+		buf.WriteString("    has_completion_function=1\n")
+	}
 }

 func writeCmdAliases(buf *bytes.Buffer, cmd *Command) {
--- a/vendor/github.com/spf13/cobra/bash_completions.md
+++ b/vendor/github.com/spf13/cobra/bash_completions.md
@ -56,7 +56,149 @@ func main() {

 `out.sh` will get you completions of subcommands and flags. Copy it to `/etc/bash_completion.d/` as described [here](https://debian-administration.org/article/316/An_introduction_to_bash_completion_part_1) and reset your terminal to use autocompletion. If you make additional annotations to your code, you can get even more intelligent and flexible behavior.

-## Creating your own custom functions
+## Have the completions code complete your 'nouns'
+
+### Static completion of nouns
+
+This method allows you to provide a pre-defined list of completion choices for your nouns using the `validArgs` field.
+For example, if you want `kubectl get [tab][tab]` to show a list of valid "nouns" you have to set them. Simplified code from `kubectl get` looks like:
+
+```go
+validArgs []string = { "pod", "node", "service", "replicationcontroller" }
+
+cmd := &cobra.Command{
+	Use:     "get [(-o|--output=)json|yaml|template|...] (RESOURCE [NAME] | RESOURCE/NAME ...)",
+	Short:   "Display one or many resources",
+	Long:    get_long,
+	Example: get_example,
+	Run: func(cmd *cobra.Command, args []string) {
+		err := RunGet(f, out, cmd, args)
+		util.CheckErr(err)
+	},
+	ValidArgs: validArgs,
+}
+```
+
+Notice we put the "ValidArgs" on the "get" subcommand. Doing so will give results like
+
+```bash
+# kubectl get [tab][tab]
+node                 pod                    replicationcontroller  service
+```
+
+### Plural form and shortcuts for nouns
+
+If your nouns have a number of aliases, you can define them alongside `ValidArgs` using `ArgAliases`:
+
+```go
+argAliases []string = { "pods", "nodes", "services", "svc", "replicationcontrollers", "rc" }
+
+cmd := &cobra.Command{
+    ...
+	ValidArgs:  validArgs,
+	ArgAliases: argAliases
+}
+```
+
+The aliases are not shown to the user on tab completion, but they are accepted as valid nouns by
+the completion algorithm if entered manually, e.g. in:
+
+```bash
+# kubectl get rc [tab][tab]
+backend        frontend       database 
+```
+
+Note that without declaring `rc` as an alias, the completion algorithm would show the list of nouns
+in this example again instead of the replication controllers.
+
+### Dynamic completion of nouns
+
+In some cases it is not possible to provide a list of possible completions in advance.  Instead, the list of completions must be determined at execution-time.  Cobra provides two ways of defining such dynamic completion of nouns. Note that both these methods can be used along-side each other as long as they are not both used for the same command.
+
+**Note**: *Custom Completions written in Go* will automatically work for other shell-completion scripts (e.g., Fish shell), while *Custom Completions written in Bash* will only work for Bash shell-completion.  It is therefore recommended to use *Custom Completions written in Go*.
+
+#### 1. Custom completions of nouns written in Go
+
+In a similar fashion as for static completions, you can use the `ValidArgsFunction` field to provide a Go function that Cobra will execute when it needs the list of completion choices for the nouns of a command.  Note that either `ValidArgs` or `ValidArgsFunction` can be used for a single cobra command, but not both.
+Simplified code from `helm status` looks like:
+
+```go
+cmd := &cobra.Command{
+	Use:   "status RELEASE_NAME",
+	Short: "Display the status of the named release",
+	Long:  status_long,
+	RunE: func(cmd *cobra.Command, args []string) {
+		RunGet(args[0])
+	},
+	ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+		if len(args) != 0 {
+			return nil, cobra.ShellCompDirectiveNoFileComp
+		}
+		return getReleasesFromCluster(toComplete), cobra.ShellCompDirectiveNoFileComp
+	},
+}
+```
+Where `getReleasesFromCluster()` is a Go function that obtains the list of current Helm releases running on the Kubernetes cluster.
+Notice we put the `ValidArgsFunction` on the `status` subcommand. Let's assume the Helm releases on the cluster are: `harbor`, `notary`, `rook` and `thanos` then this dynamic completion will give results like
+
+```bash
+# helm status [tab][tab]
+harbor notary rook thanos
+```
+You may have noticed the use of `cobra.ShellCompDirective`.  These directives are bit fields allowing to control some shell completion behaviors for your particular completion.  You can combine them with the bit-or operator such as `cobra.ShellCompDirectiveNoSpace | cobra.ShellCompDirectiveNoFileComp`
+```go
+// Indicates an error occurred and completions should be ignored.
+ShellCompDirectiveError
+// Indicates that the shell should not add a space after the completion,
+// even if there is a single completion provided.
+ShellCompDirectiveNoSpace
+// Indicates that the shell should not provide file completion even when
+// no completion is provided.
+// This currently does not work for zsh or bash < 4
+ShellCompDirectiveNoFileComp
+// Indicates that the shell will perform its default behavior after completions
+// have been provided (this implies !ShellCompDirectiveNoSpace && !ShellCompDirectiveNoFileComp).
+ShellCompDirectiveDefault
+```
+
+When using the `ValidArgsFunction`, Cobra will call your registered function after having parsed all flags and arguments provided in the command-line.  You therefore don't need to do this parsing yourself.  For example, when a user calls `helm status --namespace my-rook-ns [tab][tab]`, Cobra will call your registered `ValidArgsFunction` after having parsed the `--namespace` flag, as it would have done when calling the `RunE` function.
+
+##### Debugging
+
+Cobra achieves dynamic completions written in Go through the use of a hidden command called by the completion script.  To debug your Go completion code, you can call this hidden command directly:
+```bash
+# helm __complete status har<ENTER>
+harbor
+:4
+Completion ended with directive: ShellCompDirectiveNoFileComp # This is on stderr
+```
+***Important:*** If the noun to complete is empty, you must pass an empty parameter to the `__complete` command:
+```bash
+# helm __complete status ""<ENTER>
+harbor
+notary
+rook
+thanos
+:4
+Completion ended with directive: ShellCompDirectiveNoFileComp # This is on stderr
+```
+Calling the `__complete` command directly allows you to run the Go debugger to troubleshoot your code.  You can also add printouts to your code; Cobra provides the following functions to use for printouts in Go completion code:
+```go
+// Prints to the completion script debug file (if BASH_COMP_DEBUG_FILE
+// is set to a file path) and optionally prints to stderr.
+cobra.CompDebug(msg string, printToStdErr bool) {
+cobra.CompDebugln(msg string, printToStdErr bool)
+
+// Prints to the completion script debug file (if BASH_COMP_DEBUG_FILE
+// is set to a file path) and to stderr.
+cobra.CompError(msg string)
+cobra.CompErrorln(msg string)
+```
+***Important:*** You should **not** leave traces that print to stdout in your completion code as they will be interpreted as completion choices by the completion script.  Instead, use the cobra-provided debugging traces functions mentioned above.
+
+#### 2. Custom completions of nouns written in Bash
+
+This method allows you to inject bash functions into the completion script.  Those bash functions are responsible for providing the completion choices for your own completions.

 Some more actual code that works in kubernetes:

@ -111,58 +253,6 @@ Find more information at https://github.com/GoogleCloudPlatform/kubernetes.`,

 The `BashCompletionFunction` option is really only valid/useful on the root command. Doing the above will cause `__kubectl_custom_func()` (`__<command-use>_custom_func()`) to be called when the built in processor was unable to find a solution. In the case of kubernetes a valid command might look something like `kubectl get pod [mypod]`. If you type `kubectl get pod [tab][tab]` the `__kubectl_customc_func()` will run because the cobra.Command only understood "kubectl" and "get." `__kubectl_custom_func()` will see that the cobra.Command is "kubectl_get" and will thus call another helper `__kubectl_get_resource()`.  `__kubectl_get_resource` will look at the 'nouns' collected. In our example the only noun will be `pod`.  So it will call `__kubectl_parse_get pod`.  `__kubectl_parse_get` will actually call out to kubernetes and get any pods.  It will then set `COMPREPLY` to valid pods!

-## Have the completions code complete your 'nouns'
-
-In the above example "pod" was assumed to already be typed. But if you want `kubectl get [tab][tab]` to show a list of valid "nouns" you have to set them. Simplified code from `kubectl get` looks like:
-
-```go
-validArgs []string = { "pod", "node", "service", "replicationcontroller" }
-
-cmd := &cobra.Command{
-	Use:     "get [(-o|--output=)json|yaml|template|...] (RESOURCE [NAME] | RESOURCE/NAME ...)",
-	Short:   "Display one or many resources",
-	Long:    get_long,
-	Example: get_example,
-	Run: func(cmd *cobra.Command, args []string) {
-		err := RunGet(f, out, cmd, args)
-		util.CheckErr(err)
-	},
-	ValidArgs: validArgs,
-}
-```
-
-Notice we put the "ValidArgs" on the "get" subcommand. Doing so will give results like
-
-```bash
-# kubectl get [tab][tab]
-node                 pod                    replicationcontroller  service
-```
-
-## Plural form and shortcuts for nouns
-
-If your nouns have a number of aliases, you can define them alongside `ValidArgs` using `ArgAliases`:
-
-```go
-argAliases []string = { "pods", "nodes", "services", "svc", "replicationcontrollers", "rc" }
-
-cmd := &cobra.Command{
-    ...
-	ValidArgs:  validArgs,
-	ArgAliases: argAliases
-}
-```
-
-The aliases are not shown to the user on tab completion, but they are accepted as valid nouns by
-the completion algorithm if entered manually, e.g. in:
-
-```bash
-# kubectl get rc [tab][tab]
-backend        frontend       database 
-```
-
-Note that without declaring `rc` as an alias, the completion algorithm would show the list of nouns
-in this example again instead of the replication controllers.
-
 ## Mark flags as required

 Most of the time completions will only show subcommands. But if a flag is required to make a subcommand work, you probably want it to show up when the user types [tab][tab].  Marking a flag as 'Required' is incredibly easy.
@ -211,8 +301,45 @@ So while there are many other files in the CWD it only shows me subdirs and thos

 # Specify custom flag completion

-Similar to the filename completion and filtering using cobra.BashCompFilenameExt, you can specify
-a custom flag completion function with cobra.BashCompCustom:
+As for nouns, Cobra provides two ways of defining dynamic completion of flags.  Note that both these methods can be used along-side each other as long as they are not both used for the same flag.
+
+**Note**: *Custom Completions written in Go* will automatically work for other shell-completion scripts (e.g., Fish shell), while *Custom Completions written in Bash* will only work for Bash shell-completion.  It is therefore recommended to use *Custom Completions written in Go*.
+
+## 1. Custom completions of flags written in Go
+
+To provide a Go function that Cobra will execute when it needs the list of completion choices for a flag, you must register the function in the following manner:
+
+```go
+flagName := "output"
+cmd.RegisterFlagCompletionFunc(flagName, func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+	return []string{"json", "table", "yaml"}, cobra.ShellCompDirectiveDefault
+})
+```
+Notice that calling `RegisterFlagCompletionFunc()` is done through the `command` with which the flag is associated.  In our example this dynamic completion will give results like so:
+
+```bash
+# helm status --output [tab][tab]
+json table yaml
+```
+
+### Debugging
+
+You can also easily debug your Go completion code for flags:
+```bash
+# helm __complete status --output ""
+json
+table
+yaml
+:4
+Completion ended with directive: ShellCompDirectiveNoFileComp # This is on stderr
+```
+***Important:*** You should **not** leave traces that print to stdout in your completion code as they will be interpreted as completion choices by the completion script.  Instead, use the cobra-provided debugging traces functions mentioned in the above section.
+
+## 2. Custom completions of flags written in Bash
+
+Alternatively, you can use bash code for flag custom completion. Similar to the filename
+completion and filtering using `cobra.BashCompFilenameExt`, you can specify
+a custom flag completion bash function with `cobra.BashCompCustom`:

 ```go
 	annotation := make(map[string][]string)
@ -226,7 +353,7 @@ a custom flag completion function with cobra.BashCompCustom:
 	cmd.Flags().AddFlag(flag)
 ```

-In addition add the `__handle_namespace_flag` implementation in the `BashCompletionFunction`
+In addition add the `__kubectl_get_namespaces` implementation in the `BashCompletionFunction`
 value, e.g.:

 ```bash
--- a/vendor/github.com/spf13/cobra/cobra.go
+++ b/vendor/github.com/spf13/cobra/cobra.go
@ -52,7 +52,7 @@ var EnableCommandSorting = true
 // if the CLI is started from explorer.exe.
 // To disable the mousetrap, just set this variable to blank string ("").
 // Works only on Microsoft Windows.
-var MousetrapHelpText string = `This is a command line tool.
+var MousetrapHelpText = `This is a command line tool.

 You need to open cmd.exe and run it from there.
 `
@ -61,7 +61,7 @@ You need to open cmd.exe and run it from there.
 // if the CLI is started from explorer.exe. Set to 0 to wait for the return key to be pressed.
 // To disable the mousetrap, just set MousetrapHelpText to blank string ("").
 // Works only on Microsoft Windows.
-var MousetrapDisplayDuration time.Duration = 5 * time.Second
+var MousetrapDisplayDuration = 5 * time.Second

 // AddTemplateFunc adds a template function that's available to Usage and Help
 // template generation.
--- a/vendor/github.com/spf13/cobra/command.go
+++ b/vendor/github.com/spf13/cobra/command.go
@ -17,6 +17,7 @@ package cobra

 import (
 	"bytes"
+	"context"
 	"fmt"
 	"io"
 	"os"
@ -56,6 +57,10 @@ type Command struct {

 	// ValidArgs is list of all valid non-flag arguments that are accepted in bash completions
 	ValidArgs []string
+	// ValidArgsFunction is an optional function that provides valid non-flag arguments for bash completion.
+	// It is a dynamic version of using ValidArgs.
+	// Only one of ValidArgs and ValidArgsFunction can be used for a command.
+	ValidArgsFunction func(cmd *Command, args []string, toComplete string) ([]string, ShellCompDirective)

 	// Expected arguments
 	Args PositionalArgs
@ -80,7 +85,8 @@ type Command struct {

 	// Version defines the version for this command. If this value is non-empty and the command does not
 	// define a "version" flag, a "version" boolean flag will be added to the command and, if specified,
-	// will print content of the "Version" variable.
+	// will print content of the "Version" variable. A shorthand "v" flag will also be added if the
+	// command does not define one.
 	Version string

 	// The *Run functions are executed in the following order:
@ -140,9 +146,11 @@ type Command struct {
 	// TraverseChildren parses flags on all parents before executing child command.
 	TraverseChildren bool

-	//FParseErrWhitelist flag parse errors to be ignored
+	// FParseErrWhitelist flag parse errors to be ignored
 	FParseErrWhitelist FParseErrWhitelist

+	ctx context.Context
+
 	// commands is the list of commands supported by this program.
 	commands []*Command
 	// parent is a parent command for this command.
@ -202,6 +210,12 @@ type Command struct {
 	errWriter io.Writer
 }

+// Context returns underlying command context. If command wasn't
+// executed with ExecuteContext Context returns Background context.
+func (c *Command) Context() context.Context {
+	return c.ctx
+}
+
 // SetArgs sets arguments for the command. It is set to os.Args[1:] by default, if desired, can be overridden
 // particularly useful when testing.
 func (c *Command) SetArgs(a []string) {
@ -228,7 +242,7 @@ func (c *Command) SetErr(newErr io.Writer) {
 	c.errWriter = newErr
 }

-// SetOut sets the source for input data
+// SetIn sets the source for input data
 // If newIn is nil, os.Stdin is used.
 func (c *Command) SetIn(newIn io.Reader) {
 	c.inReader = newIn
@ -297,7 +311,7 @@ func (c *Command) ErrOrStderr() io.Writer {
 	return c.getErr(os.Stderr)
 }

-// ErrOrStderr returns output to stderr
+// InOrStdin returns input to stdin
 func (c *Command) InOrStdin() io.Reader {
 	return c.getIn(os.Stdin)
 }
@ -369,6 +383,8 @@ func (c *Command) HelpFunc() func(*Command, []string) {
 	}
 	return func(c *Command, a []string) {
 		c.mergePersistentFlags()
+		// The help should be sent to stdout
+		// See https://github.com/spf13/cobra/issues/1002
 		err := tmpl(c.OutOrStdout(), c.HelpTemplate(), c)
 		if err != nil {
 			c.Println(err)
@ -857,6 +873,13 @@ func (c *Command) preRun() {
 	}
 }

+// ExecuteContext is the same as Execute(), but sets the ctx on the command.
+// Retrieve ctx by calling cmd.Context() inside your *Run lifecycle functions.
+func (c *Command) ExecuteContext(ctx context.Context) error {
+	c.ctx = ctx
+	return c.Execute()
+}
+
 // Execute uses the args (os.Args[1:] by default)
 // and run through the command tree finding appropriate matches
 // for commands and then corresponding flags.
@ -867,6 +890,10 @@ func (c *Command) Execute() error {

 // ExecuteC executes the command.
 func (c *Command) ExecuteC() (cmd *Command, err error) {
+	if c.ctx == nil {
+		c.ctx = context.Background()
+	}
+
 	// Regardless of what command execute is called on, run on Root only
 	if c.HasParent() {
 		return c.Root().ExecuteC()
@ -888,6 +915,9 @@ func (c *Command) ExecuteC() (cmd *Command, err error) {
 		args = os.Args[1:]
 	}

+	// initialize the hidden command to be used for bash completion
+	c.initCompleteCmd(args)
+
 	var flags []string
 	if c.TraverseChildren {
 		cmd, flags, err = c.Traverse(args)
@ -911,6 +941,12 @@ func (c *Command) ExecuteC() (cmd *Command, err error) {
 		cmd.commandCalledAs.name = cmd.Name()
 	}

+	// We have to pass global context to children command
+	// if context is present on the parent command.
+	if cmd.ctx == nil {
+		cmd.ctx = c.ctx
+	}
+
 	err = cmd.execute(flags)
 	if err != nil {
 		// Always show help if requested, even if SilenceErrors is in
@ -994,7 +1030,11 @@ func (c *Command) InitDefaultVersionFlag() {
 		} else {
 			usage += c.Name()
 		}
-		c.Flags().Bool("version", false, usage)
+		if c.Flags().ShorthandLookup("v") == nil {
+			c.Flags().BoolP("version", "v", false, usage)
+		} else {
+			c.Flags().Bool("version", false, usage)
+		}
 	}
 }

@ -1547,7 +1587,7 @@ func (c *Command) ParseFlags(args []string) error {
 	beforeErrorBufLen := c.flagErrorBuf.Len()
 	c.mergePersistentFlags()

-	//do it here after merging all flags and just before parse
+	// do it here after merging all flags and just before parse
 	c.Flags().ParseErrorsWhitelist = flag.ParseErrorsWhitelist(c.FParseErrWhitelist)

 	err := c.Flags().Parse(args)
--- a/vendor/github.com/spf13/cobra/custom_completions.go
+++ b/vendor/github.com/spf13/cobra/custom_completions.go
@ -0,0 +1,384 @@
+package cobra
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/spf13/pflag"
+)
+
+const (
+	// ShellCompRequestCmd is the name of the hidden command that is used to request
+	// completion results from the program.  It is used by the shell completion scripts.
+	ShellCompRequestCmd = "__complete"
+	// ShellCompNoDescRequestCmd is the name of the hidden command that is used to request
+	// completion results without their description.  It is used by the shell completion scripts.
+	ShellCompNoDescRequestCmd = "__completeNoDesc"
+)
+
+// Global map of flag completion functions.
+var flagCompletionFunctions = map[*pflag.Flag]func(cmd *Command, args []string, toComplete string) ([]string, ShellCompDirective){}
+
+// ShellCompDirective is a bit map representing the different behaviors the shell
+// can be instructed to have once completions have been provided.
+type ShellCompDirective int
+
+const (
+	// ShellCompDirectiveError indicates an error occurred and completions should be ignored.
+	ShellCompDirectiveError ShellCompDirective = 1 << iota
+
+	// ShellCompDirectiveNoSpace indicates that the shell should not add a space
+	// after the completion even if there is a single completion provided.
+	ShellCompDirectiveNoSpace
+
+	// ShellCompDirectiveNoFileComp indicates that the shell should not provide
+	// file completion even when no completion is provided.
+	// This currently does not work for zsh or bash < 4
+	ShellCompDirectiveNoFileComp
+
+	// ShellCompDirectiveDefault indicates to let the shell perform its default
+	// behavior after completions have been provided.
+	ShellCompDirectiveDefault ShellCompDirective = 0
+)
+
+// RegisterFlagCompletionFunc should be called to register a function to provide completion for a flag.
+func (c *Command) RegisterFlagCompletionFunc(flagName string, f func(cmd *Command, args []string, toComplete string) ([]string, ShellCompDirective)) error {
+	flag := c.Flag(flagName)
+	if flag == nil {
+		return fmt.Errorf("RegisterFlagCompletionFunc: flag '%s' does not exist", flagName)
+	}
+	if _, exists := flagCompletionFunctions[flag]; exists {
+		return fmt.Errorf("RegisterFlagCompletionFunc: flag '%s' already registered", flagName)
+	}
+	flagCompletionFunctions[flag] = f
+	return nil
+}
+
+// Returns a string listing the different directive enabled in the specified parameter
+func (d ShellCompDirective) string() string {
+	var directives []string
+	if d&ShellCompDirectiveError != 0 {
+		directives = append(directives, "ShellCompDirectiveError")
+	}
+	if d&ShellCompDirectiveNoSpace != 0 {
+		directives = append(directives, "ShellCompDirectiveNoSpace")
+	}
+	if d&ShellCompDirectiveNoFileComp != 0 {
+		directives = append(directives, "ShellCompDirectiveNoFileComp")
+	}
+	if len(directives) == 0 {
+		directives = append(directives, "ShellCompDirectiveDefault")
+	}
+
+	if d > ShellCompDirectiveError+ShellCompDirectiveNoSpace+ShellCompDirectiveNoFileComp {
+		return fmt.Sprintf("ERROR: unexpected ShellCompDirective value: %d", d)
+	}
+	return strings.Join(directives, ", ")
+}
+
+// Adds a special hidden command that can be used to request custom completions.
+func (c *Command) initCompleteCmd(args []string) {
+	completeCmd := &Command{
+		Use:                   fmt.Sprintf("%s [command-line]", ShellCompRequestCmd),
+		Aliases:               []string{ShellCompNoDescRequestCmd},
+		DisableFlagsInUseLine: true,
+		Hidden:                true,
+		DisableFlagParsing:    true,
+		Args:                  MinimumNArgs(1),
+		Short:                 "Request shell completion choices for the specified command-line",
+		Long: fmt.Sprintf("%[2]s is a special command that is used by the shell completion logic\n%[1]s",
+			"to request completion choices for the specified command-line.", ShellCompRequestCmd),
+		Run: func(cmd *Command, args []string) {
+			finalCmd, completions, directive, err := cmd.getCompletions(args)
+			if err != nil {
+				CompErrorln(err.Error())
+				// Keep going for multiple reasons:
+				// 1- There could be some valid completions even though there was an error
+				// 2- Even without completions, we need to print the directive
+			}
+
+			noDescriptions := (cmd.CalledAs() == ShellCompNoDescRequestCmd)
+			for _, comp := range completions {
+				if noDescriptions {
+					// Remove any description that may be included following a tab character.
+					comp = strings.Split(comp, "\t")[0]
+				}
+				// Print each possible completion to stdout for the completion script to consume.
+				fmt.Fprintln(finalCmd.OutOrStdout(), comp)
+			}
+
+			if directive > ShellCompDirectiveError+ShellCompDirectiveNoSpace+ShellCompDirectiveNoFileComp {
+				directive = ShellCompDirectiveDefault
+			}
+
+			// As the last printout, print the completion directive for the completion script to parse.
+			// The directive integer must be that last character following a single colon (:).
+			// The completion script expects :<directive>
+			fmt.Fprintf(finalCmd.OutOrStdout(), ":%d\n", directive)
+
+			// Print some helpful info to stderr for the user to understand.
+			// Output from stderr must be ignored by the completion script.
+			fmt.Fprintf(finalCmd.ErrOrStderr(), "Completion ended with directive: %s\n", directive.string())
+		},
+	}
+	c.AddCommand(completeCmd)
+	subCmd, _, err := c.Find(args)
+	if err != nil || subCmd.Name() != ShellCompRequestCmd {
+		// Only create this special command if it is actually being called.
+		// This reduces possible side-effects of creating such a command;
+		// for example, having this command would cause problems to a
+		// cobra program that only consists of the root command, since this
+		// command would cause the root command to suddenly have a subcommand.
+		c.RemoveCommand(completeCmd)
+	}
+}
+
+func (c *Command) getCompletions(args []string) (*Command, []string, ShellCompDirective, error) {
+	var completions []string
+
+	// The last argument, which is not completely typed by the user,
+	// should not be part of the list of arguments
+	toComplete := args[len(args)-1]
+	trimmedArgs := args[:len(args)-1]
+
+	// Find the real command for which completion must be performed
+	finalCmd, finalArgs, err := c.Root().Find(trimmedArgs)
+	if err != nil {
+		// Unable to find the real command. E.g., <program> someInvalidCmd <TAB>
+		return c, completions, ShellCompDirectiveDefault, fmt.Errorf("Unable to find a command for arguments: %v", trimmedArgs)
+	}
+
+	// When doing completion of a flag name, as soon as an argument starts with
+	// a '-' we know it is a flag.  We cannot use isFlagArg() here as it requires
+	// the flag to be complete
+	if len(toComplete) > 0 && toComplete[0] == '-' && !strings.Contains(toComplete, "=") {
+		// We are completing a flag name
+		finalCmd.NonInheritedFlags().VisitAll(func(flag *pflag.Flag) {
+			completions = append(completions, getFlagNameCompletions(flag, toComplete)...)
+		})
+		finalCmd.InheritedFlags().VisitAll(func(flag *pflag.Flag) {
+			completions = append(completions, getFlagNameCompletions(flag, toComplete)...)
+		})
+
+		directive := ShellCompDirectiveDefault
+		if len(completions) > 0 {
+			if strings.HasSuffix(completions[0], "=") {
+				directive = ShellCompDirectiveNoSpace
+			}
+		}
+		return finalCmd, completions, directive, nil
+	}
+
+	var flag *pflag.Flag
+	if !finalCmd.DisableFlagParsing {
+		// We only do flag completion if we are allowed to parse flags
+		// This is important for commands which have requested to do their own flag completion.
+		flag, finalArgs, toComplete, err = checkIfFlagCompletion(finalCmd, finalArgs, toComplete)
+		if err != nil {
+			// Error while attempting to parse flags
+			return finalCmd, completions, ShellCompDirectiveDefault, err
+		}
+	}
+
+	if flag == nil {
+		// Complete subcommand names
+		for _, subCmd := range finalCmd.Commands() {
+			if subCmd.IsAvailableCommand() && strings.HasPrefix(subCmd.Name(), toComplete) {
+				completions = append(completions, fmt.Sprintf("%s\t%s", subCmd.Name(), subCmd.Short))
+			}
+		}
+
+		if len(finalCmd.ValidArgs) > 0 {
+			// Always complete ValidArgs, even if we are completing a subcommand name.
+			// This is for commands that have both subcommands and ValidArgs.
+			for _, validArg := range finalCmd.ValidArgs {
+				if strings.HasPrefix(validArg, toComplete) {
+					completions = append(completions, validArg)
+				}
+			}
+
+			// If there are ValidArgs specified (even if they don't match), we stop completion.
+			// Only one of ValidArgs or ValidArgsFunction can be used for a single command.
+			return finalCmd, completions, ShellCompDirectiveNoFileComp, nil
+		}
+
+		// Always let the logic continue so as to add any ValidArgsFunction completions,
+		// even if we already found sub-commands.
+		// This is for commands that have subcommands but also specify a ValidArgsFunction.
+	}
+
+	// Parse the flags and extract the arguments to prepare for calling the completion function
+	if err = finalCmd.ParseFlags(finalArgs); err != nil {
+		return finalCmd, completions, ShellCompDirectiveDefault, fmt.Errorf("Error while parsing flags from args %v: %s", finalArgs, err.Error())
+	}
+
+	// We only remove the flags from the arguments if DisableFlagParsing is not set.
+	// This is important for commands which have requested to do their own flag completion.
+	if !finalCmd.DisableFlagParsing {
+		finalArgs = finalCmd.Flags().Args()
+	}
+
+	// Find the completion function for the flag or command
+	var completionFn func(cmd *Command, args []string, toComplete string) ([]string, ShellCompDirective)
+	if flag != nil {
+		completionFn = flagCompletionFunctions[flag]
+	} else {
+		completionFn = finalCmd.ValidArgsFunction
+	}
+	if completionFn == nil {
+		// Go custom completion not supported/needed for this flag or command
+		return finalCmd, completions, ShellCompDirectiveDefault, nil
+	}
+
+	// Call the registered completion function to get the completions
+	comps, directive := completionFn(finalCmd, finalArgs, toComplete)
+	completions = append(completions, comps...)
+	return finalCmd, completions, directive, nil
+}
+
+func getFlagNameCompletions(flag *pflag.Flag, toComplete string) []string {
+	if nonCompletableFlag(flag) {
+		return []string{}
+	}
+
+	var completions []string
+	flagName := "--" + flag.Name
+	if strings.HasPrefix(flagName, toComplete) {
+		// Flag without the =
+		completions = append(completions, fmt.Sprintf("%s\t%s", flagName, flag.Usage))
+
+		if len(flag.NoOptDefVal) == 0 {
+			// Flag requires a value, so it can be suffixed with =
+			flagName += "="
+			completions = append(completions, fmt.Sprintf("%s\t%s", flagName, flag.Usage))
+		}
+	}
+
+	flagName = "-" + flag.Shorthand
+	if len(flag.Shorthand) > 0 && strings.HasPrefix(flagName, toComplete) {
+		completions = append(completions, fmt.Sprintf("%s\t%s", flagName, flag.Usage))
+	}
+
+	return completions
+}
+
+func checkIfFlagCompletion(finalCmd *Command, args []string, lastArg string) (*pflag.Flag, []string, string, error) {
+	var flagName string
+	trimmedArgs := args
+	flagWithEqual := false
+	if isFlagArg(lastArg) {
+		if index := strings.Index(lastArg, "="); index >= 0 {
+			flagName = strings.TrimLeft(lastArg[:index], "-")
+			lastArg = lastArg[index+1:]
+			flagWithEqual = true
+		} else {
+			return nil, nil, "", errors.New("Unexpected completion request for flag")
+		}
+	}
+
+	if len(flagName) == 0 {
+		if len(args) > 0 {
+			prevArg := args[len(args)-1]
+			if isFlagArg(prevArg) {
+				// Only consider the case where the flag does not contain an =.
+				// If the flag contains an = it means it has already been fully processed,
+				// so we don't need to deal with it here.
+				if index := strings.Index(prevArg, "="); index < 0 {
+					flagName = strings.TrimLeft(prevArg, "-")
+
+					// Remove the uncompleted flag or else there could be an error created
+					// for an invalid value for that flag
+					trimmedArgs = args[:len(args)-1]
+				}
+			}
+		}
+	}
+
+	if len(flagName) == 0 {
+		// Not doing flag completion
+		return nil, trimmedArgs, lastArg, nil
+	}
+
+	flag := findFlag(finalCmd, flagName)
+	if flag == nil {
+		// Flag not supported by this command, nothing to complete
+		err := fmt.Errorf("Subcommand '%s' does not support flag '%s'", finalCmd.Name(), flagName)
+		return nil, nil, "", err
+	}
+
+	if !flagWithEqual {
+		if len(flag.NoOptDefVal) != 0 {
+			// We had assumed dealing with a two-word flag but the flag is a boolean flag.
+			// In that case, there is no value following it, so we are not really doing flag completion.
+			// Reset everything to do noun completion.
+			trimmedArgs = args
+			flag = nil
+		}
+	}
+
+	return flag, trimmedArgs, lastArg, nil
+}
+
+func findFlag(cmd *Command, name string) *pflag.Flag {
+	flagSet := cmd.Flags()
+	if len(name) == 1 {
+		// First convert the short flag into a long flag
+		// as the cmd.Flag() search only accepts long flags
+		if short := flagSet.ShorthandLookup(name); short != nil {
+			name = short.Name
+		} else {
+			set := cmd.InheritedFlags()
+			if short = set.ShorthandLookup(name); short != nil {
+				name = short.Name
+			} else {
+				return nil
+			}
+		}
+	}
+	return cmd.Flag(name)
+}
+
+// CompDebug prints the specified string to the same file as where the
+// completion script prints its logs.
+// Note that completion printouts should never be on stdout as they would
+// be wrongly interpreted as actual completion choices by the completion script.
+func CompDebug(msg string, printToStdErr bool) {
+	msg = fmt.Sprintf("[Debug] %s", msg)
+
+	// Such logs are only printed when the user has set the environment
+	// variable BASH_COMP_DEBUG_FILE to the path of some file to be used.
+	if path := os.Getenv("BASH_COMP_DEBUG_FILE"); path != "" {
+		f, err := os.OpenFile(path,
+			os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+		if err == nil {
+			defer f.Close()
+			f.WriteString(msg)
+		}
+	}
+
+	if printToStdErr {
+		// Must print to stderr for this not to be read by the completion script.
+		fmt.Fprintf(os.Stderr, msg)
+	}
+}
+
+// CompDebugln prints the specified string with a newline at the end
+// to the same file as where the completion script prints its logs.
+// Such logs are only printed when the user has set the environment
+// variable BASH_COMP_DEBUG_FILE to the path of some file to be used.
+func CompDebugln(msg string, printToStdErr bool) {
+	CompDebug(fmt.Sprintf("%s\n", msg), printToStdErr)
+}
+
+// CompError prints the specified completion message to stderr.
+func CompError(msg string) {
+	msg = fmt.Sprintf("[Error] %s", msg)
+	CompDebug(msg, true)
+}
+
+// CompErrorln prints the specified completion message to stderr with a newline at the end.
+func CompErrorln(msg string) {
+	CompError(fmt.Sprintf("%s\n", msg))
+}
--- a/vendor/github.com/spf13/cobra/fish_completions.go
+++ b/vendor/github.com/spf13/cobra/fish_completions.go
@ -0,0 +1,172 @@
+package cobra
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+)
+
+func genFishComp(buf *bytes.Buffer, name string, includeDesc bool) {
+	compCmd := ShellCompRequestCmd
+	if !includeDesc {
+		compCmd = ShellCompNoDescRequestCmd
+	}
+	buf.WriteString(fmt.Sprintf("# fish completion for %-36s -*- shell-script -*-\n", name))
+	buf.WriteString(fmt.Sprintf(`
+function __%[1]s_debug
+    set file "$BASH_COMP_DEBUG_FILE"
+    if test -n "$file"
+        echo "$argv" >> $file
+    end
+end
+
+function __%[1]s_perform_completion
+    __%[1]s_debug "Starting __%[1]s_perform_completion with: $argv"
+
+    set args (string split -- " " "$argv")
+    set lastArg "$args[-1]"
+
+    __%[1]s_debug "args: $args"
+    __%[1]s_debug "last arg: $lastArg"
+
+    set emptyArg ""
+    if test -z "$lastArg"
+        __%[1]s_debug "Setting emptyArg"
+        set emptyArg \"\"
+    end
+    __%[1]s_debug "emptyArg: $emptyArg"
+
+    set requestComp "$args[1] %[2]s $args[2..-1] $emptyArg"
+    __%[1]s_debug "Calling $requestComp"
+
+    set results (eval $requestComp 2> /dev/null)
+    set comps $results[1..-2]
+    set directiveLine $results[-1]
+
+    # For Fish, when completing a flag with an = (e.g., <program> -n=<TAB>)
+    # completions must be prefixed with the flag
+    set flagPrefix (string match -r -- '-.*=' "$lastArg")
+
+    __%[1]s_debug "Comps: $comps"
+    __%[1]s_debug "DirectiveLine: $directiveLine"
+    __%[1]s_debug "flagPrefix: $flagPrefix"
+
+    for comp in $comps
+        printf "%%s%%s\n" "$flagPrefix" "$comp"
+    end
+
+    printf "%%s\n" "$directiveLine"
+end
+
+# This function does three things:
+# 1- Obtain the completions and store them in the global __%[1]s_comp_results
+# 2- Set the __%[1]s_comp_do_file_comp flag if file completion should be performed
+#    and unset it otherwise
+# 3- Return true if the completion results are not empty
+function __%[1]s_prepare_completions
+    # Start fresh
+    set --erase __%[1]s_comp_do_file_comp
+    set --erase __%[1]s_comp_results
+
+    # Check if the command-line is already provided.  This is useful for testing.
+    if not set --query __%[1]s_comp_commandLine
+        set __%[1]s_comp_commandLine (commandline)
+    end
+    __%[1]s_debug "commandLine is: $__%[1]s_comp_commandLine"
+
+    set results (__%[1]s_perform_completion "$__%[1]s_comp_commandLine")
+    set --erase __%[1]s_comp_commandLine
+    __%[1]s_debug "Completion results: $results"
+
+    if test -z "$results"
+        __%[1]s_debug "No completion, probably due to a failure"
+        # Might as well do file completion, in case it helps
+        set --global __%[1]s_comp_do_file_comp 1
+        return 0
+    end
+
+    set directive (string sub --start 2 $results[-1])
+    set --global __%[1]s_comp_results $results[1..-2]
+
+    __%[1]s_debug "Completions are: $__%[1]s_comp_results"
+    __%[1]s_debug "Directive is: $directive"
+
+    if test -z "$directive"
+        set directive 0
+    end
+
+    set compErr (math (math --scale 0 $directive / %[3]d) %% 2)
+    if test $compErr -eq 1
+        __%[1]s_debug "Received error directive: aborting."
+        # Might as well do file completion, in case it helps
+        set --global __%[1]s_comp_do_file_comp 1
+        return 0
+    end
+
+    set nospace (math (math --scale 0 $directive / %[4]d) %% 2)
+    set nofiles (math (math --scale 0 $directive / %[5]d) %% 2)
+
+    __%[1]s_debug "nospace: $nospace, nofiles: $nofiles"
+
+    # Important not to quote the variable for count to work
+    set numComps (count $__%[1]s_comp_results)
+    __%[1]s_debug "numComps: $numComps"
+
+    if test $numComps -eq 1; and test $nospace -ne 0
+        # To support the "nospace" directive we trick the shell
+        # by outputting an extra, longer completion.
+        __%[1]s_debug "Adding second completion to perform nospace directive"
+        set --append __%[1]s_comp_results $__%[1]s_comp_results[1].
+    end
+
+    if test $numComps -eq 0; and test $nofiles -eq 0
+        __%[1]s_debug "Requesting file completion"
+        set --global __%[1]s_comp_do_file_comp 1
+    end
+
+    # If we don't want file completion, we must return true even if there
+    # are no completions found.  This is because fish will perform the last
+    # completion command, even if its condition is false, if no other
+    # completion command was triggered
+    return (not set --query __%[1]s_comp_do_file_comp)
+end
+
+# Remove any pre-existing completions for the program since we will be handling all of them
+# TODO this cleanup is not sufficient.  Fish completions are only loaded once the user triggers
+# them, so the below deletion will not work as it is run too early.  What else can we do?
+complete -c %[1]s -e
+
+# The order in which the below two lines are defined is very important so that __%[1]s_prepare_completions
+# is called first.  It is __%[1]s_prepare_completions that sets up the __%[1]s_comp_do_file_comp variable.
+#
+# This completion will be run second as complete commands are added FILO.
+# It triggers file completion choices when __%[1]s_comp_do_file_comp is set.
+complete -c %[1]s -n 'set --query __%[1]s_comp_do_file_comp'
+
+# This completion will be run first as complete commands are added FILO.
+# The call to __%[1]s_prepare_completions will setup both __%[1]s_comp_results abd __%[1]s_comp_do_file_comp.
+# It provides the program's completion choices.
+complete -c %[1]s -n '__%[1]s_prepare_completions' -f -a '$__%[1]s_comp_results'
+
+`, name, compCmd, ShellCompDirectiveError, ShellCompDirectiveNoSpace, ShellCompDirectiveNoFileComp))
+}
+
+// GenFishCompletion generates fish completion file and writes to the passed writer.
+func (c *Command) GenFishCompletion(w io.Writer, includeDesc bool) error {
+	buf := new(bytes.Buffer)
+	genFishComp(buf, c.Name(), includeDesc)
+	_, err := buf.WriteTo(w)
+	return err
+}
+
+// GenFishCompletionFile generates fish completion file.
+func (c *Command) GenFishCompletionFile(filename string, includeDesc bool) error {
+	outFile, err := os.Create(filename)
+	if err != nil {
+		return err
+	}
+	defer outFile.Close()
+
+	return c.GenFishCompletion(outFile, includeDesc)
+}
--- a/vendor/github.com/spf13/cobra/fish_completions.md
+++ b/vendor/github.com/spf13/cobra/fish_completions.md
@ -0,0 +1,7 @@
+## Generating Fish Completions for your own cobra.Command
+
+Cobra supports native Fish completions generated from the root `cobra.Command`.  You can use the `command.GenFishCompletion()` or `command.GenFishCompletionFile()` functions. You must provide these functions with a parameter indicating if the completions should be annotated with a description; Cobra will provide the description automatically based on usage information.  You can choose to make this option configurable by your users.
+
+### Limitations
+
+* Custom completions implemented using the `ValidArgsFunction` and `RegisterFlagCompletionFunc()` are supported automatically but the ones implemented in Bash scripting are not.
--- a/vendor/github.com/spf13/cobra/go.mod
+++ b/vendor/github.com/spf13/cobra/go.mod
@ -3,11 +3,10 @@ module github.com/spf13/cobra
 go 1.12

 require (
-	github.com/BurntSushi/toml v0.3.1 // indirect
-	github.com/cpuguy83/go-md2man v1.0.10
+	github.com/cpuguy83/go-md2man/v2 v2.0.0
 	github.com/inconshreveable/mousetrap v1.0.0
 	github.com/mitchellh/go-homedir v1.1.0
 	github.com/spf13/pflag v1.0.3
-	github.com/spf13/viper v1.3.2
+	github.com/spf13/viper v1.4.0
 	gopkg.in/yaml.v2 v2.2.2
 )
--- a/vendor/github.com/spf13/cobra/go.sum
+++ b/vendor/github.com/spf13/cobra/go.sum
@ -1,31 +1,91 @@
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
+github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
+github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
+github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
+github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
+github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
 github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
-github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
 github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
-github.com/cpuguy83/go-md2man v1.0.10 h1:BSKMNlYxDvnunlTymqtgONjNnaRV1sTpcovwwjF22jk=
-github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
+github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
+github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
+github.com/cpuguy83/go-md2man/v2 v2.0.0 h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM=
+github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
+github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
 github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
+github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
+github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
+github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
+github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
+github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
+github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
+github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
+github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
+github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
+github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
 github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
 github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
 github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
 github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
+github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
+github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
+github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
+github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY=
 github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
+github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
 github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
 github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
 github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE=
 github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
+github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
+github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
 github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
 github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
+github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo=
-github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
+github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
+github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso=
+github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
+github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
+github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
+github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
+github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
+github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
+github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
+github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
+github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
+github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
+github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
+github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
+github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
 github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI=
 github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
 github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8=
@ -34,18 +94,56 @@ github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9
 github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
 github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
 github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
-github.com/spf13/viper v1.3.2 h1:VUFqw5KcqRf7i70GOzW7N+Q7+gxVBkSSqiXB12+JQ4M=
-github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
+github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU=
+github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE=
+github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
-github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
+github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
+github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
+github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
 github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
-golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
-golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a h1:1n5lsVfiQW3yfsRGu98756EH1YthsFqr/5mxHduZW2A=
-golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
+go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
+go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
+go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
+golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
+gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo=
+gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
+gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
--- a/vendor/github.com/spf13/pflag/.travis.yml
+++ b/vendor/github.com/spf13/pflag/.travis.yml
@ -3,8 +3,9 @@ sudo: false
 language: go

 go:
-  - 1.7.3
-  - 1.8.1
+  - 1.9.x
+  - 1.10.x
+  - 1.11.x
  - tip

 matrix:
@ -12,7 +13,7 @@ matrix:
    - go: tip

 install:
-  - go get github.com/golang/lint/golint
+  - go get golang.org/x/lint/golint
  - export PATH=$GOPATH/bin:$PATH
  - go install ./...

--- a/vendor/github.com/spf13/pflag/README.md
+++ b/vendor/github.com/spf13/pflag/README.md
@ -86,8 +86,8 @@ fmt.Println("ip has value ", *ip)
 fmt.Println("flagvar has value ", flagvar)
 ```

-There are helpers function to get values later if you have the FlagSet but
-it was difficult to keep up with all of the flag pointers in your code.
+There are helper functions available to get the value stored in a Flag if you have a FlagSet but find
+it difficult to keep up with all of the pointers in your code.
 If you have a pflag.FlagSet with a flag called 'flagname' of type int you
 can use GetInt() to get the int value. But notice that 'flagname' must exist
 and it must be an int. GetString("flagname") will fail.
--- a/vendor/github.com/spf13/pflag/bool_slice.go
+++ b/vendor/github.com/spf13/pflag/bool_slice.go
@ -71,6 +71,44 @@ func (s *boolSliceValue) String() string {
 	return "[" + out + "]"
 }

+func (s *boolSliceValue) fromString(val string) (bool, error) {
+	return strconv.ParseBool(val)
+}
+
+func (s *boolSliceValue) toString(val bool) string {
+	return strconv.FormatBool(val)
+}
+
+func (s *boolSliceValue) Append(val string) error {
+	i, err := s.fromString(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *boolSliceValue) Replace(val []string) error {
+	out := make([]bool, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = s.fromString(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *boolSliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = s.toString(d)
+	}
+	return out
+}
+
 func boolSliceConv(val string) (interface{}, error) {
 	val = strings.Trim(val, "[]")
 	// Empty string would cause a slice with one (empty) entry
--- a/vendor/github.com/spf13/pflag/count.go
+++ b/vendor/github.com/spf13/pflag/count.go
@ -46,7 +46,7 @@ func (f *FlagSet) GetCount(name string) (int, error) {

 // CountVar defines a count flag with specified name, default value, and usage string.
 // The argument p points to an int variable in which to store the value of the flag.
-// A count flag will add 1 to its value evey time it is found on the command line
+// A count flag will add 1 to its value every time it is found on the command line
 func (f *FlagSet) CountVar(p *int, name string, usage string) {
 	f.CountVarP(p, name, "", usage)
 }
@ -69,7 +69,7 @@ func CountVarP(p *int, name, shorthand string, usage string) {

 // Count defines a count flag with specified name, default value, and usage string.
 // The return value is the address of an int variable that stores the value of the flag.
-// A count flag will add 1 to its value evey time it is found on the command line
+// A count flag will add 1 to its value every time it is found on the command line
 func (f *FlagSet) Count(name string, usage string) *int {
 	p := new(int)
 	f.CountVarP(p, name, "", usage)
--- a/vendor/github.com/spf13/pflag/duration_slice.go
+++ b/vendor/github.com/spf13/pflag/duration_slice.go
@ -51,6 +51,44 @@ func (s *durationSliceValue) String() string {
 	return "[" + strings.Join(out, ",") + "]"
 }

+func (s *durationSliceValue) fromString(val string) (time.Duration, error) {
+	return time.ParseDuration(val)
+}
+
+func (s *durationSliceValue) toString(val time.Duration) string {
+	return fmt.Sprintf("%s", val)
+}
+
+func (s *durationSliceValue) Append(val string) error {
+	i, err := s.fromString(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *durationSliceValue) Replace(val []string) error {
+	out := make([]time.Duration, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = s.fromString(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *durationSliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = s.toString(d)
+	}
+	return out
+}
+
 func durationSliceConv(val string) (interface{}, error) {
 	val = strings.Trim(val, "[]")
 	// Empty string would cause a slice with one (empty) entry
--- a/vendor/github.com/spf13/pflag/flag.go
+++ b/vendor/github.com/spf13/pflag/flag.go
@ -57,9 +57,9 @@ that give one-letter shorthands for flags. You can use these by appending
 	var ip = flag.IntP("flagname", "f", 1234, "help message")
 	var flagvar bool
 	func init() {
-		flag.BoolVarP("boolname", "b", true, "help message")
+		flag.BoolVarP(&flagvar, "boolname", "b", true, "help message")
 	}
-	flag.VarP(&flagVar, "varname", "v", 1234, "help message")
+	flag.VarP(&flagval, "varname", "v", "help message")
 Shorthand letters can be used with single dashes on the command line.
 Boolean shorthand flags can be combined with other shorthand flags.

@ -190,6 +190,18 @@ type Value interface {
 	Type() string
 }

+// SliceValue is a secondary interface to all flags which hold a list
+// of values.  This allows full control over the value of list flags,
+// and avoids complicated marshalling and unmarshalling to csv.
+type SliceValue interface {
+	// Append adds the specified value to the end of the flag value list.
+	Append(string) error
+	// Replace will fully overwrite any data currently in the flag value list.
+	Replace([]string) error
+	// GetSlice returns the flag value list as an array of strings.
+	GetSlice() []string
+}
+
 // sortFlags returns the flags as a slice in lexicographical sorted order.
 func sortFlags(flags map[NormalizedName]*Flag) []*Flag {
 	list := make(sort.StringSlice, len(flags))
--- a/vendor/github.com/spf13/pflag/float32_slice.go
+++ b/vendor/github.com/spf13/pflag/float32_slice.go
@ -0,0 +1,174 @@
+package pflag
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// -- float32Slice Value
+type float32SliceValue struct {
+	value   *[]float32
+	changed bool
+}
+
+func newFloat32SliceValue(val []float32, p *[]float32) *float32SliceValue {
+	isv := new(float32SliceValue)
+	isv.value = p
+	*isv.value = val
+	return isv
+}
+
+func (s *float32SliceValue) Set(val string) error {
+	ss := strings.Split(val, ",")
+	out := make([]float32, len(ss))
+	for i, d := range ss {
+		var err error
+		var temp64 float64
+		temp64, err = strconv.ParseFloat(d, 32)
+		if err != nil {
+			return err
+		}
+		out[i] = float32(temp64)
+
+	}
+	if !s.changed {
+		*s.value = out
+	} else {
+		*s.value = append(*s.value, out...)
+	}
+	s.changed = true
+	return nil
+}
+
+func (s *float32SliceValue) Type() string {
+	return "float32Slice"
+}
+
+func (s *float32SliceValue) String() string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = fmt.Sprintf("%f", d)
+	}
+	return "[" + strings.Join(out, ",") + "]"
+}
+
+func (s *float32SliceValue) fromString(val string) (float32, error) {
+	t64, err := strconv.ParseFloat(val, 32)
+	if err != nil {
+		return 0, err
+	}
+	return float32(t64), nil
+}
+
+func (s *float32SliceValue) toString(val float32) string {
+	return fmt.Sprintf("%f", val)
+}
+
+func (s *float32SliceValue) Append(val string) error {
+	i, err := s.fromString(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *float32SliceValue) Replace(val []string) error {
+	out := make([]float32, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = s.fromString(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *float32SliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = s.toString(d)
+	}
+	return out
+}
+
+func float32SliceConv(val string) (interface{}, error) {
+	val = strings.Trim(val, "[]")
+	// Empty string would cause a slice with one (empty) entry
+	if len(val) == 0 {
+		return []float32{}, nil
+	}
+	ss := strings.Split(val, ",")
+	out := make([]float32, len(ss))
+	for i, d := range ss {
+		var err error
+		var temp64 float64
+		temp64, err = strconv.ParseFloat(d, 32)
+		if err != nil {
+			return nil, err
+		}
+		out[i] = float32(temp64)
+
+	}
+	return out, nil
+}
+
+// GetFloat32Slice return the []float32 value of a flag with the given name
+func (f *FlagSet) GetFloat32Slice(name string) ([]float32, error) {
+	val, err := f.getFlagType(name, "float32Slice", float32SliceConv)
+	if err != nil {
+		return []float32{}, err
+	}
+	return val.([]float32), nil
+}
+
+// Float32SliceVar defines a float32Slice flag with specified name, default value, and usage string.
+// The argument p points to a []float32 variable in which to store the value of the flag.
+func (f *FlagSet) Float32SliceVar(p *[]float32, name string, value []float32, usage string) {
+	f.VarP(newFloat32SliceValue(value, p), name, "", usage)
+}
+
+// Float32SliceVarP is like Float32SliceVar, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) Float32SliceVarP(p *[]float32, name, shorthand string, value []float32, usage string) {
+	f.VarP(newFloat32SliceValue(value, p), name, shorthand, usage)
+}
+
+// Float32SliceVar defines a float32[] flag with specified name, default value, and usage string.
+// The argument p points to a float32[] variable in which to store the value of the flag.
+func Float32SliceVar(p *[]float32, name string, value []float32, usage string) {
+	CommandLine.VarP(newFloat32SliceValue(value, p), name, "", usage)
+}
+
+// Float32SliceVarP is like Float32SliceVar, but accepts a shorthand letter that can be used after a single dash.
+func Float32SliceVarP(p *[]float32, name, shorthand string, value []float32, usage string) {
+	CommandLine.VarP(newFloat32SliceValue(value, p), name, shorthand, usage)
+}
+
+// Float32Slice defines a []float32 flag with specified name, default value, and usage string.
+// The return value is the address of a []float32 variable that stores the value of the flag.
+func (f *FlagSet) Float32Slice(name string, value []float32, usage string) *[]float32 {
+	p := []float32{}
+	f.Float32SliceVarP(&p, name, "", value, usage)
+	return &p
+}
+
+// Float32SliceP is like Float32Slice, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) Float32SliceP(name, shorthand string, value []float32, usage string) *[]float32 {
+	p := []float32{}
+	f.Float32SliceVarP(&p, name, shorthand, value, usage)
+	return &p
+}
+
+// Float32Slice defines a []float32 flag with specified name, default value, and usage string.
+// The return value is the address of a []float32 variable that stores the value of the flag.
+func Float32Slice(name string, value []float32, usage string) *[]float32 {
+	return CommandLine.Float32SliceP(name, "", value, usage)
+}
+
+// Float32SliceP is like Float32Slice, but accepts a shorthand letter that can be used after a single dash.
+func Float32SliceP(name, shorthand string, value []float32, usage string) *[]float32 {
+	return CommandLine.Float32SliceP(name, shorthand, value, usage)
+}
--- a/vendor/github.com/spf13/pflag/float64_slice.go
+++ b/vendor/github.com/spf13/pflag/float64_slice.go
@ -0,0 +1,166 @@
+package pflag
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// -- float64Slice Value
+type float64SliceValue struct {
+	value   *[]float64
+	changed bool
+}
+
+func newFloat64SliceValue(val []float64, p *[]float64) *float64SliceValue {
+	isv := new(float64SliceValue)
+	isv.value = p
+	*isv.value = val
+	return isv
+}
+
+func (s *float64SliceValue) Set(val string) error {
+	ss := strings.Split(val, ",")
+	out := make([]float64, len(ss))
+	for i, d := range ss {
+		var err error
+		out[i], err = strconv.ParseFloat(d, 64)
+		if err != nil {
+			return err
+		}
+
+	}
+	if !s.changed {
+		*s.value = out
+	} else {
+		*s.value = append(*s.value, out...)
+	}
+	s.changed = true
+	return nil
+}
+
+func (s *float64SliceValue) Type() string {
+	return "float64Slice"
+}
+
+func (s *float64SliceValue) String() string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = fmt.Sprintf("%f", d)
+	}
+	return "[" + strings.Join(out, ",") + "]"
+}
+
+func (s *float64SliceValue) fromString(val string) (float64, error) {
+	return strconv.ParseFloat(val, 64)
+}
+
+func (s *float64SliceValue) toString(val float64) string {
+	return fmt.Sprintf("%f", val)
+}
+
+func (s *float64SliceValue) Append(val string) error {
+	i, err := s.fromString(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *float64SliceValue) Replace(val []string) error {
+	out := make([]float64, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = s.fromString(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *float64SliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = s.toString(d)
+	}
+	return out
+}
+
+func float64SliceConv(val string) (interface{}, error) {
+	val = strings.Trim(val, "[]")
+	// Empty string would cause a slice with one (empty) entry
+	if len(val) == 0 {
+		return []float64{}, nil
+	}
+	ss := strings.Split(val, ",")
+	out := make([]float64, len(ss))
+	for i, d := range ss {
+		var err error
+		out[i], err = strconv.ParseFloat(d, 64)
+		if err != nil {
+			return nil, err
+		}
+
+	}
+	return out, nil
+}
+
+// GetFloat64Slice return the []float64 value of a flag with the given name
+func (f *FlagSet) GetFloat64Slice(name string) ([]float64, error) {
+	val, err := f.getFlagType(name, "float64Slice", float64SliceConv)
+	if err != nil {
+		return []float64{}, err
+	}
+	return val.([]float64), nil
+}
+
+// Float64SliceVar defines a float64Slice flag with specified name, default value, and usage string.
+// The argument p points to a []float64 variable in which to store the value of the flag.
+func (f *FlagSet) Float64SliceVar(p *[]float64, name string, value []float64, usage string) {
+	f.VarP(newFloat64SliceValue(value, p), name, "", usage)
+}
+
+// Float64SliceVarP is like Float64SliceVar, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) Float64SliceVarP(p *[]float64, name, shorthand string, value []float64, usage string) {
+	f.VarP(newFloat64SliceValue(value, p), name, shorthand, usage)
+}
+
+// Float64SliceVar defines a float64[] flag with specified name, default value, and usage string.
+// The argument p points to a float64[] variable in which to store the value of the flag.
+func Float64SliceVar(p *[]float64, name string, value []float64, usage string) {
+	CommandLine.VarP(newFloat64SliceValue(value, p), name, "", usage)
+}
+
+// Float64SliceVarP is like Float64SliceVar, but accepts a shorthand letter that can be used after a single dash.
+func Float64SliceVarP(p *[]float64, name, shorthand string, value []float64, usage string) {
+	CommandLine.VarP(newFloat64SliceValue(value, p), name, shorthand, usage)
+}
+
+// Float64Slice defines a []float64 flag with specified name, default value, and usage string.
+// The return value is the address of a []float64 variable that stores the value of the flag.
+func (f *FlagSet) Float64Slice(name string, value []float64, usage string) *[]float64 {
+	p := []float64{}
+	f.Float64SliceVarP(&p, name, "", value, usage)
+	return &p
+}
+
+// Float64SliceP is like Float64Slice, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) Float64SliceP(name, shorthand string, value []float64, usage string) *[]float64 {
+	p := []float64{}
+	f.Float64SliceVarP(&p, name, shorthand, value, usage)
+	return &p
+}
+
+// Float64Slice defines a []float64 flag with specified name, default value, and usage string.
+// The return value is the address of a []float64 variable that stores the value of the flag.
+func Float64Slice(name string, value []float64, usage string) *[]float64 {
+	return CommandLine.Float64SliceP(name, "", value, usage)
+}
+
+// Float64SliceP is like Float64Slice, but accepts a shorthand letter that can be used after a single dash.
+func Float64SliceP(name, shorthand string, value []float64, usage string) *[]float64 {
+	return CommandLine.Float64SliceP(name, shorthand, value, usage)
+}
--- a/vendor/github.com/spf13/pflag/go.mod
+++ b/vendor/github.com/spf13/pflag/go.mod
@ -0,0 +1,3 @@
+module github.com/spf13/pflag
+
+go 1.12
--- a/vendor/github.com/spf13/pflag/go.sum
+++ b/vendor/github.com/spf13/pflag/go.sum
--- a/vendor/github.com/spf13/pflag/int32_slice.go
+++ b/vendor/github.com/spf13/pflag/int32_slice.go
@ -0,0 +1,174 @@
+package pflag
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// -- int32Slice Value
+type int32SliceValue struct {
+	value   *[]int32
+	changed bool
+}
+
+func newInt32SliceValue(val []int32, p *[]int32) *int32SliceValue {
+	isv := new(int32SliceValue)
+	isv.value = p
+	*isv.value = val
+	return isv
+}
+
+func (s *int32SliceValue) Set(val string) error {
+	ss := strings.Split(val, ",")
+	out := make([]int32, len(ss))
+	for i, d := range ss {
+		var err error
+		var temp64 int64
+		temp64, err = strconv.ParseInt(d, 0, 32)
+		if err != nil {
+			return err
+		}
+		out[i] = int32(temp64)
+
+	}
+	if !s.changed {
+		*s.value = out
+	} else {
+		*s.value = append(*s.value, out...)
+	}
+	s.changed = true
+	return nil
+}
+
+func (s *int32SliceValue) Type() string {
+	return "int32Slice"
+}
+
+func (s *int32SliceValue) String() string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = fmt.Sprintf("%d", d)
+	}
+	return "[" + strings.Join(out, ",") + "]"
+}
+
+func (s *int32SliceValue) fromString(val string) (int32, error) {
+	t64, err := strconv.ParseInt(val, 0, 32)
+	if err != nil {
+		return 0, err
+	}
+	return int32(t64), nil
+}
+
+func (s *int32SliceValue) toString(val int32) string {
+	return fmt.Sprintf("%d", val)
+}
+
+func (s *int32SliceValue) Append(val string) error {
+	i, err := s.fromString(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *int32SliceValue) Replace(val []string) error {
+	out := make([]int32, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = s.fromString(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *int32SliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = s.toString(d)
+	}
+	return out
+}
+
+func int32SliceConv(val string) (interface{}, error) {
+	val = strings.Trim(val, "[]")
+	// Empty string would cause a slice with one (empty) entry
+	if len(val) == 0 {
+		return []int32{}, nil
+	}
+	ss := strings.Split(val, ",")
+	out := make([]int32, len(ss))
+	for i, d := range ss {
+		var err error
+		var temp64 int64
+		temp64, err = strconv.ParseInt(d, 0, 32)
+		if err != nil {
+			return nil, err
+		}
+		out[i] = int32(temp64)
+
+	}
+	return out, nil
+}
+
+// GetInt32Slice return the []int32 value of a flag with the given name
+func (f *FlagSet) GetInt32Slice(name string) ([]int32, error) {
+	val, err := f.getFlagType(name, "int32Slice", int32SliceConv)
+	if err != nil {
+		return []int32{}, err
+	}
+	return val.([]int32), nil
+}
+
+// Int32SliceVar defines a int32Slice flag with specified name, default value, and usage string.
+// The argument p points to a []int32 variable in which to store the value of the flag.
+func (f *FlagSet) Int32SliceVar(p *[]int32, name string, value []int32, usage string) {
+	f.VarP(newInt32SliceValue(value, p), name, "", usage)
+}
+
+// Int32SliceVarP is like Int32SliceVar, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) Int32SliceVarP(p *[]int32, name, shorthand string, value []int32, usage string) {
+	f.VarP(newInt32SliceValue(value, p), name, shorthand, usage)
+}
+
+// Int32SliceVar defines a int32[] flag with specified name, default value, and usage string.
+// The argument p points to a int32[] variable in which to store the value of the flag.
+func Int32SliceVar(p *[]int32, name string, value []int32, usage string) {
+	CommandLine.VarP(newInt32SliceValue(value, p), name, "", usage)
+}
+
+// Int32SliceVarP is like Int32SliceVar, but accepts a shorthand letter that can be used after a single dash.
+func Int32SliceVarP(p *[]int32, name, shorthand string, value []int32, usage string) {
+	CommandLine.VarP(newInt32SliceValue(value, p), name, shorthand, usage)
+}
+
+// Int32Slice defines a []int32 flag with specified name, default value, and usage string.
+// The return value is the address of a []int32 variable that stores the value of the flag.
+func (f *FlagSet) Int32Slice(name string, value []int32, usage string) *[]int32 {
+	p := []int32{}
+	f.Int32SliceVarP(&p, name, "", value, usage)
+	return &p
+}
+
+// Int32SliceP is like Int32Slice, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) Int32SliceP(name, shorthand string, value []int32, usage string) *[]int32 {
+	p := []int32{}
+	f.Int32SliceVarP(&p, name, shorthand, value, usage)
+	return &p
+}
+
+// Int32Slice defines a []int32 flag with specified name, default value, and usage string.
+// The return value is the address of a []int32 variable that stores the value of the flag.
+func Int32Slice(name string, value []int32, usage string) *[]int32 {
+	return CommandLine.Int32SliceP(name, "", value, usage)
+}
+
+// Int32SliceP is like Int32Slice, but accepts a shorthand letter that can be used after a single dash.
+func Int32SliceP(name, shorthand string, value []int32, usage string) *[]int32 {
+	return CommandLine.Int32SliceP(name, shorthand, value, usage)
+}
--- a/vendor/github.com/spf13/pflag/int64_slice.go
+++ b/vendor/github.com/spf13/pflag/int64_slice.go
@ -0,0 +1,166 @@
+package pflag
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// -- int64Slice Value
+type int64SliceValue struct {
+	value   *[]int64
+	changed bool
+}
+
+func newInt64SliceValue(val []int64, p *[]int64) *int64SliceValue {
+	isv := new(int64SliceValue)
+	isv.value = p
+	*isv.value = val
+	return isv
+}
+
+func (s *int64SliceValue) Set(val string) error {
+	ss := strings.Split(val, ",")
+	out := make([]int64, len(ss))
+	for i, d := range ss {
+		var err error
+		out[i], err = strconv.ParseInt(d, 0, 64)
+		if err != nil {
+			return err
+		}
+
+	}
+	if !s.changed {
+		*s.value = out
+	} else {
+		*s.value = append(*s.value, out...)
+	}
+	s.changed = true
+	return nil
+}
+
+func (s *int64SliceValue) Type() string {
+	return "int64Slice"
+}
+
+func (s *int64SliceValue) String() string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = fmt.Sprintf("%d", d)
+	}
+	return "[" + strings.Join(out, ",") + "]"
+}
+
+func (s *int64SliceValue) fromString(val string) (int64, error) {
+	return strconv.ParseInt(val, 0, 64)
+}
+
+func (s *int64SliceValue) toString(val int64) string {
+	return fmt.Sprintf("%d", val)
+}
+
+func (s *int64SliceValue) Append(val string) error {
+	i, err := s.fromString(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *int64SliceValue) Replace(val []string) error {
+	out := make([]int64, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = s.fromString(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *int64SliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = s.toString(d)
+	}
+	return out
+}
+
+func int64SliceConv(val string) (interface{}, error) {
+	val = strings.Trim(val, "[]")
+	// Empty string would cause a slice with one (empty) entry
+	if len(val) == 0 {
+		return []int64{}, nil
+	}
+	ss := strings.Split(val, ",")
+	out := make([]int64, len(ss))
+	for i, d := range ss {
+		var err error
+		out[i], err = strconv.ParseInt(d, 0, 64)
+		if err != nil {
+			return nil, err
+		}
+
+	}
+	return out, nil
+}
+
+// GetInt64Slice return the []int64 value of a flag with the given name
+func (f *FlagSet) GetInt64Slice(name string) ([]int64, error) {
+	val, err := f.getFlagType(name, "int64Slice", int64SliceConv)
+	if err != nil {
+		return []int64{}, err
+	}
+	return val.([]int64), nil
+}
+
+// Int64SliceVar defines a int64Slice flag with specified name, default value, and usage string.
+// The argument p points to a []int64 variable in which to store the value of the flag.
+func (f *FlagSet) Int64SliceVar(p *[]int64, name string, value []int64, usage string) {
+	f.VarP(newInt64SliceValue(value, p), name, "", usage)
+}
+
+// Int64SliceVarP is like Int64SliceVar, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) Int64SliceVarP(p *[]int64, name, shorthand string, value []int64, usage string) {
+	f.VarP(newInt64SliceValue(value, p), name, shorthand, usage)
+}
+
+// Int64SliceVar defines a int64[] flag with specified name, default value, and usage string.
+// The argument p points to a int64[] variable in which to store the value of the flag.
+func Int64SliceVar(p *[]int64, name string, value []int64, usage string) {
+	CommandLine.VarP(newInt64SliceValue(value, p), name, "", usage)
+}
+
+// Int64SliceVarP is like Int64SliceVar, but accepts a shorthand letter that can be used after a single dash.
+func Int64SliceVarP(p *[]int64, name, shorthand string, value []int64, usage string) {
+	CommandLine.VarP(newInt64SliceValue(value, p), name, shorthand, usage)
+}
+
+// Int64Slice defines a []int64 flag with specified name, default value, and usage string.
+// The return value is the address of a []int64 variable that stores the value of the flag.
+func (f *FlagSet) Int64Slice(name string, value []int64, usage string) *[]int64 {
+	p := []int64{}
+	f.Int64SliceVarP(&p, name, "", value, usage)
+	return &p
+}
+
+// Int64SliceP is like Int64Slice, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) Int64SliceP(name, shorthand string, value []int64, usage string) *[]int64 {
+	p := []int64{}
+	f.Int64SliceVarP(&p, name, shorthand, value, usage)
+	return &p
+}
+
+// Int64Slice defines a []int64 flag with specified name, default value, and usage string.
+// The return value is the address of a []int64 variable that stores the value of the flag.
+func Int64Slice(name string, value []int64, usage string) *[]int64 {
+	return CommandLine.Int64SliceP(name, "", value, usage)
+}
+
+// Int64SliceP is like Int64Slice, but accepts a shorthand letter that can be used after a single dash.
+func Int64SliceP(name, shorthand string, value []int64, usage string) *[]int64 {
+	return CommandLine.Int64SliceP(name, shorthand, value, usage)
+}
--- a/vendor/github.com/spf13/pflag/int_slice.go
+++ b/vendor/github.com/spf13/pflag/int_slice.go
@ -51,6 +51,36 @@ func (s *intSliceValue) String() string {
 	return "[" + strings.Join(out, ",") + "]"
 }

+func (s *intSliceValue) Append(val string) error {
+	i, err := strconv.Atoi(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *intSliceValue) Replace(val []string) error {
+	out := make([]int, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = strconv.Atoi(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *intSliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = strconv.Itoa(d)
+	}
+	return out
+}
+
 func intSliceConv(val string) (interface{}, error) {
 	val = strings.Trim(val, "[]")
 	// Empty string would cause a slice with one (empty) entry
--- a/vendor/github.com/spf13/pflag/ip_slice.go
+++ b/vendor/github.com/spf13/pflag/ip_slice.go
@ -72,9 +72,47 @@ func (s *ipSliceValue) String() string {
 	return "[" + out + "]"
 }

+func (s *ipSliceValue) fromString(val string) (net.IP, error) {
+	return net.ParseIP(strings.TrimSpace(val)), nil
+}
+
+func (s *ipSliceValue) toString(val net.IP) string {
+	return val.String()
+}
+
+func (s *ipSliceValue) Append(val string) error {
+	i, err := s.fromString(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *ipSliceValue) Replace(val []string) error {
+	out := make([]net.IP, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = s.fromString(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *ipSliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = s.toString(d)
+	}
+	return out
+}
+
 func ipSliceConv(val string) (interface{}, error) {
 	val = strings.Trim(val, "[]")
-	// Emtpy string would cause a slice with one (empty) entry
+	// Empty string would cause a slice with one (empty) entry
 	if len(val) == 0 {
 		return []net.IP{}, nil
 	}
--- a/vendor/github.com/spf13/pflag/string_array.go
+++ b/vendor/github.com/spf13/pflag/string_array.go
@ -23,6 +23,32 @@ func (s *stringArrayValue) Set(val string) error {
 	return nil
 }

+func (s *stringArrayValue) Append(val string) error {
+	*s.value = append(*s.value, val)
+	return nil
+}
+
+func (s *stringArrayValue) Replace(val []string) error {
+	out := make([]string, len(val))
+	for i, d := range val {
+		var err error
+		out[i] = d
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *stringArrayValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = d
+	}
+	return out
+}
+
 func (s *stringArrayValue) Type() string {
 	return "stringArray"
 }
--- a/vendor/github.com/spf13/pflag/string_slice.go
+++ b/vendor/github.com/spf13/pflag/string_slice.go
@ -62,6 +62,20 @@ func (s *stringSliceValue) String() string {
 	return "[" + str + "]"
 }

+func (s *stringSliceValue) Append(val string) error {
+	*s.value = append(*s.value, val)
+	return nil
+}
+
+func (s *stringSliceValue) Replace(val []string) error {
+	*s.value = val
+	return nil
+}
+
+func (s *stringSliceValue) GetSlice() []string {
+	return *s.value
+}
+
 func stringSliceConv(sval string) (interface{}, error) {
 	sval = sval[1 : len(sval)-1]
 	// An empty string would cause a slice with one (empty) string
@ -84,7 +98,7 @@ func (f *FlagSet) GetStringSlice(name string) ([]string, error) {
 // The argument p points to a []string variable in which to store the value of the flag.
 // Compared to StringArray flags, StringSlice flags take comma-separated value as arguments and split them accordingly.
 // For example:
-//   --ss="v1,v2" -ss="v3"
+//   --ss="v1,v2" --ss="v3"
 // will result in
 //   []string{"v1", "v2", "v3"}
 func (f *FlagSet) StringSliceVar(p *[]string, name string, value []string, usage string) {
@ -100,7 +114,7 @@ func (f *FlagSet) StringSliceVarP(p *[]string, name, shorthand string, value []s
 // The argument p points to a []string variable in which to store the value of the flag.
 // Compared to StringArray flags, StringSlice flags take comma-separated value as arguments and split them accordingly.
 // For example:
-//   --ss="v1,v2" -ss="v3"
+//   --ss="v1,v2" --ss="v3"
 // will result in
 //   []string{"v1", "v2", "v3"}
 func StringSliceVar(p *[]string, name string, value []string, usage string) {
@ -116,7 +130,7 @@ func StringSliceVarP(p *[]string, name, shorthand string, value []string, usage
 // The return value is the address of a []string variable that stores the value of the flag.
 // Compared to StringArray flags, StringSlice flags take comma-separated value as arguments and split them accordingly.
 // For example:
-//   --ss="v1,v2" -ss="v3"
+//   --ss="v1,v2" --ss="v3"
 // will result in
 //   []string{"v1", "v2", "v3"}
 func (f *FlagSet) StringSlice(name string, value []string, usage string) *[]string {
@ -136,7 +150,7 @@ func (f *FlagSet) StringSliceP(name, shorthand string, value []string, usage str
 // The return value is the address of a []string variable that stores the value of the flag.
 // Compared to StringArray flags, StringSlice flags take comma-separated value as arguments and split them accordingly.
 // For example:
-//   --ss="v1,v2" -ss="v3"
+//   --ss="v1,v2" --ss="v3"
 // will result in
 //   []string{"v1", "v2", "v3"}
 func StringSlice(name string, value []string, usage string) *[]string {
--- a/vendor/github.com/spf13/pflag/string_to_int64.go
+++ b/vendor/github.com/spf13/pflag/string_to_int64.go
@ -0,0 +1,149 @@
+package pflag
+
+import (
+	"bytes"
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// -- stringToInt64 Value
+type stringToInt64Value struct {
+	value   *map[string]int64
+	changed bool
+}
+
+func newStringToInt64Value(val map[string]int64, p *map[string]int64) *stringToInt64Value {
+	ssv := new(stringToInt64Value)
+	ssv.value = p
+	*ssv.value = val
+	return ssv
+}
+
+// Format: a=1,b=2
+func (s *stringToInt64Value) Set(val string) error {
+	ss := strings.Split(val, ",")
+	out := make(map[string]int64, len(ss))
+	for _, pair := range ss {
+		kv := strings.SplitN(pair, "=", 2)
+		if len(kv) != 2 {
+			return fmt.Errorf("%s must be formatted as key=value", pair)
+		}
+		var err error
+		out[kv[0]], err = strconv.ParseInt(kv[1], 10, 64)
+		if err != nil {
+			return err
+		}
+	}
+	if !s.changed {
+		*s.value = out
+	} else {
+		for k, v := range out {
+			(*s.value)[k] = v
+		}
+	}
+	s.changed = true
+	return nil
+}
+
+func (s *stringToInt64Value) Type() string {
+	return "stringToInt64"
+}
+
+func (s *stringToInt64Value) String() string {
+	var buf bytes.Buffer
+	i := 0
+	for k, v := range *s.value {
+		if i > 0 {
+			buf.WriteRune(',')
+		}
+		buf.WriteString(k)
+		buf.WriteRune('=')
+		buf.WriteString(strconv.FormatInt(v, 10))
+		i++
+	}
+	return "[" + buf.String() + "]"
+}
+
+func stringToInt64Conv(val string) (interface{}, error) {
+	val = strings.Trim(val, "[]")
+	// An empty string would cause an empty map
+	if len(val) == 0 {
+		return map[string]int64{}, nil
+	}
+	ss := strings.Split(val, ",")
+	out := make(map[string]int64, len(ss))
+	for _, pair := range ss {
+		kv := strings.SplitN(pair, "=", 2)
+		if len(kv) != 2 {
+			return nil, fmt.Errorf("%s must be formatted as key=value", pair)
+		}
+		var err error
+		out[kv[0]], err = strconv.ParseInt(kv[1], 10, 64)
+		if err != nil {
+			return nil, err
+		}
+	}
+	return out, nil
+}
+
+// GetStringToInt64 return the map[string]int64 value of a flag with the given name
+func (f *FlagSet) GetStringToInt64(name string) (map[string]int64, error) {
+	val, err := f.getFlagType(name, "stringToInt64", stringToInt64Conv)
+	if err != nil {
+		return map[string]int64{}, err
+	}
+	return val.(map[string]int64), nil
+}
+
+// StringToInt64Var defines a string flag with specified name, default value, and usage string.
+// The argument p point64s to a map[string]int64 variable in which to store the values of the multiple flags.
+// The value of each argument will not try to be separated by comma
+func (f *FlagSet) StringToInt64Var(p *map[string]int64, name string, value map[string]int64, usage string) {
+	f.VarP(newStringToInt64Value(value, p), name, "", usage)
+}
+
+// StringToInt64VarP is like StringToInt64Var, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) StringToInt64VarP(p *map[string]int64, name, shorthand string, value map[string]int64, usage string) {
+	f.VarP(newStringToInt64Value(value, p), name, shorthand, usage)
+}
+
+// StringToInt64Var defines a string flag with specified name, default value, and usage string.
+// The argument p point64s to a map[string]int64 variable in which to store the value of the flag.
+// The value of each argument will not try to be separated by comma
+func StringToInt64Var(p *map[string]int64, name string, value map[string]int64, usage string) {
+	CommandLine.VarP(newStringToInt64Value(value, p), name, "", usage)
+}
+
+// StringToInt64VarP is like StringToInt64Var, but accepts a shorthand letter that can be used after a single dash.
+func StringToInt64VarP(p *map[string]int64, name, shorthand string, value map[string]int64, usage string) {
+	CommandLine.VarP(newStringToInt64Value(value, p), name, shorthand, usage)
+}
+
+// StringToInt64 defines a string flag with specified name, default value, and usage string.
+// The return value is the address of a map[string]int64 variable that stores the value of the flag.
+// The value of each argument will not try to be separated by comma
+func (f *FlagSet) StringToInt64(name string, value map[string]int64, usage string) *map[string]int64 {
+	p := map[string]int64{}
+	f.StringToInt64VarP(&p, name, "", value, usage)
+	return &p
+}
+
+// StringToInt64P is like StringToInt64, but accepts a shorthand letter that can be used after a single dash.
+func (f *FlagSet) StringToInt64P(name, shorthand string, value map[string]int64, usage string) *map[string]int64 {
+	p := map[string]int64{}
+	f.StringToInt64VarP(&p, name, shorthand, value, usage)
+	return &p
+}
+
+// StringToInt64 defines a string flag with specified name, default value, and usage string.
+// The return value is the address of a map[string]int64 variable that stores the value of the flag.
+// The value of each argument will not try to be separated by comma
+func StringToInt64(name string, value map[string]int64, usage string) *map[string]int64 {
+	return CommandLine.StringToInt64P(name, "", value, usage)
+}
+
+// StringToInt64P is like StringToInt64, but accepts a shorthand letter that can be used after a single dash.
+func StringToInt64P(name, shorthand string, value map[string]int64, usage string) *map[string]int64 {
+	return CommandLine.StringToInt64P(name, shorthand, value, usage)
+}
--- a/vendor/github.com/spf13/pflag/uint_slice.go
+++ b/vendor/github.com/spf13/pflag/uint_slice.go
@ -50,6 +50,48 @@ func (s *uintSliceValue) String() string {
 	return "[" + strings.Join(out, ",") + "]"
 }

+func (s *uintSliceValue) fromString(val string) (uint, error) {
+	t, err := strconv.ParseUint(val, 10, 0)
+	if err != nil {
+		return 0, err
+	}
+	return uint(t), nil
+}
+
+func (s *uintSliceValue) toString(val uint) string {
+	return fmt.Sprintf("%d", val)
+}
+
+func (s *uintSliceValue) Append(val string) error {
+	i, err := s.fromString(val)
+	if err != nil {
+		return err
+	}
+	*s.value = append(*s.value, i)
+	return nil
+}
+
+func (s *uintSliceValue) Replace(val []string) error {
+	out := make([]uint, len(val))
+	for i, d := range val {
+		var err error
+		out[i], err = s.fromString(d)
+		if err != nil {
+			return err
+		}
+	}
+	*s.value = out
+	return nil
+}
+
+func (s *uintSliceValue) GetSlice() []string {
+	out := make([]string, len(*s.value))
+	for i, d := range *s.value {
+		out[i] = s.toString(d)
+	}
+	return out
+}
+
 func uintSliceConv(val string) (interface{}, error) {
 	val = strings.Trim(val, "[]")
 	// Empty string would cause a slice with one (empty) entry
--- a/vendor/golang.org/x/crypto/blowfish/block.go
+++ b/vendor/golang.org/x/crypto/blowfish/block.go
@ -0,0 +1,159 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package blowfish
+
+// getNextWord returns the next big-endian uint32 value from the byte slice
+// at the given position in a circular manner, updating the position.
+func getNextWord(b []byte, pos *int) uint32 {
+	var w uint32
+	j := *pos
+	for i := 0; i < 4; i++ {
+		w = w<<8 | uint32(b[j])
+		j++
+		if j >= len(b) {
+			j = 0
+		}
+	}
+	*pos = j
+	return w
+}
+
+// ExpandKey performs a key expansion on the given *Cipher. Specifically, it
+// performs the Blowfish algorithm's key schedule which sets up the *Cipher's
+// pi and substitution tables for calls to Encrypt. This is used, primarily,
+// by the bcrypt package to reuse the Blowfish key schedule during its
+// set up. It's unlikely that you need to use this directly.
+func ExpandKey(key []byte, c *Cipher) {
+	j := 0
+	for i := 0; i < 18; i++ {
+		// Using inlined getNextWord for performance.
+		var d uint32
+		for k := 0; k < 4; k++ {
+			d = d<<8 | uint32(key[j])
+			j++
+			if j >= len(key) {
+				j = 0
+			}
+		}
+		c.p[i] ^= d
+	}
+
+	var l, r uint32
+	for i := 0; i < 18; i += 2 {
+		l, r = encryptBlock(l, r, c)
+		c.p[i], c.p[i+1] = l, r
+	}
+
+	for i := 0; i < 256; i += 2 {
+		l, r = encryptBlock(l, r, c)
+		c.s0[i], c.s0[i+1] = l, r
+	}
+	for i := 0; i < 256; i += 2 {
+		l, r = encryptBlock(l, r, c)
+		c.s1[i], c.s1[i+1] = l, r
+	}
+	for i := 0; i < 256; i += 2 {
+		l, r = encryptBlock(l, r, c)
+		c.s2[i], c.s2[i+1] = l, r
+	}
+	for i := 0; i < 256; i += 2 {
+		l, r = encryptBlock(l, r, c)
+		c.s3[i], c.s3[i+1] = l, r
+	}
+}
+
+// This is similar to ExpandKey, but folds the salt during the key
+// schedule. While ExpandKey is essentially expandKeyWithSalt with an all-zero
+// salt passed in, reusing ExpandKey turns out to be a place of inefficiency
+// and specializing it here is useful.
+func expandKeyWithSalt(key []byte, salt []byte, c *Cipher) {
+	j := 0
+	for i := 0; i < 18; i++ {
+		c.p[i] ^= getNextWord(key, &j)
+	}
+
+	j = 0
+	var l, r uint32
+	for i := 0; i < 18; i += 2 {
+		l ^= getNextWord(salt, &j)
+		r ^= getNextWord(salt, &j)
+		l, r = encryptBlock(l, r, c)
+		c.p[i], c.p[i+1] = l, r
+	}
+
+	for i := 0; i < 256; i += 2 {
+		l ^= getNextWord(salt, &j)
+		r ^= getNextWord(salt, &j)
+		l, r = encryptBlock(l, r, c)
+		c.s0[i], c.s0[i+1] = l, r
+	}
+
+	for i := 0; i < 256; i += 2 {
+		l ^= getNextWord(salt, &j)
+		r ^= getNextWord(salt, &j)
+		l, r = encryptBlock(l, r, c)
+		c.s1[i], c.s1[i+1] = l, r
+	}
+
+	for i := 0; i < 256; i += 2 {
+		l ^= getNextWord(salt, &j)
+		r ^= getNextWord(salt, &j)
+		l, r = encryptBlock(l, r, c)
+		c.s2[i], c.s2[i+1] = l, r
+	}
+
+	for i := 0; i < 256; i += 2 {
+		l ^= getNextWord(salt, &j)
+		r ^= getNextWord(salt, &j)
+		l, r = encryptBlock(l, r, c)
+		c.s3[i], c.s3[i+1] = l, r
+	}
+}
+
+func encryptBlock(l, r uint32, c *Cipher) (uint32, uint32) {
+	xl, xr := l, r
+	xl ^= c.p[0]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[1]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[2]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[3]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[4]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[5]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[6]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[7]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[8]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[9]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[10]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[11]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[12]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[13]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[14]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[15]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[16]
+	xr ^= c.p[17]
+	return xr, xl
+}
+
+func decryptBlock(l, r uint32, c *Cipher) (uint32, uint32) {
+	xl, xr := l, r
+	xl ^= c.p[17]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[16]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[15]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[14]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[13]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[12]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[11]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[10]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[9]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[8]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[7]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[6]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[5]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[4]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[3]
+	xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[2]
+	xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[1]
+	xr ^= c.p[0]
+	return xr, xl
+}
--- a/vendor/golang.org/x/crypto/blowfish/cipher.go
+++ b/vendor/golang.org/x/crypto/blowfish/cipher.go
@ -0,0 +1,99 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package blowfish implements Bruce Schneier's Blowfish encryption algorithm.
+//
+// Blowfish is a legacy cipher and its short block size makes it vulnerable to
+// birthday bound attacks (see https://sweet32.info). It should only be used
+// where compatibility with legacy systems, not security, is the goal.
+//
+// Deprecated: any new system should use AES (from crypto/aes, if necessary in
+// an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from
+// golang.org/x/crypto/chacha20poly1305).
+package blowfish // import "golang.org/x/crypto/blowfish"
+
+// The code is a port of Bruce Schneier's C implementation.
+// See https://www.schneier.com/blowfish.html.
+
+import "strconv"
+
+// The Blowfish block size in bytes.
+const BlockSize = 8
+
+// A Cipher is an instance of Blowfish encryption using a particular key.
+type Cipher struct {
+	p              [18]uint32
+	s0, s1, s2, s3 [256]uint32
+}
+
+type KeySizeError int
+
+func (k KeySizeError) Error() string {
+	return "crypto/blowfish: invalid key size " + strconv.Itoa(int(k))
+}
+
+// NewCipher creates and returns a Cipher.
+// The key argument should be the Blowfish key, from 1 to 56 bytes.
+func NewCipher(key []byte) (*Cipher, error) {
+	var result Cipher
+	if k := len(key); k < 1 || k > 56 {
+		return nil, KeySizeError(k)
+	}
+	initCipher(&result)
+	ExpandKey(key, &result)
+	return &result, nil
+}
+
+// NewSaltedCipher creates a returns a Cipher that folds a salt into its key
+// schedule. For most purposes, NewCipher, instead of NewSaltedCipher, is
+// sufficient and desirable. For bcrypt compatibility, the key can be over 56
+// bytes.
+func NewSaltedCipher(key, salt []byte) (*Cipher, error) {
+	if len(salt) == 0 {
+		return NewCipher(key)
+	}
+	var result Cipher
+	if k := len(key); k < 1 {
+		return nil, KeySizeError(k)
+	}
+	initCipher(&result)
+	expandKeyWithSalt(key, salt, &result)
+	return &result, nil
+}
+
+// BlockSize returns the Blowfish block size, 8 bytes.
+// It is necessary to satisfy the Block interface in the
+// package "crypto/cipher".
+func (c *Cipher) BlockSize() int { return BlockSize }
+
+// Encrypt encrypts the 8-byte buffer src using the key k
+// and stores the result in dst.
+// Note that for amounts of data larger than a block,
+// it is not safe to just call Encrypt on successive blocks;
+// instead, use an encryption mode like CBC (see crypto/cipher/cbc.go).
+func (c *Cipher) Encrypt(dst, src []byte) {
+	l := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
+	r := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
+	l, r = encryptBlock(l, r, c)
+	dst[0], dst[1], dst[2], dst[3] = byte(l>>24), byte(l>>16), byte(l>>8), byte(l)
+	dst[4], dst[5], dst[6], dst[7] = byte(r>>24), byte(r>>16), byte(r>>8), byte(r)
+}
+
+// Decrypt decrypts the 8-byte buffer src using the key k
+// and stores the result in dst.
+func (c *Cipher) Decrypt(dst, src []byte) {
+	l := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
+	r := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
+	l, r = decryptBlock(l, r, c)
+	dst[0], dst[1], dst[2], dst[3] = byte(l>>24), byte(l>>16), byte(l>>8), byte(l)
+	dst[4], dst[5], dst[6], dst[7] = byte(r>>24), byte(r>>16), byte(r>>8), byte(r)
+}
+
+func initCipher(c *Cipher) {
+	copy(c.p[0:], p[0:])
+	copy(c.s0[0:], s0[0:])
+	copy(c.s1[0:], s1[0:])
+	copy(c.s2[0:], s2[0:])
+	copy(c.s3[0:], s3[0:])
+}
--- a/vendor/golang.org/x/crypto/blowfish/const.go
+++ b/vendor/golang.org/x/crypto/blowfish/const.go
@ -0,0 +1,199 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The startup permutation array and substitution boxes.
+// They are the hexadecimal digits of PI; see:
+// https://www.schneier.com/code/constants.txt.
+
+package blowfish
+
+var s0 = [256]uint32{
+	0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, 0xb8e1afed, 0x6a267e96,
+	0xba7c9045, 0xf12c7f99, 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
+	0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, 0x0d95748f, 0x728eb658,
+	0x718bcd58, 0x82154aee, 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
+	0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, 0x8e79dcb0, 0x603a180e,
+	0x6c9e0e8b, 0xb01e8a3e, 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
+	0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, 0x55ca396a, 0x2aab10b6,
+	0xb4cc5c34, 0x1141e8ce, 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
+	0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, 0xafd6ba33, 0x6c24cf5c,
+	0x7a325381, 0x28958677, 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
+	0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, 0xef845d5d, 0xe98575b1,
+	0xdc262302, 0xeb651b88, 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
+	0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, 0x21c66842, 0xf6e96c9a,
+	0x670c9c61, 0xabd388f0, 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
+	0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, 0xa1f1651d, 0x39af0176,
+	0x66ca593e, 0x82430e88, 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
+	0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, 0x4ed3aa62, 0x363f7706,
+	0x1bfedf72, 0x429b023d, 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
+	0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, 0xe3fe501a, 0xb6794c3b,
+	0x976ce0bd, 0x04c006ba, 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
+	0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, 0x6dfc511f, 0x9b30952c,
+	0xcc814544, 0xaf5ebd09, 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
+	0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, 0x5579c0bd, 0x1a60320a,
+	0xd6a100c6, 0x402c7279, 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
+	0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, 0x323db5fa, 0xfd238760,
+	0x53317b48, 0x3e00df82, 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
+	0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, 0x695b27b0, 0xbbca58c8,
+	0xe1ffa35d, 0xb8f011a0, 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
+	0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, 0xe1ddf2da, 0xa4cb7e33,
+	0x62fb1341, 0xcee4c6e8, 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
+	0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, 0xd08ed1d0, 0xafc725e0,
+	0x8e3c5b2f, 0x8e7594b7, 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
+	0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, 0x2f2f2218, 0xbe0e1777,
+	0xea752dfe, 0x8b021fa1, 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
+	0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, 0x165fa266, 0x80957705,
+	0x93cc7314, 0x211a1477, 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
+	0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, 0x00250e2d, 0x2071b35e,
+	0x226800bb, 0x57b8e0af, 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
+	0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, 0x83260376, 0x6295cfa9,
+	0x11c81968, 0x4e734a41, 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
+	0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, 0x08ba6fb5, 0x571be91f,
+	0xf296ec6b, 0x2a0dd915, 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
+	0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a,
+}
+
+var s1 = [256]uint32{
+	0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, 0xad6ea6b0, 0x49a7df7d,
+	0x9cee60b8, 0x8fedb266, 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
+	0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, 0x3f54989a, 0x5b429d65,
+	0x6b8fe4d6, 0x99f73fd6, 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
+	0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, 0x09686b3f, 0x3ebaefc9,
+	0x3c971814, 0x6b6a70a1, 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
+	0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, 0xb03ada37, 0xf0500c0d,
+	0xf01c1f04, 0x0200b3ff, 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
+	0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, 0x3ae5e581, 0x37c2dadc,
+	0xc8b57634, 0x9af3dda7, 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
+	0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, 0x4e548b38, 0x4f6db908,
+	0x6f420d03, 0xf60a04bf, 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
+	0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, 0x5512721f, 0x2e6b7124,
+	0x501adde6, 0x9f84cd87, 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
+	0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, 0xef1c1847, 0x3215d908,
+	0xdd433b37, 0x24c2ba16, 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
+	0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, 0x043556f1, 0xd7a3c76b,
+	0x3c11183b, 0x5924a509, 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
+	0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, 0x771fe71c, 0x4e3d06fa,
+	0x2965dcb9, 0x99e71d0f, 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
+	0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, 0xf2f74ea7, 0x361d2b3d,
+	0x1939260f, 0x19c27960, 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
+	0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, 0xc332ddef, 0xbe6c5aa5,
+	0x65582185, 0x68ab9802, 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
+	0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, 0x13cca830, 0xeb61bd96,
+	0x0334fe1e, 0xaa0363cf, 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
+	0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, 0x648b1eaf, 0x19bdf0ca,
+	0xa02369b9, 0x655abb50, 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
+	0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, 0xf837889a, 0x97e32d77,
+	0x11ed935f, 0x16681281, 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
+	0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, 0xcdb30aeb, 0x532e3054,
+	0x8fd948e4, 0x6dbc3128, 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
+	0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, 0x45eee2b6, 0xa3aaabea,
+	0xdb6c4f15, 0xfacb4fd0, 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
+	0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, 0xcf62a1f2, 0x5b8d2646,
+	0xfc8883a0, 0xc1c7b6a3, 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
+	0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, 0x58428d2a, 0x0c55f5ea,
+	0x1dadf43e, 0x233f7061, 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
+	0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, 0xa6078084, 0x19f8509e,
+	0xe8efd855, 0x61d99735, 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
+	0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, 0xdb73dbd3, 0x105588cd,
+	0x675fda79, 0xe3674340, 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
+	0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7,
+}
+
+var s2 = [256]uint32{
+	0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, 0x411520f7, 0x7602d4f7,
+	0xbcf46b2e, 0xd4a20068, 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
+	0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, 0x4d95fc1d, 0x96b591af,
+	0x70f4ddd3, 0x66a02f45, 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
+	0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, 0x28507825, 0x530429f4,
+	0x0a2c86da, 0xe9b66dfb, 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
+	0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, 0xaace1e7c, 0xd3375fec,
+	0xce78a399, 0x406b2a42, 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
+	0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, 0x3a6efa74, 0xdd5b4332,
+	0x6841e7f7, 0xca7820fb, 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
+	0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, 0x55a867bc, 0xa1159a58,
+	0xcca92963, 0x99e1db33, 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
+	0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, 0x95c11548, 0xe4c66d22,
+	0x48c1133f, 0xc70f86dc, 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
+	0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, 0x257b7834, 0x602a9c60,
+	0xdff8e8a3, 0x1f636c1b, 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
+	0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, 0x85b2a20e, 0xe6ba0d99,
+	0xde720c8c, 0x2da2f728, 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
+	0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, 0x0a476341, 0x992eff74,
+	0x3a6f6eab, 0xf4f8fd37, 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
+	0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, 0xf1290dc7, 0xcc00ffa3,
+	0xb5390f92, 0x690fed0b, 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
+	0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, 0x37392eb3, 0xcc115979,
+	0x8026e297, 0xf42e312d, 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
+	0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, 0x1a6b1018, 0x11caedfa,
+	0x3d25bdd8, 0xe2e1c3c9, 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
+	0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, 0x9dbc8057, 0xf0f7c086,
+	0x60787bf8, 0x6003604d, 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
+	0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, 0x77a057be, 0xbde8ae24,
+	0x55464299, 0xbf582e61, 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
+	0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, 0x7aeb2661, 0x8b1ddf84,
+	0x846a0e79, 0x915f95e2, 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
+	0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, 0xb77f19b6, 0xe0a9dc09,
+	0x662d09a1, 0xc4324633, 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
+	0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, 0xdcb7da83, 0x573906fe,
+	0xa1e2ce9b, 0x4fcd7f52, 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
+	0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, 0xf0177a28, 0xc0f586e0,
+	0x006058aa, 0x30dc7d62, 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
+	0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, 0x6f05e409, 0x4b7c0188,
+	0x39720a3d, 0x7c927c24, 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
+	0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, 0x1e50ef5e, 0xb161e6f8,
+	0xa28514d9, 0x6c51133c, 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
+	0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0,
+}
+
+var s3 = [256]uint32{
+	0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, 0x5cb0679e, 0x4fa33742,
+	0xd3822740, 0x99bc9bbe, 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
+	0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, 0x5748ab2f, 0xbc946e79,
+	0xc6a376d2, 0x6549c2c8, 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
+	0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, 0xa1fad5f0, 0x6a2d519a,
+	0x63ef8ce2, 0x9a86ee22, 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
+	0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, 0x2826a2f9, 0xa73a3ae1,
+	0x4ba99586, 0xef5562e9, 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
+	0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, 0xe990fd5a, 0x9e34d797,
+	0x2cf0b7d9, 0x022b8b51, 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
+	0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, 0xe029ac71, 0xe019a5e6,
+	0x47b0acfd, 0xed93fa9b, 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
+	0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, 0x15056dd4, 0x88f46dba,
+	0x03a16125, 0x0564f0bd, 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
+	0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, 0x7533d928, 0xb155fdf5,
+	0x03563482, 0x8aba3cbb, 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
+	0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, 0xea7a90c2, 0xfb3e7bce,
+	0x5121ce64, 0x774fbe32, 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
+	0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, 0xb39a460a, 0x6445c0dd,
+	0x586cdecf, 0x1c20c8ae, 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
+	0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, 0x72eacea8, 0xfa6484bb,
+	0x8d6612ae, 0xbf3c6f47, 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
+	0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, 0x4040cb08, 0x4eb4e2cc,
+	0x34d2466a, 0x0115af84, 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
+	0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, 0x611560b1, 0xe7933fdc,
+	0xbb3a792b, 0x344525bd, 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
+	0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, 0x1a908749, 0xd44fbd9a,
+	0xd0dadecb, 0xd50ada38, 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
+	0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, 0xbf97222c, 0x15e6fc2a,
+	0x0f91fc71, 0x9b941525, 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
+	0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, 0xe0ec6e0e, 0x1698db3b,
+	0x4c98a0be, 0x3278e964, 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
+	0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, 0xdf359f8d, 0x9b992f2e,
+	0xe60b6f47, 0x0fe3f11d, 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
+	0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, 0xf523f357, 0xa6327623,
+	0x93a83531, 0x56cccd02, 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
+	0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, 0xe6c6c7bd, 0x327a140a,
+	0x45e1d006, 0xc3f27b9a, 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
+	0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, 0x53113ec0, 0x1640e3d3,
+	0x38abbd60, 0x2547adf0, 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
+	0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, 0x1948c25c, 0x02fb8a8c,
+	0x01c36ae4, 0xd6ebe1f9, 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
+	0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6,
+}
+
+var p = [18]uint32{
+	0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, 0xa4093822, 0x299f31d0,
+	0x082efa98, 0xec4e6c89, 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
+	0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, 0x9216d5d9, 0x8979fb1b,
+}
--- a/vendor/golang.org/x/crypto/chacha20/chacha_arm64.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_arm64.go
@ -0,0 +1,16 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.11,!gccgo,!purego
+
+package chacha20
+
+const bufSize = 256
+
+//go:noescape
+func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
+
+func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) {
+	xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter)
+}
--- a/vendor/golang.org/x/crypto/internal/chacha20/asm_arm64.s
+++ b/vendor/golang.org/x/crypto/internal/chacha20/asm_arm64.s
@ -2,8 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build go1.11
-// +build !gccgo,!appengine
+// +build go1.11,!gccgo,!purego

 #include "textflag.h"

--- a/vendor/golang.org/x/crypto/chacha20/chacha_generic.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_generic.go
@ -0,0 +1,398 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package chacha20 implements the ChaCha20 and XChaCha20 encryption algorithms
+// as specified in RFC 8439 and draft-irtf-cfrg-xchacha-01.
+package chacha20
+
+import (
+	"crypto/cipher"
+	"encoding/binary"
+	"errors"
+	"math/bits"
+
+	"golang.org/x/crypto/internal/subtle"
+)
+
+const (
+	// KeySize is the size of the key used by this cipher, in bytes.
+	KeySize = 32
+
+	// NonceSize is the size of the nonce used with the standard variant of this
+	// cipher, in bytes.
+	//
+	// Note that this is too short to be safely generated at random if the same
+	// key is reused more than 2³² times.
+	NonceSize = 12
+
+	// NonceSizeX is the size of the nonce used with the XChaCha20 variant of
+	// this cipher, in bytes.
+	NonceSizeX = 24
+)
+
+// Cipher is a stateful instance of ChaCha20 or XChaCha20 using a particular key
+// and nonce. A *Cipher implements the cipher.Stream interface.
+type Cipher struct {
+	// The ChaCha20 state is 16 words: 4 constant, 8 of key, 1 of counter
+	// (incremented after each block), and 3 of nonce.
+	key     [8]uint32
+	counter uint32
+	nonce   [3]uint32
+
+	// The last len bytes of buf are leftover key stream bytes from the previous
+	// XORKeyStream invocation. The size of buf depends on how many blocks are
+	// computed at a time by xorKeyStreamBlocks.
+	buf [bufSize]byte
+	len int
+
+	// overflow is set when the counter overflowed, no more blocks can be
+	// generated, and the next XORKeyStream call should panic.
+	overflow bool
+
+	// The counter-independent results of the first round are cached after they
+	// are computed the first time.
+	precompDone      bool
+	p1, p5, p9, p13  uint32
+	p2, p6, p10, p14 uint32
+	p3, p7, p11, p15 uint32
+}
+
+var _ cipher.Stream = (*Cipher)(nil)
+
+// NewUnauthenticatedCipher creates a new ChaCha20 stream cipher with the given
+// 32 bytes key and a 12 or 24 bytes nonce. If a nonce of 24 bytes is provided,
+// the XChaCha20 construction will be used. It returns an error if key or nonce
+// have any other length.
+//
+// Note that ChaCha20, like all stream ciphers, is not authenticated and allows
+// attackers to silently tamper with the plaintext. For this reason, it is more
+// appropriate as a building block than as a standalone encryption mechanism.
+// Instead, consider using package golang.org/x/crypto/chacha20poly1305.
+func NewUnauthenticatedCipher(key, nonce []byte) (*Cipher, error) {
+	// This function is split into a wrapper so that the Cipher allocation will
+	// be inlined, and depending on how the caller uses the return value, won't
+	// escape to the heap.
+	c := &Cipher{}
+	return newUnauthenticatedCipher(c, key, nonce)
+}
+
+func newUnauthenticatedCipher(c *Cipher, key, nonce []byte) (*Cipher, error) {
+	if len(key) != KeySize {
+		return nil, errors.New("chacha20: wrong key size")
+	}
+	if len(nonce) == NonceSizeX {
+		// XChaCha20 uses the ChaCha20 core to mix 16 bytes of the nonce into a
+		// derived key, allowing it to operate on a nonce of 24 bytes. See
+		// draft-irtf-cfrg-xchacha-01, Section 2.3.
+		key, _ = HChaCha20(key, nonce[0:16])
+		cNonce := make([]byte, NonceSize)
+		copy(cNonce[4:12], nonce[16:24])
+		nonce = cNonce
+	} else if len(nonce) != NonceSize {
+		return nil, errors.New("chacha20: wrong nonce size")
+	}
+
+	key, nonce = key[:KeySize], nonce[:NonceSize] // bounds check elimination hint
+	c.key = [8]uint32{
+		binary.LittleEndian.Uint32(key[0:4]),
+		binary.LittleEndian.Uint32(key[4:8]),
+		binary.LittleEndian.Uint32(key[8:12]),
+		binary.LittleEndian.Uint32(key[12:16]),
+		binary.LittleEndian.Uint32(key[16:20]),
+		binary.LittleEndian.Uint32(key[20:24]),
+		binary.LittleEndian.Uint32(key[24:28]),
+		binary.LittleEndian.Uint32(key[28:32]),
+	}
+	c.nonce = [3]uint32{
+		binary.LittleEndian.Uint32(nonce[0:4]),
+		binary.LittleEndian.Uint32(nonce[4:8]),
+		binary.LittleEndian.Uint32(nonce[8:12]),
+	}
+	return c, nil
+}
+
+// The constant first 4 words of the ChaCha20 state.
+const (
+	j0 uint32 = 0x61707865 // expa
+	j1 uint32 = 0x3320646e // nd 3
+	j2 uint32 = 0x79622d32 // 2-by
+	j3 uint32 = 0x6b206574 // te k
+)
+
+const blockSize = 64
+
+// quarterRound is the core of ChaCha20. It shuffles the bits of 4 state words.
+// It's executed 4 times for each of the 20 ChaCha20 rounds, operating on all 16
+// words each round, in columnar or diagonal groups of 4 at a time.
+func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
+	a += b
+	d ^= a
+	d = bits.RotateLeft32(d, 16)
+	c += d
+	b ^= c
+	b = bits.RotateLeft32(b, 12)
+	a += b
+	d ^= a
+	d = bits.RotateLeft32(d, 8)
+	c += d
+	b ^= c
+	b = bits.RotateLeft32(b, 7)
+	return a, b, c, d
+}
+
+// SetCounter sets the Cipher counter. The next invocation of XORKeyStream will
+// behave as if (64 * counter) bytes had been encrypted so far.
+//
+// To prevent accidental counter reuse, SetCounter panics if counter is less
+// than the current value.
+//
+// Note that the execution time of XORKeyStream is not independent of the
+// counter value.
+func (s *Cipher) SetCounter(counter uint32) {
+	// Internally, s may buffer multiple blocks, which complicates this
+	// implementation slightly. When checking whether the counter has rolled
+	// back, we must use both s.counter and s.len to determine how many blocks
+	// we have already output.
+	outputCounter := s.counter - uint32(s.len)/blockSize
+	if s.overflow || counter < outputCounter {
+		panic("chacha20: SetCounter attempted to rollback counter")
+	}
+
+	// In the general case, we set the new counter value and reset s.len to 0,
+	// causing the next call to XORKeyStream to refill the buffer. However, if
+	// we're advancing within the existing buffer, we can save work by simply
+	// setting s.len.
+	if counter < s.counter {
+		s.len = int(s.counter-counter) * blockSize
+	} else {
+		s.counter = counter
+		s.len = 0
+	}
+}
+
+// XORKeyStream XORs each byte in the given slice with a byte from the
+// cipher's key stream. Dst and src must overlap entirely or not at all.
+//
+// If len(dst) < len(src), XORKeyStream will panic. It is acceptable
+// to pass a dst bigger than src, and in that case, XORKeyStream will
+// only update dst[:len(src)] and will not touch the rest of dst.
+//
+// Multiple calls to XORKeyStream behave as if the concatenation of
+// the src buffers was passed in a single run. That is, Cipher
+// maintains state and does not reset at each XORKeyStream call.
+func (s *Cipher) XORKeyStream(dst, src []byte) {
+	if len(src) == 0 {
+		return
+	}
+	if len(dst) < len(src) {
+		panic("chacha20: output smaller than input")
+	}
+	dst = dst[:len(src)]
+	if subtle.InexactOverlap(dst, src) {
+		panic("chacha20: invalid buffer overlap")
+	}
+
+	// First, drain any remaining key stream from a previous XORKeyStream.
+	if s.len != 0 {
+		keyStream := s.buf[bufSize-s.len:]
+		if len(src) < len(keyStream) {
+			keyStream = keyStream[:len(src)]
+		}
+		_ = src[len(keyStream)-1] // bounds check elimination hint
+		for i, b := range keyStream {
+			dst[i] = src[i] ^ b
+		}
+		s.len -= len(keyStream)
+		dst, src = dst[len(keyStream):], src[len(keyStream):]
+	}
+	if len(src) == 0 {
+		return
+	}
+
+	// If we'd need to let the counter overflow and keep generating output,
+	// panic immediately. If instead we'd only reach the last block, remember
+	// not to generate any more output after the buffer is drained.
+	numBlocks := (uint64(len(src)) + blockSize - 1) / blockSize
+	if s.overflow || uint64(s.counter)+numBlocks > 1<<32 {
+		panic("chacha20: counter overflow")
+	} else if uint64(s.counter)+numBlocks == 1<<32 {
+		s.overflow = true
+	}
+
+	// xorKeyStreamBlocks implementations expect input lengths that are a
+	// multiple of bufSize. Platform-specific ones process multiple blocks at a
+	// time, so have bufSizes that are a multiple of blockSize.
+
+	full := len(src) - len(src)%bufSize
+	if full > 0 {
+		s.xorKeyStreamBlocks(dst[:full], src[:full])
+	}
+	dst, src = dst[full:], src[full:]
+
+	// If using a multi-block xorKeyStreamBlocks would overflow, use the generic
+	// one that does one block at a time.
+	const blocksPerBuf = bufSize / blockSize
+	if uint64(s.counter)+blocksPerBuf > 1<<32 {
+		s.buf = [bufSize]byte{}
+		numBlocks := (len(src) + blockSize - 1) / blockSize
+		buf := s.buf[bufSize-numBlocks*blockSize:]
+		copy(buf, src)
+		s.xorKeyStreamBlocksGeneric(buf, buf)
+		s.len = len(buf) - copy(dst, buf)
+		return
+	}
+
+	// If we have a partial (multi-)block, pad it for xorKeyStreamBlocks, and
+	// keep the leftover keystream for the next XORKeyStream invocation.
+	if len(src) > 0 {
+		s.buf = [bufSize]byte{}
+		copy(s.buf[:], src)
+		s.xorKeyStreamBlocks(s.buf[:], s.buf[:])
+		s.len = bufSize - copy(dst, s.buf[:])
+	}
+}
+
+func (s *Cipher) xorKeyStreamBlocksGeneric(dst, src []byte) {
+	if len(dst) != len(src) || len(dst)%blockSize != 0 {
+		panic("chacha20: internal error: wrong dst and/or src length")
+	}
+
+	// To generate each block of key stream, the initial cipher state
+	// (represented below) is passed through 20 rounds of shuffling,
+	// alternatively applying quarterRounds by columns (like 1, 5, 9, 13)
+	// or by diagonals (like 1, 6, 11, 12).
+	//
+	//      0:cccccccc   1:cccccccc   2:cccccccc   3:cccccccc
+	//      4:kkkkkkkk   5:kkkkkkkk   6:kkkkkkkk   7:kkkkkkkk
+	//      8:kkkkkkkk   9:kkkkkkkk  10:kkkkkkkk  11:kkkkkkkk
+	//     12:bbbbbbbb  13:nnnnnnnn  14:nnnnnnnn  15:nnnnnnnn
+	//
+	//            c=constant k=key b=blockcount n=nonce
+	var (
+		c0, c1, c2, c3   = j0, j1, j2, j3
+		c4, c5, c6, c7   = s.key[0], s.key[1], s.key[2], s.key[3]
+		c8, c9, c10, c11 = s.key[4], s.key[5], s.key[6], s.key[7]
+		_, c13, c14, c15 = s.counter, s.nonce[0], s.nonce[1], s.nonce[2]
+	)
+
+	// Three quarters of the first round don't depend on the counter, so we can
+	// calculate them here, and reuse them for multiple blocks in the loop, and
+	// for future XORKeyStream invocations.
+	if !s.precompDone {
+		s.p1, s.p5, s.p9, s.p13 = quarterRound(c1, c5, c9, c13)
+		s.p2, s.p6, s.p10, s.p14 = quarterRound(c2, c6, c10, c14)
+		s.p3, s.p7, s.p11, s.p15 = quarterRound(c3, c7, c11, c15)
+		s.precompDone = true
+	}
+
+	// A condition of len(src) > 0 would be sufficient, but this also
+	// acts as a bounds check elimination hint.
+	for len(src) >= 64 && len(dst) >= 64 {
+		// The remainder of the first column round.
+		fcr0, fcr4, fcr8, fcr12 := quarterRound(c0, c4, c8, s.counter)
+
+		// The second diagonal round.
+		x0, x5, x10, x15 := quarterRound(fcr0, s.p5, s.p10, s.p15)
+		x1, x6, x11, x12 := quarterRound(s.p1, s.p6, s.p11, fcr12)
+		x2, x7, x8, x13 := quarterRound(s.p2, s.p7, fcr8, s.p13)
+		x3, x4, x9, x14 := quarterRound(s.p3, fcr4, s.p9, s.p14)
+
+		// The remaining 18 rounds.
+		for i := 0; i < 9; i++ {
+			// Column round.
+			x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
+			x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
+			x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
+			x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
+
+			// Diagonal round.
+			x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
+			x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
+			x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
+			x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
+		}
+
+		// Add back the initial state to generate the key stream, then
+		// XOR the key stream with the source and write out the result.
+		addXor(dst[0:4], src[0:4], x0, c0)
+		addXor(dst[4:8], src[4:8], x1, c1)
+		addXor(dst[8:12], src[8:12], x2, c2)
+		addXor(dst[12:16], src[12:16], x3, c3)
+		addXor(dst[16:20], src[16:20], x4, c4)
+		addXor(dst[20:24], src[20:24], x5, c5)
+		addXor(dst[24:28], src[24:28], x6, c6)
+		addXor(dst[28:32], src[28:32], x7, c7)
+		addXor(dst[32:36], src[32:36], x8, c8)
+		addXor(dst[36:40], src[36:40], x9, c9)
+		addXor(dst[40:44], src[40:44], x10, c10)
+		addXor(dst[44:48], src[44:48], x11, c11)
+		addXor(dst[48:52], src[48:52], x12, s.counter)
+		addXor(dst[52:56], src[52:56], x13, c13)
+		addXor(dst[56:60], src[56:60], x14, c14)
+		addXor(dst[60:64], src[60:64], x15, c15)
+
+		s.counter += 1
+
+		src, dst = src[blockSize:], dst[blockSize:]
+	}
+}
+
+// HChaCha20 uses the ChaCha20 core to generate a derived key from a 32 bytes
+// key and a 16 bytes nonce. It returns an error if key or nonce have any other
+// length. It is used as part of the XChaCha20 construction.
+func HChaCha20(key, nonce []byte) ([]byte, error) {
+	// This function is split into a wrapper so that the slice allocation will
+	// be inlined, and depending on how the caller uses the return value, won't
+	// escape to the heap.
+	out := make([]byte, 32)
+	return hChaCha20(out, key, nonce)
+}
+
+func hChaCha20(out, key, nonce []byte) ([]byte, error) {
+	if len(key) != KeySize {
+		return nil, errors.New("chacha20: wrong HChaCha20 key size")
+	}
+	if len(nonce) != 16 {
+		return nil, errors.New("chacha20: wrong HChaCha20 nonce size")
+	}
+
+	x0, x1, x2, x3 := j0, j1, j2, j3
+	x4 := binary.LittleEndian.Uint32(key[0:4])
+	x5 := binary.LittleEndian.Uint32(key[4:8])
+	x6 := binary.LittleEndian.Uint32(key[8:12])
+	x7 := binary.LittleEndian.Uint32(key[12:16])
+	x8 := binary.LittleEndian.Uint32(key[16:20])
+	x9 := binary.LittleEndian.Uint32(key[20:24])
+	x10 := binary.LittleEndian.Uint32(key[24:28])
+	x11 := binary.LittleEndian.Uint32(key[28:32])
+	x12 := binary.LittleEndian.Uint32(nonce[0:4])
+	x13 := binary.LittleEndian.Uint32(nonce[4:8])
+	x14 := binary.LittleEndian.Uint32(nonce[8:12])
+	x15 := binary.LittleEndian.Uint32(nonce[12:16])
+
+	for i := 0; i < 10; i++ {
+		// Diagonal round.
+		x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
+		x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
+		x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
+		x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
+
+		// Column round.
+		x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
+		x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
+		x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
+		x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
+	}
+
+	_ = out[31] // bounds check elimination hint
+	binary.LittleEndian.PutUint32(out[0:4], x0)
+	binary.LittleEndian.PutUint32(out[4:8], x1)
+	binary.LittleEndian.PutUint32(out[8:12], x2)
+	binary.LittleEndian.PutUint32(out[12:16], x3)
+	binary.LittleEndian.PutUint32(out[16:20], x12)
+	binary.LittleEndian.PutUint32(out[20:24], x13)
+	binary.LittleEndian.PutUint32(out[24:28], x14)
+	binary.LittleEndian.PutUint32(out[28:32], x15)
+	return out, nil
+}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_noasm.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_noasm.go
@ -2,15 +2,12 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build !ppc64le,!arm64,!s390x arm64,!go1.11 gccgo appengine
+// +build !arm64,!s390x,!ppc64le arm64,!go1.11 gccgo purego

 package chacha20

-const (
-	bufSize = 64
-	haveAsm = false
-)
+const bufSize = blockSize

-func (*Cipher) xorKeyStreamAsm(dst, src []byte) {
-	panic("not implemented")
+func (s *Cipher) xorKeyStreamBlocks(dst, src []byte) {
+	s.xorKeyStreamBlocksGeneric(dst, src)
 }
--- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.go
@ -0,0 +1,16 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !gccgo,!purego
+
+package chacha20
+
+const bufSize = 256
+
+//go:noescape
+func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
+
+func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) {
+	chaCha20_ctr32_vsx(&dst[0], &src[0], len(src), &c.key, &c.counter)
+}
--- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
@ -0,0 +1,449 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Based on CRYPTOGAMS code with the following comment:
+// # ====================================================================
+// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// # project. The module is, however, dual licensed under OpenSSL and
+// # CRYPTOGAMS licenses depending on where you obtain it. For further
+// # details see http://www.openssl.org/~appro/cryptogams/.
+// # ====================================================================
+
+// Code for the perl script that generates the ppc64 assembler
+// can be found in the cryptogams repository at the link below. It is based on
+// the original from openssl.
+
+// https://github.com/dot-asm/cryptogams/commit/a60f5b50ed908e91
+
+// The differences in this and the original implementation are
+// due to the calling conventions and initialization of constants.
+
+// +build !gccgo,!purego
+
+#include "textflag.h"
+
+#define OUT  R3
+#define INP  R4
+#define LEN  R5
+#define KEY  R6
+#define CNT  R7
+#define TMP  R15
+
+#define CONSTBASE  R16
+#define BLOCKS R17
+
+DATA consts<>+0x00(SB)/8, $0x3320646e61707865
+DATA consts<>+0x08(SB)/8, $0x6b20657479622d32
+DATA consts<>+0x10(SB)/8, $0x0000000000000001
+DATA consts<>+0x18(SB)/8, $0x0000000000000000
+DATA consts<>+0x20(SB)/8, $0x0000000000000004
+DATA consts<>+0x28(SB)/8, $0x0000000000000000
+DATA consts<>+0x30(SB)/8, $0x0a0b08090e0f0c0d
+DATA consts<>+0x38(SB)/8, $0x0203000106070405
+DATA consts<>+0x40(SB)/8, $0x090a0b080d0e0f0c
+DATA consts<>+0x48(SB)/8, $0x0102030005060704
+DATA consts<>+0x50(SB)/8, $0x6170786561707865
+DATA consts<>+0x58(SB)/8, $0x6170786561707865
+DATA consts<>+0x60(SB)/8, $0x3320646e3320646e
+DATA consts<>+0x68(SB)/8, $0x3320646e3320646e
+DATA consts<>+0x70(SB)/8, $0x79622d3279622d32
+DATA consts<>+0x78(SB)/8, $0x79622d3279622d32
+DATA consts<>+0x80(SB)/8, $0x6b2065746b206574
+DATA consts<>+0x88(SB)/8, $0x6b2065746b206574
+DATA consts<>+0x90(SB)/8, $0x0000000100000000
+DATA consts<>+0x98(SB)/8, $0x0000000300000002
+GLOBL consts<>(SB), RODATA, $0xa0
+
+//func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
+TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
+	MOVD out+0(FP), OUT
+	MOVD inp+8(FP), INP
+	MOVD len+16(FP), LEN
+	MOVD key+24(FP), KEY
+	MOVD counter+32(FP), CNT
+
+	// Addressing for constants
+	MOVD $consts<>+0x00(SB), CONSTBASE
+	MOVD $16, R8
+	MOVD $32, R9
+	MOVD $48, R10
+	MOVD $64, R11
+	SRD $6, LEN, BLOCKS
+	// V16
+	LXVW4X (CONSTBASE)(R0), VS48
+	ADD $80,CONSTBASE
+
+	// Load key into V17,V18
+	LXVW4X (KEY)(R0), VS49
+	LXVW4X (KEY)(R8), VS50
+
+	// Load CNT, NONCE into V19
+	LXVW4X (CNT)(R0), VS51
+
+	// Clear V27
+	VXOR V27, V27, V27
+
+	// V28
+	LXVW4X (CONSTBASE)(R11), VS60
+
+	// splat slot from V19 -> V26
+	VSPLTW $0, V19, V26
+
+	VSLDOI $4, V19, V27, V19
+	VSLDOI $12, V27, V19, V19
+
+	VADDUWM V26, V28, V26
+
+	MOVD $10, R14
+	MOVD R14, CTR
+
+loop_outer_vsx:
+	// V0, V1, V2, V3
+	LXVW4X (R0)(CONSTBASE), VS32
+	LXVW4X (R8)(CONSTBASE), VS33
+	LXVW4X (R9)(CONSTBASE), VS34
+	LXVW4X (R10)(CONSTBASE), VS35
+
+	// splat values from V17, V18 into V4-V11
+	VSPLTW $0, V17, V4
+	VSPLTW $1, V17, V5
+	VSPLTW $2, V17, V6
+	VSPLTW $3, V17, V7
+	VSPLTW $0, V18, V8
+	VSPLTW $1, V18, V9
+	VSPLTW $2, V18, V10
+	VSPLTW $3, V18, V11
+
+	// VOR
+	VOR V26, V26, V12
+
+	// splat values from V19 -> V13, V14, V15
+	VSPLTW $1, V19, V13
+	VSPLTW $2, V19, V14
+	VSPLTW $3, V19, V15
+
+	// splat   const values
+	VSPLTISW $-16, V27
+	VSPLTISW $12, V28
+	VSPLTISW $8, V29
+	VSPLTISW $7, V30
+
+loop_vsx:
+	VADDUWM V0, V4, V0
+	VADDUWM V1, V5, V1
+	VADDUWM V2, V6, V2
+	VADDUWM V3, V7, V3
+
+	VXOR V12, V0, V12
+	VXOR V13, V1, V13
+	VXOR V14, V2, V14
+	VXOR V15, V3, V15
+
+	VRLW V12, V27, V12
+	VRLW V13, V27, V13
+	VRLW V14, V27, V14
+	VRLW V15, V27, V15
+
+	VADDUWM V8, V12, V8
+	VADDUWM V9, V13, V9
+	VADDUWM V10, V14, V10
+	VADDUWM V11, V15, V11
+
+	VXOR V4, V8, V4
+	VXOR V5, V9, V5
+	VXOR V6, V10, V6
+	VXOR V7, V11, V7
+
+	VRLW V4, V28, V4
+	VRLW V5, V28, V5
+	VRLW V6, V28, V6
+	VRLW V7, V28, V7
+
+	VADDUWM V0, V4, V0
+	VADDUWM V1, V5, V1
+	VADDUWM V2, V6, V2
+	VADDUWM V3, V7, V3
+
+	VXOR V12, V0, V12
+	VXOR V13, V1, V13
+	VXOR V14, V2, V14
+	VXOR V15, V3, V15
+
+	VRLW V12, V29, V12
+	VRLW V13, V29, V13
+	VRLW V14, V29, V14
+	VRLW V15, V29, V15
+
+	VADDUWM V8, V12, V8
+	VADDUWM V9, V13, V9
+	VADDUWM V10, V14, V10
+	VADDUWM V11, V15, V11
+
+	VXOR V4, V8, V4
+	VXOR V5, V9, V5
+	VXOR V6, V10, V6
+	VXOR V7, V11, V7
+
+	VRLW V4, V30, V4
+	VRLW V5, V30, V5
+	VRLW V6, V30, V6
+	VRLW V7, V30, V7
+
+	VADDUWM V0, V5, V0
+	VADDUWM V1, V6, V1
+	VADDUWM V2, V7, V2
+	VADDUWM V3, V4, V3
+
+	VXOR V15, V0, V15
+	VXOR V12, V1, V12
+	VXOR V13, V2, V13
+	VXOR V14, V3, V14
+
+	VRLW V15, V27, V15
+	VRLW V12, V27, V12
+	VRLW V13, V27, V13
+	VRLW V14, V27, V14
+
+	VADDUWM V10, V15, V10
+	VADDUWM V11, V12, V11
+	VADDUWM V8, V13, V8
+	VADDUWM V9, V14, V9
+
+	VXOR V5, V10, V5
+	VXOR V6, V11, V6
+	VXOR V7, V8, V7
+	VXOR V4, V9, V4
+
+	VRLW V5, V28, V5
+	VRLW V6, V28, V6
+	VRLW V7, V28, V7
+	VRLW V4, V28, V4
+
+	VADDUWM V0, V5, V0
+	VADDUWM V1, V6, V1
+	VADDUWM V2, V7, V2
+	VADDUWM V3, V4, V3
+
+	VXOR V15, V0, V15
+	VXOR V12, V1, V12
+	VXOR V13, V2, V13
+	VXOR V14, V3, V14
+
+	VRLW V15, V29, V15
+	VRLW V12, V29, V12
+	VRLW V13, V29, V13
+	VRLW V14, V29, V14
+
+	VADDUWM V10, V15, V10
+	VADDUWM V11, V12, V11
+	VADDUWM V8, V13, V8
+	VADDUWM V9, V14, V9
+
+	VXOR V5, V10, V5
+	VXOR V6, V11, V6
+	VXOR V7, V8, V7
+	VXOR V4, V9, V4
+
+	VRLW V5, V30, V5
+	VRLW V6, V30, V6
+	VRLW V7, V30, V7
+	VRLW V4, V30, V4
+	BC   16, LT, loop_vsx
+
+	VADDUWM V12, V26, V12
+
+	WORD $0x13600F8C		// VMRGEW V0, V1, V27
+	WORD $0x13821F8C		// VMRGEW V2, V3, V28
+
+	WORD $0x10000E8C		// VMRGOW V0, V1, V0
+	WORD $0x10421E8C		// VMRGOW V2, V3, V2
+
+	WORD $0x13A42F8C		// VMRGEW V4, V5, V29
+	WORD $0x13C63F8C		// VMRGEW V6, V7, V30
+
+	XXPERMDI VS32, VS34, $0, VS33
+	XXPERMDI VS32, VS34, $3, VS35
+	XXPERMDI VS59, VS60, $0, VS32
+	XXPERMDI VS59, VS60, $3, VS34
+
+	WORD $0x10842E8C		// VMRGOW V4, V5, V4
+	WORD $0x10C63E8C		// VMRGOW V6, V7, V6
+
+	WORD $0x13684F8C		// VMRGEW V8, V9, V27
+	WORD $0x138A5F8C		// VMRGEW V10, V11, V28
+
+	XXPERMDI VS36, VS38, $0, VS37
+	XXPERMDI VS36, VS38, $3, VS39
+	XXPERMDI VS61, VS62, $0, VS36
+	XXPERMDI VS61, VS62, $3, VS38
+
+	WORD $0x11084E8C		// VMRGOW V8, V9, V8
+	WORD $0x114A5E8C		// VMRGOW V10, V11, V10
+
+	WORD $0x13AC6F8C		// VMRGEW V12, V13, V29
+	WORD $0x13CE7F8C		// VMRGEW V14, V15, V30
+
+	XXPERMDI VS40, VS42, $0, VS41
+	XXPERMDI VS40, VS42, $3, VS43
+	XXPERMDI VS59, VS60, $0, VS40
+	XXPERMDI VS59, VS60, $3, VS42
+
+	WORD $0x118C6E8C		// VMRGOW V12, V13, V12
+	WORD $0x11CE7E8C		// VMRGOW V14, V15, V14
+
+	VSPLTISW $4, V27
+	VADDUWM V26, V27, V26
+
+	XXPERMDI VS44, VS46, $0, VS45
+	XXPERMDI VS44, VS46, $3, VS47
+	XXPERMDI VS61, VS62, $0, VS44
+	XXPERMDI VS61, VS62, $3, VS46
+
+	VADDUWM V0, V16, V0
+	VADDUWM V4, V17, V4
+	VADDUWM V8, V18, V8
+	VADDUWM V12, V19, V12
+
+	CMPU LEN, $64
+	BLT tail_vsx
+
+	// Bottom of loop
+	LXVW4X (INP)(R0), VS59
+	LXVW4X (INP)(R8), VS60
+	LXVW4X (INP)(R9), VS61
+	LXVW4X (INP)(R10), VS62
+
+	VXOR V27, V0, V27
+	VXOR V28, V4, V28
+	VXOR V29, V8, V29
+	VXOR V30, V12, V30
+
+	STXVW4X VS59, (OUT)(R0)
+	STXVW4X VS60, (OUT)(R8)
+	ADD     $64, INP
+	STXVW4X VS61, (OUT)(R9)
+	ADD     $-64, LEN
+	STXVW4X VS62, (OUT)(R10)
+	ADD     $64, OUT
+	BEQ     done_vsx
+
+	VADDUWM V1, V16, V0
+	VADDUWM V5, V17, V4
+	VADDUWM V9, V18, V8
+	VADDUWM V13, V19, V12
+
+	CMPU  LEN, $64
+	BLT   tail_vsx
+
+	LXVW4X (INP)(R0), VS59
+	LXVW4X (INP)(R8), VS60
+	LXVW4X (INP)(R9), VS61
+	LXVW4X (INP)(R10), VS62
+	VXOR   V27, V0, V27
+
+	VXOR V28, V4, V28
+	VXOR V29, V8, V29
+	VXOR V30, V12, V30
+
+	STXVW4X VS59, (OUT)(R0)
+	STXVW4X VS60, (OUT)(R8)
+	ADD     $64, INP
+	STXVW4X VS61, (OUT)(R9)
+	ADD     $-64, LEN
+	STXVW4X VS62, (OUT)(V10)
+	ADD     $64, OUT
+	BEQ     done_vsx
+
+	VADDUWM V2, V16, V0
+	VADDUWM V6, V17, V4
+	VADDUWM V10, V18, V8
+	VADDUWM V14, V19, V12
+
+	CMPU LEN, $64
+	BLT  tail_vsx
+
+	LXVW4X (INP)(R0), VS59
+	LXVW4X (INP)(R8), VS60
+	LXVW4X (INP)(R9), VS61
+	LXVW4X (INP)(R10), VS62
+
+	VXOR V27, V0, V27
+	VXOR V28, V4, V28
+	VXOR V29, V8, V29
+	VXOR V30, V12, V30
+
+	STXVW4X VS59, (OUT)(R0)
+	STXVW4X VS60, (OUT)(R8)
+	ADD     $64, INP
+	STXVW4X VS61, (OUT)(R9)
+	ADD     $-64, LEN
+	STXVW4X VS62, (OUT)(R10)
+	ADD     $64, OUT
+	BEQ     done_vsx
+
+	VADDUWM V3, V16, V0
+	VADDUWM V7, V17, V4
+	VADDUWM V11, V18, V8
+	VADDUWM V15, V19, V12
+
+	CMPU  LEN, $64
+	BLT   tail_vsx
+
+	LXVW4X (INP)(R0), VS59
+	LXVW4X (INP)(R8), VS60
+	LXVW4X (INP)(R9), VS61
+	LXVW4X (INP)(R10), VS62
+
+	VXOR V27, V0, V27
+	VXOR V28, V4, V28
+	VXOR V29, V8, V29
+	VXOR V30, V12, V30
+
+	STXVW4X VS59, (OUT)(R0)
+	STXVW4X VS60, (OUT)(R8)
+	ADD     $64, INP
+	STXVW4X VS61, (OUT)(R9)
+	ADD     $-64, LEN
+	STXVW4X VS62, (OUT)(R10)
+	ADD     $64, OUT
+
+	MOVD $10, R14
+	MOVD R14, CTR
+	BNE  loop_outer_vsx
+
+done_vsx:
+	// Increment counter by number of 64 byte blocks
+	MOVD (CNT), R14
+	ADD  BLOCKS, R14
+	MOVD R14, (CNT)
+	RET
+
+tail_vsx:
+	ADD  $32, R1, R11
+	MOVD LEN, CTR
+
+	// Save values on stack to copy from
+	STXVW4X VS32, (R11)(R0)
+	STXVW4X VS36, (R11)(R8)
+	STXVW4X VS40, (R11)(R9)
+	STXVW4X VS44, (R11)(R10)
+	ADD $-1, R11, R12
+	ADD $-1, INP
+	ADD $-1, OUT
+
+looptail_vsx:
+	// Copying the result to OUT
+	// in bytes.
+	MOVBZU 1(R12), KEY
+	MOVBZU 1(INP), TMP
+	XOR    KEY, TMP, KEY
+	MOVBU  KEY, 1(OUT)
+	BC     16, LT, looptail_vsx
+
+	// Clear the stack values
+	STXVW4X VS48, (R11)(R0)
+	STXVW4X VS48, (R11)(R8)
+	STXVW4X VS48, (R11)(R9)
+	STXVW4X VS48, (R11)(R10)
+	BR      done_vsx
--- a/vendor/golang.org/x/crypto/chacha20/chacha_s390x.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_s390x.go
@ -0,0 +1,26 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !gccgo,!purego
+
+package chacha20
+
+import "golang.org/x/sys/cpu"
+
+var haveAsm = cpu.S390X.HasVX
+
+const bufSize = 256
+
+// xorKeyStreamVX is an assembly implementation of XORKeyStream. It must only
+// be called when the vector facility is available. Implementation in asm_s390x.s.
+//go:noescape
+func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
+
+func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) {
+	if cpu.S390X.HasVX {
+		xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter)
+	} else {
+		c.xorKeyStreamBlocksGeneric(dst, src)
+	}
+}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_s390x.s
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_s390x.s
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build s390x,!gccgo,!appengine
+// +build !gccgo,!purego

 #include "go_asm.h"
 #include "textflag.h"
@ -24,15 +24,6 @@ DATA ·constants<>+0x14(SB)/4, $0x3320646e
 DATA ·constants<>+0x18(SB)/4, $0x79622d32
 DATA ·constants<>+0x1c(SB)/4, $0x6b206574

-// EXRL targets:
-TEXT ·mvcSrcToBuf(SB), NOFRAME|NOSPLIT, $0
-	MVC $1, (R1), (R8)
-	RET
-
-TEXT ·mvcBufToDst(SB), NOFRAME|NOSPLIT, $0
-	MVC $1, (R8), (R9)
-	RET
-
 #define BSWAP V5
 #define J0    V6
 #define KEY0  V7
@ -144,7 +135,7 @@ TEXT ·mvcBufToDst(SB), NOFRAME|NOSPLIT, $0
 	VMRHF v, w, c \ // c = {a[2], b[2], c[2], d[2]}
 	VMRLF v, w, d // d = {a[3], b[3], c[3], d[3]}

-// func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32, buf *[256]byte, len *int)
+// func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
 TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
 	MOVD $·constants<>(SB), R1
 	MOVD dst+0(FP), R2         // R2=&dst[0]
@ -152,25 +143,10 @@ TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
 	MOVD key+48(FP), R5        // R5=key
 	MOVD nonce+56(FP), R6      // R6=nonce
 	MOVD counter+64(FP), R7    // R7=counter
-	MOVD buf+72(FP), R8        // R8=buf
-	MOVD len+80(FP), R9        // R9=len

 	// load BSWAP and J0
 	VLM (R1), BSWAP, J0

-	// set up tail buffer
-	ADD     $-1, R4, R12
-	MOVBZ   R12, R12
-	CMPUBEQ R12, $255, aligned
-	MOVD    R4, R1
-	AND     $~255, R1
-	MOVD    $(R3)(R1*1), R1
-	EXRL    $·mvcSrcToBuf(SB), R12
-	MOVD    $255, R0
-	SUB     R12, R0
-	MOVD    R0, (R9)               // update len
-
-aligned:
 	// setup
 	MOVD  $95, R0
 	VLM   (R5), KEY0, KEY1
@ -217,9 +193,7 @@ loop:

 	// decrement length
 	ADD $-256, R4
-	BLT tail

-continue:
 	// rearrange vectors
 	SHUFFLE(X0, X1, X2, X3, M0, M1, M2, M3)
 	ADDV(J0, X0, X1, X2, X3)
@ -245,16 +219,6 @@ continue:
 	MOVD $256(R3), R3

 	CMPBNE  R4, $0, chacha
-	CMPUBEQ R12, $255, return
-	EXRL    $·mvcBufToDst(SB), R12 // len was updated during setup

-return:
 	VSTEF $0, CTR, (R7)
 	RET
-
-tail:
-	MOVD R2, R9
-	MOVD R8, R2
-	MOVD R8, R3
-	MOVD $0, R4
-	JMP  continue
--- a/vendor/golang.org/x/crypto/internal/chacha20/xor.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/xor.go
@ -4,9 +4,7 @@

 package chacha20

-import (
-	"runtime"
-)
+import "runtime"

 // Platforms that have fast unaligned 32-bit little endian accesses.
 const unaligned = runtime.GOARCH == "386" ||
@ -15,10 +13,10 @@ const unaligned = runtime.GOARCH == "386" ||
 	runtime.GOARCH == "ppc64le" ||
 	runtime.GOARCH == "s390x"

-// xor reads a little endian uint32 from src, XORs it with u and
+// addXor reads a little endian uint32 from src, XORs it with (a + b) and
 // places the result in little endian byte order in dst.
-func xor(dst, src []byte, u uint32) {
-	_, _ = src[3], dst[3] // eliminate bounds checks
+func addXor(dst, src []byte, a, b uint32) {
+	_, _ = src[3], dst[3] // bounds check elimination hint
 	if unaligned {
 		// The compiler should optimize this code into
 		// 32-bit unaligned little endian loads and stores.
@ -29,15 +27,16 @@ func xor(dst, src []byte, u uint32) {
 		v |= uint32(src[1]) << 8
 		v |= uint32(src[2]) << 16
 		v |= uint32(src[3]) << 24
-		v ^= u
+		v ^= a + b
 		dst[0] = byte(v)
 		dst[1] = byte(v >> 8)
 		dst[2] = byte(v >> 16)
 		dst[3] = byte(v >> 24)
 	} else {
-		dst[0] = src[0] ^ byte(u)
-		dst[1] = src[1] ^ byte(u>>8)
-		dst[2] = src[2] ^ byte(u>>16)
-		dst[3] = src[3] ^ byte(u>>24)
+		a += b
+		dst[0] = src[0] ^ byte(a)
+		dst[1] = src[1] ^ byte(a>>8)
+		dst[2] = src[2] ^ byte(a>>16)
+		dst[3] = src[3] ^ byte(a>>24)
 	}
 }
--- a/vendor/golang.org/x/crypto/curve25519/const_amd64.h
+++ b/vendor/golang.org/x/crypto/curve25519/const_amd64.h
@ -1,8 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-#define REDMASK51     0x0007FFFFFFFFFFFF
--- a/vendor/golang.org/x/crypto/curve25519/const_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/const_amd64.s
@ -1,20 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-// These constants cannot be encoded in non-MOVQ immediates.
-// We access them directly from memory instead.
-
-DATA ·_121666_213(SB)/8, $996687872
-GLOBL ·_121666_213(SB), 8, $8
-
-DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
-GLOBL ·_2P0(SB), 8, $8
-
-DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
-GLOBL ·_2P1234(SB), 8, $8
--- a/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s
@ -1,65 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build amd64,!gccgo,!appengine
-
-// func cswap(inout *[4][5]uint64, v uint64)
-TEXT ·cswap(SB),7,$0
-	MOVQ inout+0(FP),DI
-	MOVQ v+8(FP),SI
-
-	SUBQ $1, SI
-	NOTQ SI
-	MOVQ SI, X15
-	PSHUFD $0x44, X15, X15
-
-	MOVOU 0(DI), X0
-	MOVOU 16(DI), X2
-	MOVOU 32(DI), X4
-	MOVOU 48(DI), X6
-	MOVOU 64(DI), X8
-	MOVOU 80(DI), X1
-	MOVOU 96(DI), X3
-	MOVOU 112(DI), X5
-	MOVOU 128(DI), X7
-	MOVOU 144(DI), X9
-
-	MOVO X1, X10
-	MOVO X3, X11
-	MOVO X5, X12
-	MOVO X7, X13
-	MOVO X9, X14
-
-	PXOR X0, X10
-	PXOR X2, X11
-	PXOR X4, X12
-	PXOR X6, X13
-	PXOR X8, X14
-	PAND X15, X10
-	PAND X15, X11
-	PAND X15, X12
-	PAND X15, X13
-	PAND X15, X14
-	PXOR X10, X0
-	PXOR X10, X1
-	PXOR X11, X2
-	PXOR X11, X3
-	PXOR X12, X4
-	PXOR X12, X5
-	PXOR X13, X6
-	PXOR X13, X7
-	PXOR X14, X8
-	PXOR X14, X9
-
-	MOVOU X0, 0(DI)
-	MOVOU X2, 16(DI)
-	MOVOU X4, 32(DI)
-	MOVOU X6, 48(DI)
-	MOVOU X8, 64(DI)
-	MOVOU X1, 80(DI)
-	MOVOU X3, 96(DI)
-	MOVOU X5, 112(DI)
-	MOVOU X7, 128(DI)
-	MOVOU X9, 144(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/curve25519.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519.go
@ -1,834 +1,95 @@
-// Copyright 2013 The Go Authors. All rights reserved.
+// Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// We have an implementation in amd64 assembly so this code is only run on
-// non-amd64 platforms. The amd64 assembly does not support gccgo.
-// +build !amd64 gccgo appengine
-
-package curve25519
+// Package curve25519 provides an implementation of the X25519 function, which
+// performs scalar multiplication on the elliptic curve known as Curve25519.
+// See RFC 7748.
+package curve25519 // import "golang.org/x/crypto/curve25519"

 import (
-	"encoding/binary"
+	"crypto/subtle"
+	"fmt"
 )

-// This code is a port of the public domain, "ref10" implementation of
-// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
-
-// fieldElement represents an element of the field GF(2^255 - 19). An element
-// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
-// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
-// context.
-type fieldElement [10]int32
-
-func feZero(fe *fieldElement) {
-	for i := range fe {
-		fe[i] = 0
-	}
-}
-
-func feOne(fe *fieldElement) {
-	feZero(fe)
-	fe[0] = 1
-}
-
-func feAdd(dst, a, b *fieldElement) {
-	for i := range dst {
-		dst[i] = a[i] + b[i]
-	}
-}
-
-func feSub(dst, a, b *fieldElement) {
-	for i := range dst {
-		dst[i] = a[i] - b[i]
-	}
-}
-
-func feCopy(dst, src *fieldElement) {
-	for i := range dst {
-		dst[i] = src[i]
-	}
-}
-
-// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
-//
-// Preconditions: b in {0,1}.
-func feCSwap(f, g *fieldElement, b int32) {
-	b = -b
-	for i := range f {
-		t := b & (f[i] ^ g[i])
-		f[i] ^= t
-		g[i] ^= t
-	}
-}
-
-// load3 reads a 24-bit, little-endian value from in.
-func load3(in []byte) int64 {
-	var r int64
-	r = int64(in[0])
-	r |= int64(in[1]) << 8
-	r |= int64(in[2]) << 16
-	return r
-}
-
-// load4 reads a 32-bit, little-endian value from in.
-func load4(in []byte) int64 {
-	return int64(binary.LittleEndian.Uint32(in))
-}
-
-func feFromBytes(dst *fieldElement, src *[32]byte) {
-	h0 := load4(src[:])
-	h1 := load3(src[4:]) << 6
-	h2 := load3(src[7:]) << 5
-	h3 := load3(src[10:]) << 3
-	h4 := load3(src[13:]) << 2
-	h5 := load4(src[16:])
-	h6 := load3(src[20:]) << 7
-	h7 := load3(src[23:]) << 5
-	h8 := load3(src[26:]) << 4
-	h9 := (load3(src[29:]) & 0x7fffff) << 2
-
-	var carry [10]int64
-	carry[9] = (h9 + 1<<24) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	carry[1] = (h1 + 1<<24) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[3] = (h3 + 1<<24) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[5] = (h5 + 1<<24) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	carry[7] = (h7 + 1<<24) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-
-	carry[0] = (h0 + 1<<25) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[2] = (h2 + 1<<25) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[4] = (h4 + 1<<25) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[6] = (h6 + 1<<25) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	carry[8] = (h8 + 1<<25) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-
-	dst[0] = int32(h0)
-	dst[1] = int32(h1)
-	dst[2] = int32(h2)
-	dst[3] = int32(h3)
-	dst[4] = int32(h4)
-	dst[5] = int32(h5)
-	dst[6] = int32(h6)
-	dst[7] = int32(h7)
-	dst[8] = int32(h8)
-	dst[9] = int32(h9)
-}
-
-// feToBytes marshals h to s.
-// Preconditions:
-//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-//
-// Write p=2^255-19; q=floor(h/p).
-// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
-//
-// Proof:
-//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
-//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
-//
-//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
-//   Then 0<y<1.
+// ScalarMult sets dst to the product scalar * point.
 //
-//   Write r=h-pq.
-//   Have 0<=r<=p-1=2^255-20.
-//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
-//
-//   Write x=r+19(2^-255)r+y.
-//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
-//
-//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
-//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
-func feToBytes(s *[32]byte, h *fieldElement) {
-	var carry [10]int32
-
-	q := (19*h[9] + (1 << 24)) >> 25
-	q = (h[0] + q) >> 26
-	q = (h[1] + q) >> 25
-	q = (h[2] + q) >> 26
-	q = (h[3] + q) >> 25
-	q = (h[4] + q) >> 26
-	q = (h[5] + q) >> 25
-	q = (h[6] + q) >> 26
-	q = (h[7] + q) >> 25
-	q = (h[8] + q) >> 26
-	q = (h[9] + q) >> 25
-
-	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
-	h[0] += 19 * q
-	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
-
-	carry[0] = h[0] >> 26
-	h[1] += carry[0]
-	h[0] -= carry[0] << 26
-	carry[1] = h[1] >> 25
-	h[2] += carry[1]
-	h[1] -= carry[1] << 25
-	carry[2] = h[2] >> 26
-	h[3] += carry[2]
-	h[2] -= carry[2] << 26
-	carry[3] = h[3] >> 25
-	h[4] += carry[3]
-	h[3] -= carry[3] << 25
-	carry[4] = h[4] >> 26
-	h[5] += carry[4]
-	h[4] -= carry[4] << 26
-	carry[5] = h[5] >> 25
-	h[6] += carry[5]
-	h[5] -= carry[5] << 25
-	carry[6] = h[6] >> 26
-	h[7] += carry[6]
-	h[6] -= carry[6] << 26
-	carry[7] = h[7] >> 25
-	h[8] += carry[7]
-	h[7] -= carry[7] << 25
-	carry[8] = h[8] >> 26
-	h[9] += carry[8]
-	h[8] -= carry[8] << 26
-	carry[9] = h[9] >> 25
-	h[9] -= carry[9] << 25
-	// h10 = carry9
-
-	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
-	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
-	// evidently 2^255 h10-2^255 q = 0.
-	// Goal: Output h[0]+...+2^230 h[9].
-
-	s[0] = byte(h[0] >> 0)
-	s[1] = byte(h[0] >> 8)
-	s[2] = byte(h[0] >> 16)
-	s[3] = byte((h[0] >> 24) | (h[1] << 2))
-	s[4] = byte(h[1] >> 6)
-	s[5] = byte(h[1] >> 14)
-	s[6] = byte((h[1] >> 22) | (h[2] << 3))
-	s[7] = byte(h[2] >> 5)
-	s[8] = byte(h[2] >> 13)
-	s[9] = byte((h[2] >> 21) | (h[3] << 5))
-	s[10] = byte(h[3] >> 3)
-	s[11] = byte(h[3] >> 11)
-	s[12] = byte((h[3] >> 19) | (h[4] << 6))
-	s[13] = byte(h[4] >> 2)
-	s[14] = byte(h[4] >> 10)
-	s[15] = byte(h[4] >> 18)
-	s[16] = byte(h[5] >> 0)
-	s[17] = byte(h[5] >> 8)
-	s[18] = byte(h[5] >> 16)
-	s[19] = byte((h[5] >> 24) | (h[6] << 1))
-	s[20] = byte(h[6] >> 7)
-	s[21] = byte(h[6] >> 15)
-	s[22] = byte((h[6] >> 23) | (h[7] << 3))
-	s[23] = byte(h[7] >> 5)
-	s[24] = byte(h[7] >> 13)
-	s[25] = byte((h[7] >> 21) | (h[8] << 4))
-	s[26] = byte(h[8] >> 4)
-	s[27] = byte(h[8] >> 12)
-	s[28] = byte((h[8] >> 20) | (h[9] << 6))
-	s[29] = byte(h[9] >> 2)
-	s[30] = byte(h[9] >> 10)
-	s[31] = byte(h[9] >> 18)
+// Deprecated: when provided a low-order point, ScalarMult will set dst to all
+// zeroes, irrespective of the scalar. Instead, use the X25519 function, which
+// will return an error.
+func ScalarMult(dst, scalar, point *[32]byte) {
+	scalarMult(dst, scalar, point)
 }

-// feMul calculates h = f * g
-// Can overlap h with f or g.
-//
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-//
-// Notes on implementation strategy:
-//
-// Using schoolbook multiplication.
-// Karatsuba would save a little in some cost models.
+// ScalarBaseMult sets dst to the product scalar * base where base is the
+// standard generator.
 //
-// Most multiplications by 2 and 19 are 32-bit precomputations;
-// cheaper than 64-bit postcomputations.
-//
-// There is one remaining multiplication by 19 in the carry chain;
-// one *19 precomputation can be merged into this,
-// but the resulting data flow is considerably less clean.
-//
-// There are 12 carries below.
-// 10 of them are 2-way parallelizable and vectorizable.
-// Can get away with 11 carries, but then data flow is much deeper.
-//
-// With tighter constraints on inputs can squeeze carries into int32.
-func feMul(h, f, g *fieldElement) {
-	f0 := f[0]
-	f1 := f[1]
-	f2 := f[2]
-	f3 := f[3]
-	f4 := f[4]
-	f5 := f[5]
-	f6 := f[6]
-	f7 := f[7]
-	f8 := f[8]
-	f9 := f[9]
-	g0 := g[0]
-	g1 := g[1]
-	g2 := g[2]
-	g3 := g[3]
-	g4 := g[4]
-	g5 := g[5]
-	g6 := g[6]
-	g7 := g[7]
-	g8 := g[8]
-	g9 := g[9]
-	g1_19 := 19 * g1 // 1.4*2^29
-	g2_19 := 19 * g2 // 1.4*2^30; still ok
-	g3_19 := 19 * g3
-	g4_19 := 19 * g4
-	g5_19 := 19 * g5
-	g6_19 := 19 * g6
-	g7_19 := 19 * g7
-	g8_19 := 19 * g8
-	g9_19 := 19 * g9
-	f1_2 := 2 * f1
-	f3_2 := 2 * f3
-	f5_2 := 2 * f5
-	f7_2 := 2 * f7
-	f9_2 := 2 * f9
-	f0g0 := int64(f0) * int64(g0)
-	f0g1 := int64(f0) * int64(g1)
-	f0g2 := int64(f0) * int64(g2)
-	f0g3 := int64(f0) * int64(g3)
-	f0g4 := int64(f0) * int64(g4)
-	f0g5 := int64(f0) * int64(g5)
-	f0g6 := int64(f0) * int64(g6)
-	f0g7 := int64(f0) * int64(g7)
-	f0g8 := int64(f0) * int64(g8)
-	f0g9 := int64(f0) * int64(g9)
-	f1g0 := int64(f1) * int64(g0)
-	f1g1_2 := int64(f1_2) * int64(g1)
-	f1g2 := int64(f1) * int64(g2)
-	f1g3_2 := int64(f1_2) * int64(g3)
-	f1g4 := int64(f1) * int64(g4)
-	f1g5_2 := int64(f1_2) * int64(g5)
-	f1g6 := int64(f1) * int64(g6)
-	f1g7_2 := int64(f1_2) * int64(g7)
-	f1g8 := int64(f1) * int64(g8)
-	f1g9_38 := int64(f1_2) * int64(g9_19)
-	f2g0 := int64(f2) * int64(g0)
-	f2g1 := int64(f2) * int64(g1)
-	f2g2 := int64(f2) * int64(g2)
-	f2g3 := int64(f2) * int64(g3)
-	f2g4 := int64(f2) * int64(g4)
-	f2g5 := int64(f2) * int64(g5)
-	f2g6 := int64(f2) * int64(g6)
-	f2g7 := int64(f2) * int64(g7)
-	f2g8_19 := int64(f2) * int64(g8_19)
-	f2g9_19 := int64(f2) * int64(g9_19)
-	f3g0 := int64(f3) * int64(g0)
-	f3g1_2 := int64(f3_2) * int64(g1)
-	f3g2 := int64(f3) * int64(g2)
-	f3g3_2 := int64(f3_2) * int64(g3)
-	f3g4 := int64(f3) * int64(g4)
-	f3g5_2 := int64(f3_2) * int64(g5)
-	f3g6 := int64(f3) * int64(g6)
-	f3g7_38 := int64(f3_2) * int64(g7_19)
-	f3g8_19 := int64(f3) * int64(g8_19)
-	f3g9_38 := int64(f3_2) * int64(g9_19)
-	f4g0 := int64(f4) * int64(g0)
-	f4g1 := int64(f4) * int64(g1)
-	f4g2 := int64(f4) * int64(g2)
-	f4g3 := int64(f4) * int64(g3)
-	f4g4 := int64(f4) * int64(g4)
-	f4g5 := int64(f4) * int64(g5)
-	f4g6_19 := int64(f4) * int64(g6_19)
-	f4g7_19 := int64(f4) * int64(g7_19)
-	f4g8_19 := int64(f4) * int64(g8_19)
-	f4g9_19 := int64(f4) * int64(g9_19)
-	f5g0 := int64(f5) * int64(g0)
-	f5g1_2 := int64(f5_2) * int64(g1)
-	f5g2 := int64(f5) * int64(g2)
-	f5g3_2 := int64(f5_2) * int64(g3)
-	f5g4 := int64(f5) * int64(g4)
-	f5g5_38 := int64(f5_2) * int64(g5_19)
-	f5g6_19 := int64(f5) * int64(g6_19)
-	f5g7_38 := int64(f5_2) * int64(g7_19)
-	f5g8_19 := int64(f5) * int64(g8_19)
-	f5g9_38 := int64(f5_2) * int64(g9_19)
-	f6g0 := int64(f6) * int64(g0)
-	f6g1 := int64(f6) * int64(g1)
-	f6g2 := int64(f6) * int64(g2)
-	f6g3 := int64(f6) * int64(g3)
-	f6g4_19 := int64(f6) * int64(g4_19)
-	f6g5_19 := int64(f6) * int64(g5_19)
-	f6g6_19 := int64(f6) * int64(g6_19)
-	f6g7_19 := int64(f6) * int64(g7_19)
-	f6g8_19 := int64(f6) * int64(g8_19)
-	f6g9_19 := int64(f6) * int64(g9_19)
-	f7g0 := int64(f7) * int64(g0)
-	f7g1_2 := int64(f7_2) * int64(g1)
-	f7g2 := int64(f7) * int64(g2)
-	f7g3_38 := int64(f7_2) * int64(g3_19)
-	f7g4_19 := int64(f7) * int64(g4_19)
-	f7g5_38 := int64(f7_2) * int64(g5_19)
-	f7g6_19 := int64(f7) * int64(g6_19)
-	f7g7_38 := int64(f7_2) * int64(g7_19)
-	f7g8_19 := int64(f7) * int64(g8_19)
-	f7g9_38 := int64(f7_2) * int64(g9_19)
-	f8g0 := int64(f8) * int64(g0)
-	f8g1 := int64(f8) * int64(g1)
-	f8g2_19 := int64(f8) * int64(g2_19)
-	f8g3_19 := int64(f8) * int64(g3_19)
-	f8g4_19 := int64(f8) * int64(g4_19)
-	f8g5_19 := int64(f8) * int64(g5_19)
-	f8g6_19 := int64(f8) * int64(g6_19)
-	f8g7_19 := int64(f8) * int64(g7_19)
-	f8g8_19 := int64(f8) * int64(g8_19)
-	f8g9_19 := int64(f8) * int64(g9_19)
-	f9g0 := int64(f9) * int64(g0)
-	f9g1_38 := int64(f9_2) * int64(g1_19)
-	f9g2_19 := int64(f9) * int64(g2_19)
-	f9g3_38 := int64(f9_2) * int64(g3_19)
-	f9g4_19 := int64(f9) * int64(g4_19)
-	f9g5_38 := int64(f9_2) * int64(g5_19)
-	f9g6_19 := int64(f9) * int64(g6_19)
-	f9g7_38 := int64(f9_2) * int64(g7_19)
-	f9g8_19 := int64(f9) * int64(g8_19)
-	f9g9_38 := int64(f9_2) * int64(g9_19)
-	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
-	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
-	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
-	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
-	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
-	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
-	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
-	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
-	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
-	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
-	var carry [10]int64
-
-	// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
-	//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
-	// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
-	//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	// |h0| <= 2^25
-	// |h4| <= 2^25
-	// |h1| <= 1.51*2^58
-	// |h5| <= 1.51*2^58
-
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	// |h1| <= 2^24; from now on fits into int32
-	// |h5| <= 2^24; from now on fits into int32
-	// |h2| <= 1.21*2^59
-	// |h6| <= 1.21*2^59
-
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	// |h2| <= 2^25; from now on fits into int32 unchanged
-	// |h6| <= 2^25; from now on fits into int32 unchanged
-	// |h3| <= 1.51*2^58
-	// |h7| <= 1.51*2^58
-
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-	// |h3| <= 2^24; from now on fits into int32 unchanged
-	// |h7| <= 2^24; from now on fits into int32 unchanged
-	// |h4| <= 1.52*2^33
-	// |h8| <= 1.52*2^33
-
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-	// |h4| <= 2^25; from now on fits into int32 unchanged
-	// |h8| <= 2^25; from now on fits into int32 unchanged
-	// |h5| <= 1.01*2^24
-	// |h9| <= 1.51*2^58
-
-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	// |h9| <= 2^24; from now on fits into int32 unchanged
-	// |h0| <= 1.8*2^37
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	// |h0| <= 2^25; from now on fits into int32 unchanged
-	// |h1| <= 1.01*2^24
-
-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
+// It is recommended to use the X25519 function with Basepoint instead, as
+// copying into fixed size arrays can lead to unexpected bugs.
+func ScalarBaseMult(dst, scalar *[32]byte) {
+	ScalarMult(dst, scalar, &basePoint)
 }

-// feSquare calculates h = f*f. Can overlap h with f.
-//
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-func feSquare(h, f *fieldElement) {
-	f0 := f[0]
-	f1 := f[1]
-	f2 := f[2]
-	f3 := f[3]
-	f4 := f[4]
-	f5 := f[5]
-	f6 := f[6]
-	f7 := f[7]
-	f8 := f[8]
-	f9 := f[9]
-	f0_2 := 2 * f0
-	f1_2 := 2 * f1
-	f2_2 := 2 * f2
-	f3_2 := 2 * f3
-	f4_2 := 2 * f4
-	f5_2 := 2 * f5
-	f6_2 := 2 * f6
-	f7_2 := 2 * f7
-	f5_38 := 38 * f5 // 1.31*2^30
-	f6_19 := 19 * f6 // 1.31*2^30
-	f7_38 := 38 * f7 // 1.31*2^30
-	f8_19 := 19 * f8 // 1.31*2^30
-	f9_38 := 38 * f9 // 1.31*2^30
-	f0f0 := int64(f0) * int64(f0)
-	f0f1_2 := int64(f0_2) * int64(f1)
-	f0f2_2 := int64(f0_2) * int64(f2)
-	f0f3_2 := int64(f0_2) * int64(f3)
-	f0f4_2 := int64(f0_2) * int64(f4)
-	f0f5_2 := int64(f0_2) * int64(f5)
-	f0f6_2 := int64(f0_2) * int64(f6)
-	f0f7_2 := int64(f0_2) * int64(f7)
-	f0f8_2 := int64(f0_2) * int64(f8)
-	f0f9_2 := int64(f0_2) * int64(f9)
-	f1f1_2 := int64(f1_2) * int64(f1)
-	f1f2_2 := int64(f1_2) * int64(f2)
-	f1f3_4 := int64(f1_2) * int64(f3_2)
-	f1f4_2 := int64(f1_2) * int64(f4)
-	f1f5_4 := int64(f1_2) * int64(f5_2)
-	f1f6_2 := int64(f1_2) * int64(f6)
-	f1f7_4 := int64(f1_2) * int64(f7_2)
-	f1f8_2 := int64(f1_2) * int64(f8)
-	f1f9_76 := int64(f1_2) * int64(f9_38)
-	f2f2 := int64(f2) * int64(f2)
-	f2f3_2 := int64(f2_2) * int64(f3)
-	f2f4_2 := int64(f2_2) * int64(f4)
-	f2f5_2 := int64(f2_2) * int64(f5)
-	f2f6_2 := int64(f2_2) * int64(f6)
-	f2f7_2 := int64(f2_2) * int64(f7)
-	f2f8_38 := int64(f2_2) * int64(f8_19)
-	f2f9_38 := int64(f2) * int64(f9_38)
-	f3f3_2 := int64(f3_2) * int64(f3)
-	f3f4_2 := int64(f3_2) * int64(f4)
-	f3f5_4 := int64(f3_2) * int64(f5_2)
-	f3f6_2 := int64(f3_2) * int64(f6)
-	f3f7_76 := int64(f3_2) * int64(f7_38)
-	f3f8_38 := int64(f3_2) * int64(f8_19)
-	f3f9_76 := int64(f3_2) * int64(f9_38)
-	f4f4 := int64(f4) * int64(f4)
-	f4f5_2 := int64(f4_2) * int64(f5)
-	f4f6_38 := int64(f4_2) * int64(f6_19)
-	f4f7_38 := int64(f4) * int64(f7_38)
-	f4f8_38 := int64(f4_2) * int64(f8_19)
-	f4f9_38 := int64(f4) * int64(f9_38)
-	f5f5_38 := int64(f5) * int64(f5_38)
-	f5f6_38 := int64(f5_2) * int64(f6_19)
-	f5f7_76 := int64(f5_2) * int64(f7_38)
-	f5f8_38 := int64(f5_2) * int64(f8_19)
-	f5f9_76 := int64(f5_2) * int64(f9_38)
-	f6f6_19 := int64(f6) * int64(f6_19)
-	f6f7_38 := int64(f6) * int64(f7_38)
-	f6f8_38 := int64(f6_2) * int64(f8_19)
-	f6f9_38 := int64(f6) * int64(f9_38)
-	f7f7_38 := int64(f7) * int64(f7_38)
-	f7f8_38 := int64(f7_2) * int64(f8_19)
-	f7f9_76 := int64(f7_2) * int64(f9_38)
-	f8f8_19 := int64(f8) * int64(f8_19)
-	f8f9_38 := int64(f8) * int64(f9_38)
-	f9f9_38 := int64(f9) * int64(f9_38)
-	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
-	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
-	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
-	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
-	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
-	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
-	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
-	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
-	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
-	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
-	var carry [10]int64
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
+const (
+	// ScalarSize is the size of the scalar input to X25519.
+	ScalarSize = 32
+	// PointSize is the size of the point input to X25519.
+	PointSize = 32
+)

-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
+// Basepoint is the canonical Curve25519 generator.
+var Basepoint []byte

-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
+var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}

-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
+func init() { Basepoint = basePoint[:] }

-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
+func checkBasepoint() {
+	if subtle.ConstantTimeCompare(Basepoint, []byte{
+		0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	}) != 1 {
+		panic("curve25519: global Basepoint value was modified")
+	}
 }

-// feMul121666 calculates h = f * 121666. Can overlap h with f.
+// X25519 returns the result of the scalar multiplication (scalar * point),
+// according to RFC 7748, Section 5. scalar, point and the return value are
+// slices of 32 bytes.
 //
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+// scalar can be generated at random, for example with crypto/rand. point should
+// be either Basepoint or the output of another X25519 call.
 //
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-func feMul121666(h, f *fieldElement) {
-	h0 := int64(f[0]) * 121666
-	h1 := int64(f[1]) * 121666
-	h2 := int64(f[2]) * 121666
-	h3 := int64(f[3]) * 121666
-	h4 := int64(f[4]) * 121666
-	h5 := int64(f[5]) * 121666
-	h6 := int64(f[6]) * 121666
-	h7 := int64(f[7]) * 121666
-	h8 := int64(f[8]) * 121666
-	h9 := int64(f[9]) * 121666
-	var carry [10]int64
-
-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-
-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
-}
-
-// feInvert sets out = z^-1.
-func feInvert(out, z *fieldElement) {
-	var t0, t1, t2, t3 fieldElement
-	var i int
-
-	feSquare(&t0, z)
-	for i = 1; i < 1; i++ {
-		feSquare(&t0, &t0)
-	}
-	feSquare(&t1, &t0)
-	for i = 1; i < 2; i++ {
-		feSquare(&t1, &t1)
-	}
-	feMul(&t1, z, &t1)
-	feMul(&t0, &t0, &t1)
-	feSquare(&t2, &t0)
-	for i = 1; i < 1; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t1, &t2)
-	feSquare(&t2, &t1)
-	for i = 1; i < 5; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t2, &t1)
-	for i = 1; i < 10; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t2, &t2, &t1)
-	feSquare(&t3, &t2)
-	for i = 1; i < 20; i++ {
-		feSquare(&t3, &t3)
-	}
-	feMul(&t2, &t3, &t2)
-	feSquare(&t2, &t2)
-	for i = 1; i < 10; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t2, &t1)
-	for i = 1; i < 50; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t2, &t2, &t1)
-	feSquare(&t3, &t2)
-	for i = 1; i < 100; i++ {
-		feSquare(&t3, &t3)
-	}
-	feMul(&t2, &t3, &t2)
-	feSquare(&t2, &t2)
-	for i = 1; i < 50; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t1, &t1)
-	for i = 1; i < 5; i++ {
-		feSquare(&t1, &t1)
-	}
-	feMul(out, &t1, &t0)
+// If point is Basepoint (but not if it's a different slice with the same
+// contents) a precomputed implementation might be used for performance.
+func X25519(scalar, point []byte) ([]byte, error) {
+	// Outline the body of function, to let the allocation be inlined in the
+	// caller, and possibly avoid escaping to the heap.
+	var dst [32]byte
+	return x25519(&dst, scalar, point)
 }

-func scalarMult(out, in, base *[32]byte) {
-	var e [32]byte
-
-	copy(e[:], in[:])
-	e[0] &= 248
-	e[31] &= 127
-	e[31] |= 64
-
-	var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
-	feFromBytes(&x1, base)
-	feOne(&x2)
-	feCopy(&x3, &x1)
-	feOne(&z3)
-
-	swap := int32(0)
-	for pos := 254; pos >= 0; pos-- {
-		b := e[pos/8] >> uint(pos&7)
-		b &= 1
-		swap ^= int32(b)
-		feCSwap(&x2, &x3, swap)
-		feCSwap(&z2, &z3, swap)
-		swap = int32(b)
-
-		feSub(&tmp0, &x3, &z3)
-		feSub(&tmp1, &x2, &z2)
-		feAdd(&x2, &x2, &z2)
-		feAdd(&z2, &x3, &z3)
-		feMul(&z3, &tmp0, &x2)
-		feMul(&z2, &z2, &tmp1)
-		feSquare(&tmp0, &tmp1)
-		feSquare(&tmp1, &x2)
-		feAdd(&x3, &z3, &z2)
-		feSub(&z2, &z3, &z2)
-		feMul(&x2, &tmp1, &tmp0)
-		feSub(&tmp1, &tmp1, &tmp0)
-		feSquare(&z2, &z2)
-		feMul121666(&z3, &tmp1)
-		feSquare(&x3, &x3)
-		feAdd(&tmp0, &tmp0, &z3)
-		feMul(&z3, &x1, &z2)
-		feMul(&z2, &tmp1, &tmp0)
-	}
-
-	feCSwap(&x2, &x3, swap)
-	feCSwap(&z2, &z3, swap)
-
-	feInvert(&z2, &z2)
-	feMul(&x2, &x2, &z2)
-	feToBytes(out, &x2)
+func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) {
+	var in [32]byte
+	if l := len(scalar); l != 32 {
+		return nil, fmt.Errorf("bad scalar length: %d, expected %d", l, 32)
+	}
+	if l := len(point); l != 32 {
+		return nil, fmt.Errorf("bad point length: %d, expected %d", l, 32)
+	}
+	copy(in[:], scalar)
+	if &point[0] == &Basepoint[0] {
+		checkBasepoint()
+		ScalarBaseMult(dst, &in)
+	} else {
+		var base, zero [32]byte
+		copy(base[:], point)
+		ScalarMult(dst, &in, &base)
+		if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 {
+			return nil, fmt.Errorf("bad input point: low order point")
+		}
+	}
+	return dst[:], nil
 }
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build amd64,!gccgo,!appengine
+// +build amd64,!gccgo,!appengine,!purego

 package curve25519

--- a/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s
@ -5,9 +5,84 @@
 // This code was translated into a form compatible with 6a from the public
 // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html

-// +build amd64,!gccgo,!appengine
+// +build amd64,!gccgo,!appengine,!purego

-#include "const_amd64.h"
+#define REDMASK51     0x0007FFFFFFFFFFFF
+
+// These constants cannot be encoded in non-MOVQ immediates.
+// We access them directly from memory instead.
+
+DATA ·_121666_213(SB)/8, $996687872
+GLOBL ·_121666_213(SB), 8, $8
+
+DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
+GLOBL ·_2P0(SB), 8, $8
+
+DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
+GLOBL ·_2P1234(SB), 8, $8
+
+// func freeze(inout *[5]uint64)
+TEXT ·freeze(SB),7,$0-8
+	MOVQ inout+0(FP), DI
+
+	MOVQ 0(DI),SI
+	MOVQ 8(DI),DX
+	MOVQ 16(DI),CX
+	MOVQ 24(DI),R8
+	MOVQ 32(DI),R9
+	MOVQ $REDMASK51,AX
+	MOVQ AX,R10
+	SUBQ $18,R10
+	MOVQ $3,R11
+REDUCELOOP:
+	MOVQ SI,R12
+	SHRQ $51,R12
+	ANDQ AX,SI
+	ADDQ R12,DX
+	MOVQ DX,R12
+	SHRQ $51,R12
+	ANDQ AX,DX
+	ADDQ R12,CX
+	MOVQ CX,R12
+	SHRQ $51,R12
+	ANDQ AX,CX
+	ADDQ R12,R8
+	MOVQ R8,R12
+	SHRQ $51,R12
+	ANDQ AX,R8
+	ADDQ R12,R9
+	MOVQ R9,R12
+	SHRQ $51,R12
+	ANDQ AX,R9
+	IMUL3Q $19,R12,R12
+	ADDQ R12,SI
+	SUBQ $1,R11
+	JA REDUCELOOP
+	MOVQ $1,R12
+	CMPQ R10,SI
+	CMOVQLT R11,R12
+	CMPQ AX,DX
+	CMOVQNE R11,R12
+	CMPQ AX,CX
+	CMOVQNE R11,R12
+	CMPQ AX,R8
+	CMOVQNE R11,R12
+	CMPQ AX,R9
+	CMOVQNE R11,R12
+	NEGQ R12
+	ANDQ R12,AX
+	ANDQ R12,R10
+	SUBQ R10,SI
+	SUBQ AX,DX
+	SUBQ AX,CX
+	SUBQ AX,R8
+	SUBQ AX,R9
+	MOVQ SI,0(DI)
+	MOVQ DX,8(DI)
+	MOVQ CX,16(DI)
+	MOVQ R8,24(DI)
+	MOVQ R9,32(DI)
+	RET

 // func ladderstep(inout *[5][5]uint64)
 TEXT ·ladderstep(SB),0,$296-8
@ -1375,3 +1450,344 @@ TEXT ·ladderstep(SB),0,$296-8
 	MOVQ AX,104(DI)
 	MOVQ R10,112(DI)
 	RET
+
+// func cswap(inout *[4][5]uint64, v uint64)
+TEXT ·cswap(SB),7,$0
+	MOVQ inout+0(FP),DI
+	MOVQ v+8(FP),SI
+
+	SUBQ $1, SI
+	NOTQ SI
+	MOVQ SI, X15
+	PSHUFD $0x44, X15, X15
+
+	MOVOU 0(DI), X0
+	MOVOU 16(DI), X2
+	MOVOU 32(DI), X4
+	MOVOU 48(DI), X6
+	MOVOU 64(DI), X8
+	MOVOU 80(DI), X1
+	MOVOU 96(DI), X3
+	MOVOU 112(DI), X5
+	MOVOU 128(DI), X7
+	MOVOU 144(DI), X9
+
+	MOVO X1, X10
+	MOVO X3, X11
+	MOVO X5, X12
+	MOVO X7, X13
+	MOVO X9, X14
+
+	PXOR X0, X10
+	PXOR X2, X11
+	PXOR X4, X12
+	PXOR X6, X13
+	PXOR X8, X14
+	PAND X15, X10
+	PAND X15, X11
+	PAND X15, X12
+	PAND X15, X13
+	PAND X15, X14
+	PXOR X10, X0
+	PXOR X10, X1
+	PXOR X11, X2
+	PXOR X11, X3
+	PXOR X12, X4
+	PXOR X12, X5
+	PXOR X13, X6
+	PXOR X13, X7
+	PXOR X14, X8
+	PXOR X14, X9
+
+	MOVOU X0, 0(DI)
+	MOVOU X2, 16(DI)
+	MOVOU X4, 32(DI)
+	MOVOU X6, 48(DI)
+	MOVOU X8, 64(DI)
+	MOVOU X1, 80(DI)
+	MOVOU X3, 96(DI)
+	MOVOU X5, 112(DI)
+	MOVOU X7, 128(DI)
+	MOVOU X9, 144(DI)
+	RET
+
+// func mul(dest, a, b *[5]uint64)
+TEXT ·mul(SB),0,$16-24
+	MOVQ dest+0(FP), DI
+	MOVQ a+8(FP), SI
+	MOVQ b+16(FP), DX
+
+	MOVQ DX,CX
+	MOVQ 24(SI),DX
+	IMUL3Q $19,DX,AX
+	MOVQ AX,0(SP)
+	MULQ 16(CX)
+	MOVQ AX,R8
+	MOVQ DX,R9
+	MOVQ 32(SI),DX
+	IMUL3Q $19,DX,AX
+	MOVQ AX,8(SP)
+	MULQ 8(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 0(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 0(SI),AX
+	MULQ 8(CX)
+	MOVQ AX,R10
+	MOVQ DX,R11
+	MOVQ 0(SI),AX
+	MULQ 16(CX)
+	MOVQ AX,R12
+	MOVQ DX,R13
+	MOVQ 0(SI),AX
+	MULQ 24(CX)
+	MOVQ AX,R14
+	MOVQ DX,R15
+	MOVQ 0(SI),AX
+	MULQ 32(CX)
+	MOVQ AX,BX
+	MOVQ DX,BP
+	MOVQ 8(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 8(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 8(SI),AX
+	MULQ 16(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 8(SI),AX
+	MULQ 24(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 8(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 16(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 16(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 16(SI),AX
+	MULQ 16(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 16(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 24(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 16(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 24(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 24(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 0(SP),AX
+	MULQ 24(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 0(SP),AX
+	MULQ 32(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 32(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 8(SP),AX
+	MULQ 16(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 8(SP),AX
+	MULQ 24(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 8(SP),AX
+	MULQ 32(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ $REDMASK51,SI
+	SHLQ $13,R8,R9
+	ANDQ SI,R8
+	SHLQ $13,R10,R11
+	ANDQ SI,R10
+	ADDQ R9,R10
+	SHLQ $13,R12,R13
+	ANDQ SI,R12
+	ADDQ R11,R12
+	SHLQ $13,R14,R15
+	ANDQ SI,R14
+	ADDQ R13,R14
+	SHLQ $13,BX,BP
+	ANDQ SI,BX
+	ADDQ R15,BX
+	IMUL3Q $19,BP,DX
+	ADDQ DX,R8
+	MOVQ R8,DX
+	SHRQ $51,DX
+	ADDQ R10,DX
+	MOVQ DX,CX
+	SHRQ $51,DX
+	ANDQ SI,R8
+	ADDQ R12,DX
+	MOVQ DX,R9
+	SHRQ $51,DX
+	ANDQ SI,CX
+	ADDQ R14,DX
+	MOVQ DX,AX
+	SHRQ $51,DX
+	ANDQ SI,R9
+	ADDQ BX,DX
+	MOVQ DX,R10
+	SHRQ $51,DX
+	ANDQ SI,AX
+	IMUL3Q $19,DX,DX
+	ADDQ DX,R8
+	ANDQ SI,R10
+	MOVQ R8,0(DI)
+	MOVQ CX,8(DI)
+	MOVQ R9,16(DI)
+	MOVQ AX,24(DI)
+	MOVQ R10,32(DI)
+	RET
+
+// func square(out, in *[5]uint64)
+TEXT ·square(SB),7,$0-16
+	MOVQ out+0(FP), DI
+	MOVQ in+8(FP), SI
+
+	MOVQ 0(SI),AX
+	MULQ 0(SI)
+	MOVQ AX,CX
+	MOVQ DX,R8
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 8(SI)
+	MOVQ AX,R9
+	MOVQ DX,R10
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 16(SI)
+	MOVQ AX,R11
+	MOVQ DX,R12
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 24(SI)
+	MOVQ AX,R13
+	MOVQ DX,R14
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 32(SI)
+	MOVQ AX,R15
+	MOVQ DX,BX
+	MOVQ 8(SI),AX
+	MULQ 8(SI)
+	ADDQ AX,R11
+	ADCQ DX,R12
+	MOVQ 8(SI),AX
+	SHLQ $1,AX
+	MULQ 16(SI)
+	ADDQ AX,R13
+	ADCQ DX,R14
+	MOVQ 8(SI),AX
+	SHLQ $1,AX
+	MULQ 24(SI)
+	ADDQ AX,R15
+	ADCQ DX,BX
+	MOVQ 8(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,CX
+	ADCQ DX,R8
+	MOVQ 16(SI),AX
+	MULQ 16(SI)
+	ADDQ AX,R15
+	ADCQ DX,BX
+	MOVQ 16(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 24(SI)
+	ADDQ AX,CX
+	ADCQ DX,R8
+	MOVQ 16(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R9
+	ADCQ DX,R10
+	MOVQ 24(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 24(SI)
+	ADDQ AX,R9
+	ADCQ DX,R10
+	MOVQ 24(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R11
+	ADCQ DX,R12
+	MOVQ 32(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R13
+	ADCQ DX,R14
+	MOVQ $REDMASK51,SI
+	SHLQ $13,CX,R8
+	ANDQ SI,CX
+	SHLQ $13,R9,R10
+	ANDQ SI,R9
+	ADDQ R8,R9
+	SHLQ $13,R11,R12
+	ANDQ SI,R11
+	ADDQ R10,R11
+	SHLQ $13,R13,R14
+	ANDQ SI,R13
+	ADDQ R12,R13
+	SHLQ $13,R15,BX
+	ANDQ SI,R15
+	ADDQ R14,R15
+	IMUL3Q $19,BX,DX
+	ADDQ DX,CX
+	MOVQ CX,DX
+	SHRQ $51,DX
+	ADDQ R9,DX
+	ANDQ SI,CX
+	MOVQ DX,R8
+	SHRQ $51,DX
+	ADDQ R11,DX
+	ANDQ SI,R8
+	MOVQ DX,R9
+	SHRQ $51,DX
+	ADDQ R13,DX
+	ANDQ SI,R9
+	MOVQ DX,AX
+	SHRQ $51,DX
+	ADDQ R15,DX
+	ANDQ SI,AX
+	MOVQ DX,R10
+	SHRQ $51,DX
+	IMUL3Q $19,DX,DX
+	ADDQ DX,CX
+	ANDQ SI,R10
+	MOVQ CX,0(DI)
+	MOVQ R8,8(DI)
+	MOVQ R9,16(DI)
+	MOVQ AX,24(DI)
+	MOVQ R10,32(DI)
+	RET
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go
@ -0,0 +1,828 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package curve25519
+
+import "encoding/binary"
+
+// This code is a port of the public domain, "ref10" implementation of
+// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
+
+// fieldElement represents an element of the field GF(2^255 - 19). An element
+// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
+// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
+// context.
+type fieldElement [10]int32
+
+func feZero(fe *fieldElement) {
+	for i := range fe {
+		fe[i] = 0
+	}
+}
+
+func feOne(fe *fieldElement) {
+	feZero(fe)
+	fe[0] = 1
+}
+
+func feAdd(dst, a, b *fieldElement) {
+	for i := range dst {
+		dst[i] = a[i] + b[i]
+	}
+}
+
+func feSub(dst, a, b *fieldElement) {
+	for i := range dst {
+		dst[i] = a[i] - b[i]
+	}
+}
+
+func feCopy(dst, src *fieldElement) {
+	for i := range dst {
+		dst[i] = src[i]
+	}
+}
+
+// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
+//
+// Preconditions: b in {0,1}.
+func feCSwap(f, g *fieldElement, b int32) {
+	b = -b
+	for i := range f {
+		t := b & (f[i] ^ g[i])
+		f[i] ^= t
+		g[i] ^= t
+	}
+}
+
+// load3 reads a 24-bit, little-endian value from in.
+func load3(in []byte) int64 {
+	var r int64
+	r = int64(in[0])
+	r |= int64(in[1]) << 8
+	r |= int64(in[2]) << 16
+	return r
+}
+
+// load4 reads a 32-bit, little-endian value from in.
+func load4(in []byte) int64 {
+	return int64(binary.LittleEndian.Uint32(in))
+}
+
+func feFromBytes(dst *fieldElement, src *[32]byte) {
+	h0 := load4(src[:])
+	h1 := load3(src[4:]) << 6
+	h2 := load3(src[7:]) << 5
+	h3 := load3(src[10:]) << 3
+	h4 := load3(src[13:]) << 2
+	h5 := load4(src[16:])
+	h6 := load3(src[20:]) << 7
+	h7 := load3(src[23:]) << 5
+	h8 := load3(src[26:]) << 4
+	h9 := (load3(src[29:]) & 0x7fffff) << 2
+
+	var carry [10]int64
+	carry[9] = (h9 + 1<<24) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	carry[1] = (h1 + 1<<24) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[3] = (h3 + 1<<24) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[5] = (h5 + 1<<24) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	carry[7] = (h7 + 1<<24) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[0] = (h0 + 1<<25) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[2] = (h2 + 1<<25) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[4] = (h4 + 1<<25) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[6] = (h6 + 1<<25) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	carry[8] = (h8 + 1<<25) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	dst[0] = int32(h0)
+	dst[1] = int32(h1)
+	dst[2] = int32(h2)
+	dst[3] = int32(h3)
+	dst[4] = int32(h4)
+	dst[5] = int32(h5)
+	dst[6] = int32(h6)
+	dst[7] = int32(h7)
+	dst[8] = int32(h8)
+	dst[9] = int32(h9)
+}
+
+// feToBytes marshals h to s.
+// Preconditions:
+//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+//
+// Write p=2^255-19; q=floor(h/p).
+// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
+//
+// Proof:
+//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
+//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
+//
+//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
+//   Then 0<y<1.
+//
+//   Write r=h-pq.
+//   Have 0<=r<=p-1=2^255-20.
+//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
+//
+//   Write x=r+19(2^-255)r+y.
+//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
+//
+//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
+//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
+func feToBytes(s *[32]byte, h *fieldElement) {
+	var carry [10]int32
+
+	q := (19*h[9] + (1 << 24)) >> 25
+	q = (h[0] + q) >> 26
+	q = (h[1] + q) >> 25
+	q = (h[2] + q) >> 26
+	q = (h[3] + q) >> 25
+	q = (h[4] + q) >> 26
+	q = (h[5] + q) >> 25
+	q = (h[6] + q) >> 26
+	q = (h[7] + q) >> 25
+	q = (h[8] + q) >> 26
+	q = (h[9] + q) >> 25
+
+	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
+	h[0] += 19 * q
+	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
+
+	carry[0] = h[0] >> 26
+	h[1] += carry[0]
+	h[0] -= carry[0] << 26
+	carry[1] = h[1] >> 25
+	h[2] += carry[1]
+	h[1] -= carry[1] << 25
+	carry[2] = h[2] >> 26
+	h[3] += carry[2]
+	h[2] -= carry[2] << 26
+	carry[3] = h[3] >> 25
+	h[4] += carry[3]
+	h[3] -= carry[3] << 25
+	carry[4] = h[4] >> 26
+	h[5] += carry[4]
+	h[4] -= carry[4] << 26
+	carry[5] = h[5] >> 25
+	h[6] += carry[5]
+	h[5] -= carry[5] << 25
+	carry[6] = h[6] >> 26
+	h[7] += carry[6]
+	h[6] -= carry[6] << 26
+	carry[7] = h[7] >> 25
+	h[8] += carry[7]
+	h[7] -= carry[7] << 25
+	carry[8] = h[8] >> 26
+	h[9] += carry[8]
+	h[8] -= carry[8] << 26
+	carry[9] = h[9] >> 25
+	h[9] -= carry[9] << 25
+	// h10 = carry9
+
+	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
+	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
+	// evidently 2^255 h10-2^255 q = 0.
+	// Goal: Output h[0]+...+2^230 h[9].
+
+	s[0] = byte(h[0] >> 0)
+	s[1] = byte(h[0] >> 8)
+	s[2] = byte(h[0] >> 16)
+	s[3] = byte((h[0] >> 24) | (h[1] << 2))
+	s[4] = byte(h[1] >> 6)
+	s[5] = byte(h[1] >> 14)
+	s[6] = byte((h[1] >> 22) | (h[2] << 3))
+	s[7] = byte(h[2] >> 5)
+	s[8] = byte(h[2] >> 13)
+	s[9] = byte((h[2] >> 21) | (h[3] << 5))
+	s[10] = byte(h[3] >> 3)
+	s[11] = byte(h[3] >> 11)
+	s[12] = byte((h[3] >> 19) | (h[4] << 6))
+	s[13] = byte(h[4] >> 2)
+	s[14] = byte(h[4] >> 10)
+	s[15] = byte(h[4] >> 18)
+	s[16] = byte(h[5] >> 0)
+	s[17] = byte(h[5] >> 8)
+	s[18] = byte(h[5] >> 16)
+	s[19] = byte((h[5] >> 24) | (h[6] << 1))
+	s[20] = byte(h[6] >> 7)
+	s[21] = byte(h[6] >> 15)
+	s[22] = byte((h[6] >> 23) | (h[7] << 3))
+	s[23] = byte(h[7] >> 5)
+	s[24] = byte(h[7] >> 13)
+	s[25] = byte((h[7] >> 21) | (h[8] << 4))
+	s[26] = byte(h[8] >> 4)
+	s[27] = byte(h[8] >> 12)
+	s[28] = byte((h[8] >> 20) | (h[9] << 6))
+	s[29] = byte(h[9] >> 2)
+	s[30] = byte(h[9] >> 10)
+	s[31] = byte(h[9] >> 18)
+}
+
+// feMul calculates h = f * g
+// Can overlap h with f or g.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+//
+// Notes on implementation strategy:
+//
+// Using schoolbook multiplication.
+// Karatsuba would save a little in some cost models.
+//
+// Most multiplications by 2 and 19 are 32-bit precomputations;
+// cheaper than 64-bit postcomputations.
+//
+// There is one remaining multiplication by 19 in the carry chain;
+// one *19 precomputation can be merged into this,
+// but the resulting data flow is considerably less clean.
+//
+// There are 12 carries below.
+// 10 of them are 2-way parallelizable and vectorizable.
+// Can get away with 11 carries, but then data flow is much deeper.
+//
+// With tighter constraints on inputs can squeeze carries into int32.
+func feMul(h, f, g *fieldElement) {
+	f0 := f[0]
+	f1 := f[1]
+	f2 := f[2]
+	f3 := f[3]
+	f4 := f[4]
+	f5 := f[5]
+	f6 := f[6]
+	f7 := f[7]
+	f8 := f[8]
+	f9 := f[9]
+	g0 := g[0]
+	g1 := g[1]
+	g2 := g[2]
+	g3 := g[3]
+	g4 := g[4]
+	g5 := g[5]
+	g6 := g[6]
+	g7 := g[7]
+	g8 := g[8]
+	g9 := g[9]
+	g1_19 := 19 * g1 // 1.4*2^29
+	g2_19 := 19 * g2 // 1.4*2^30; still ok
+	g3_19 := 19 * g3
+	g4_19 := 19 * g4
+	g5_19 := 19 * g5
+	g6_19 := 19 * g6
+	g7_19 := 19 * g7
+	g8_19 := 19 * g8
+	g9_19 := 19 * g9
+	f1_2 := 2 * f1
+	f3_2 := 2 * f3
+	f5_2 := 2 * f5
+	f7_2 := 2 * f7
+	f9_2 := 2 * f9
+	f0g0 := int64(f0) * int64(g0)
+	f0g1 := int64(f0) * int64(g1)
+	f0g2 := int64(f0) * int64(g2)
+	f0g3 := int64(f0) * int64(g3)
+	f0g4 := int64(f0) * int64(g4)
+	f0g5 := int64(f0) * int64(g5)
+	f0g6 := int64(f0) * int64(g6)
+	f0g7 := int64(f0) * int64(g7)
+	f0g8 := int64(f0) * int64(g8)
+	f0g9 := int64(f0) * int64(g9)
+	f1g0 := int64(f1) * int64(g0)
+	f1g1_2 := int64(f1_2) * int64(g1)
+	f1g2 := int64(f1) * int64(g2)
+	f1g3_2 := int64(f1_2) * int64(g3)
+	f1g4 := int64(f1) * int64(g4)
+	f1g5_2 := int64(f1_2) * int64(g5)
+	f1g6 := int64(f1) * int64(g6)
+	f1g7_2 := int64(f1_2) * int64(g7)
+	f1g8 := int64(f1) * int64(g8)
+	f1g9_38 := int64(f1_2) * int64(g9_19)
+	f2g0 := int64(f2) * int64(g0)
+	f2g1 := int64(f2) * int64(g1)
+	f2g2 := int64(f2) * int64(g2)
+	f2g3 := int64(f2) * int64(g3)
+	f2g4 := int64(f2) * int64(g4)
+	f2g5 := int64(f2) * int64(g5)
+	f2g6 := int64(f2) * int64(g6)
+	f2g7 := int64(f2) * int64(g7)
+	f2g8_19 := int64(f2) * int64(g8_19)
+	f2g9_19 := int64(f2) * int64(g9_19)
+	f3g0 := int64(f3) * int64(g0)
+	f3g1_2 := int64(f3_2) * int64(g1)
+	f3g2 := int64(f3) * int64(g2)
+	f3g3_2 := int64(f3_2) * int64(g3)
+	f3g4 := int64(f3) * int64(g4)
+	f3g5_2 := int64(f3_2) * int64(g5)
+	f3g6 := int64(f3) * int64(g6)
+	f3g7_38 := int64(f3_2) * int64(g7_19)
+	f3g8_19 := int64(f3) * int64(g8_19)
+	f3g9_38 := int64(f3_2) * int64(g9_19)
+	f4g0 := int64(f4) * int64(g0)
+	f4g1 := int64(f4) * int64(g1)
+	f4g2 := int64(f4) * int64(g2)
+	f4g3 := int64(f4) * int64(g3)
+	f4g4 := int64(f4) * int64(g4)
+	f4g5 := int64(f4) * int64(g5)
+	f4g6_19 := int64(f4) * int64(g6_19)
+	f4g7_19 := int64(f4) * int64(g7_19)
+	f4g8_19 := int64(f4) * int64(g8_19)
+	f4g9_19 := int64(f4) * int64(g9_19)
+	f5g0 := int64(f5) * int64(g0)
+	f5g1_2 := int64(f5_2) * int64(g1)
+	f5g2 := int64(f5) * int64(g2)
+	f5g3_2 := int64(f5_2) * int64(g3)
+	f5g4 := int64(f5) * int64(g4)
+	f5g5_38 := int64(f5_2) * int64(g5_19)
+	f5g6_19 := int64(f5) * int64(g6_19)
+	f5g7_38 := int64(f5_2) * int64(g7_19)
+	f5g8_19 := int64(f5) * int64(g8_19)
+	f5g9_38 := int64(f5_2) * int64(g9_19)
+	f6g0 := int64(f6) * int64(g0)
+	f6g1 := int64(f6) * int64(g1)
+	f6g2 := int64(f6) * int64(g2)
+	f6g3 := int64(f6) * int64(g3)
+	f6g4_19 := int64(f6) * int64(g4_19)
+	f6g5_19 := int64(f6) * int64(g5_19)
+	f6g6_19 := int64(f6) * int64(g6_19)
+	f6g7_19 := int64(f6) * int64(g7_19)
+	f6g8_19 := int64(f6) * int64(g8_19)
+	f6g9_19 := int64(f6) * int64(g9_19)
+	f7g0 := int64(f7) * int64(g0)
+	f7g1_2 := int64(f7_2) * int64(g1)
+	f7g2 := int64(f7) * int64(g2)
+	f7g3_38 := int64(f7_2) * int64(g3_19)
+	f7g4_19 := int64(f7) * int64(g4_19)
+	f7g5_38 := int64(f7_2) * int64(g5_19)
+	f7g6_19 := int64(f7) * int64(g6_19)
+	f7g7_38 := int64(f7_2) * int64(g7_19)
+	f7g8_19 := int64(f7) * int64(g8_19)
+	f7g9_38 := int64(f7_2) * int64(g9_19)
+	f8g0 := int64(f8) * int64(g0)
+	f8g1 := int64(f8) * int64(g1)
+	f8g2_19 := int64(f8) * int64(g2_19)
+	f8g3_19 := int64(f8) * int64(g3_19)
+	f8g4_19 := int64(f8) * int64(g4_19)
+	f8g5_19 := int64(f8) * int64(g5_19)
+	f8g6_19 := int64(f8) * int64(g6_19)
+	f8g7_19 := int64(f8) * int64(g7_19)
+	f8g8_19 := int64(f8) * int64(g8_19)
+	f8g9_19 := int64(f8) * int64(g9_19)
+	f9g0 := int64(f9) * int64(g0)
+	f9g1_38 := int64(f9_2) * int64(g1_19)
+	f9g2_19 := int64(f9) * int64(g2_19)
+	f9g3_38 := int64(f9_2) * int64(g3_19)
+	f9g4_19 := int64(f9) * int64(g4_19)
+	f9g5_38 := int64(f9_2) * int64(g5_19)
+	f9g6_19 := int64(f9) * int64(g6_19)
+	f9g7_38 := int64(f9_2) * int64(g7_19)
+	f9g8_19 := int64(f9) * int64(g8_19)
+	f9g9_38 := int64(f9_2) * int64(g9_19)
+	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
+	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
+	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
+	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
+	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
+	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
+	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
+	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
+	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
+	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
+	var carry [10]int64
+
+	// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
+	//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
+	// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
+	//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	// |h0| <= 2^25
+	// |h4| <= 2^25
+	// |h1| <= 1.51*2^58
+	// |h5| <= 1.51*2^58
+
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	// |h1| <= 2^24; from now on fits into int32
+	// |h5| <= 2^24; from now on fits into int32
+	// |h2| <= 1.21*2^59
+	// |h6| <= 1.21*2^59
+
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	// |h2| <= 2^25; from now on fits into int32 unchanged
+	// |h6| <= 2^25; from now on fits into int32 unchanged
+	// |h3| <= 1.51*2^58
+	// |h7| <= 1.51*2^58
+
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+	// |h3| <= 2^24; from now on fits into int32 unchanged
+	// |h7| <= 2^24; from now on fits into int32 unchanged
+	// |h4| <= 1.52*2^33
+	// |h8| <= 1.52*2^33
+
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+	// |h4| <= 2^25; from now on fits into int32 unchanged
+	// |h8| <= 2^25; from now on fits into int32 unchanged
+	// |h5| <= 1.01*2^24
+	// |h9| <= 1.51*2^58
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	// |h9| <= 2^24; from now on fits into int32 unchanged
+	// |h0| <= 1.8*2^37
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	// |h0| <= 2^25; from now on fits into int32 unchanged
+	// |h1| <= 1.01*2^24
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feSquare calculates h = f*f. Can overlap h with f.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+func feSquare(h, f *fieldElement) {
+	f0 := f[0]
+	f1 := f[1]
+	f2 := f[2]
+	f3 := f[3]
+	f4 := f[4]
+	f5 := f[5]
+	f6 := f[6]
+	f7 := f[7]
+	f8 := f[8]
+	f9 := f[9]
+	f0_2 := 2 * f0
+	f1_2 := 2 * f1
+	f2_2 := 2 * f2
+	f3_2 := 2 * f3
+	f4_2 := 2 * f4
+	f5_2 := 2 * f5
+	f6_2 := 2 * f6
+	f7_2 := 2 * f7
+	f5_38 := 38 * f5 // 1.31*2^30
+	f6_19 := 19 * f6 // 1.31*2^30
+	f7_38 := 38 * f7 // 1.31*2^30
+	f8_19 := 19 * f8 // 1.31*2^30
+	f9_38 := 38 * f9 // 1.31*2^30
+	f0f0 := int64(f0) * int64(f0)
+	f0f1_2 := int64(f0_2) * int64(f1)
+	f0f2_2 := int64(f0_2) * int64(f2)
+	f0f3_2 := int64(f0_2) * int64(f3)
+	f0f4_2 := int64(f0_2) * int64(f4)
+	f0f5_2 := int64(f0_2) * int64(f5)
+	f0f6_2 := int64(f0_2) * int64(f6)
+	f0f7_2 := int64(f0_2) * int64(f7)
+	f0f8_2 := int64(f0_2) * int64(f8)
+	f0f9_2 := int64(f0_2) * int64(f9)
+	f1f1_2 := int64(f1_2) * int64(f1)
+	f1f2_2 := int64(f1_2) * int64(f2)
+	f1f3_4 := int64(f1_2) * int64(f3_2)
+	f1f4_2 := int64(f1_2) * int64(f4)
+	f1f5_4 := int64(f1_2) * int64(f5_2)
+	f1f6_2 := int64(f1_2) * int64(f6)
+	f1f7_4 := int64(f1_2) * int64(f7_2)
+	f1f8_2 := int64(f1_2) * int64(f8)
+	f1f9_76 := int64(f1_2) * int64(f9_38)
+	f2f2 := int64(f2) * int64(f2)
+	f2f3_2 := int64(f2_2) * int64(f3)
+	f2f4_2 := int64(f2_2) * int64(f4)
+	f2f5_2 := int64(f2_2) * int64(f5)
+	f2f6_2 := int64(f2_2) * int64(f6)
+	f2f7_2 := int64(f2_2) * int64(f7)
+	f2f8_38 := int64(f2_2) * int64(f8_19)
+	f2f9_38 := int64(f2) * int64(f9_38)
+	f3f3_2 := int64(f3_2) * int64(f3)
+	f3f4_2 := int64(f3_2) * int64(f4)
+	f3f5_4 := int64(f3_2) * int64(f5_2)
+	f3f6_2 := int64(f3_2) * int64(f6)
+	f3f7_76 := int64(f3_2) * int64(f7_38)
+	f3f8_38 := int64(f3_2) * int64(f8_19)
+	f3f9_76 := int64(f3_2) * int64(f9_38)
+	f4f4 := int64(f4) * int64(f4)
+	f4f5_2 := int64(f4_2) * int64(f5)
+	f4f6_38 := int64(f4_2) * int64(f6_19)
+	f4f7_38 := int64(f4) * int64(f7_38)
+	f4f8_38 := int64(f4_2) * int64(f8_19)
+	f4f9_38 := int64(f4) * int64(f9_38)
+	f5f5_38 := int64(f5) * int64(f5_38)
+	f5f6_38 := int64(f5_2) * int64(f6_19)
+	f5f7_76 := int64(f5_2) * int64(f7_38)
+	f5f8_38 := int64(f5_2) * int64(f8_19)
+	f5f9_76 := int64(f5_2) * int64(f9_38)
+	f6f6_19 := int64(f6) * int64(f6_19)
+	f6f7_38 := int64(f6) * int64(f7_38)
+	f6f8_38 := int64(f6_2) * int64(f8_19)
+	f6f9_38 := int64(f6) * int64(f9_38)
+	f7f7_38 := int64(f7) * int64(f7_38)
+	f7f8_38 := int64(f7_2) * int64(f8_19)
+	f7f9_76 := int64(f7_2) * int64(f9_38)
+	f8f8_19 := int64(f8) * int64(f8_19)
+	f8f9_38 := int64(f8) * int64(f9_38)
+	f9f9_38 := int64(f9) * int64(f9_38)
+	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
+	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
+	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
+	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
+	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
+	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
+	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
+	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
+	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
+	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
+	var carry [10]int64
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feMul121666 calculates h = f * 121666. Can overlap h with f.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+func feMul121666(h, f *fieldElement) {
+	h0 := int64(f[0]) * 121666
+	h1 := int64(f[1]) * 121666
+	h2 := int64(f[2]) * 121666
+	h3 := int64(f[3]) * 121666
+	h4 := int64(f[4]) * 121666
+	h5 := int64(f[5]) * 121666
+	h6 := int64(f[6]) * 121666
+	h7 := int64(f[7]) * 121666
+	h8 := int64(f[8]) * 121666
+	h9 := int64(f[9]) * 121666
+	var carry [10]int64
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feInvert sets out = z^-1.
+func feInvert(out, z *fieldElement) {
+	var t0, t1, t2, t3 fieldElement
+	var i int
+
+	feSquare(&t0, z)
+	for i = 1; i < 1; i++ {
+		feSquare(&t0, &t0)
+	}
+	feSquare(&t1, &t0)
+	for i = 1; i < 2; i++ {
+		feSquare(&t1, &t1)
+	}
+	feMul(&t1, z, &t1)
+	feMul(&t0, &t0, &t1)
+	feSquare(&t2, &t0)
+	for i = 1; i < 1; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t1, &t2)
+	feSquare(&t2, &t1)
+	for i = 1; i < 5; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t2, &t1)
+	for i = 1; i < 10; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t2, &t2, &t1)
+	feSquare(&t3, &t2)
+	for i = 1; i < 20; i++ {
+		feSquare(&t3, &t3)
+	}
+	feMul(&t2, &t3, &t2)
+	feSquare(&t2, &t2)
+	for i = 1; i < 10; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t2, &t1)
+	for i = 1; i < 50; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t2, &t2, &t1)
+	feSquare(&t3, &t2)
+	for i = 1; i < 100; i++ {
+		feSquare(&t3, &t3)
+	}
+	feMul(&t2, &t3, &t2)
+	feSquare(&t2, &t2)
+	for i = 1; i < 50; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t1, &t1)
+	for i = 1; i < 5; i++ {
+		feSquare(&t1, &t1)
+	}
+	feMul(out, &t1, &t0)
+}
+
+func scalarMultGeneric(out, in, base *[32]byte) {
+	var e [32]byte
+
+	copy(e[:], in[:])
+	e[0] &= 248
+	e[31] &= 127
+	e[31] |= 64
+
+	var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
+	feFromBytes(&x1, base)
+	feOne(&x2)
+	feCopy(&x3, &x1)
+	feOne(&z3)
+
+	swap := int32(0)
+	for pos := 254; pos >= 0; pos-- {
+		b := e[pos/8] >> uint(pos&7)
+		b &= 1
+		swap ^= int32(b)
+		feCSwap(&x2, &x3, swap)
+		feCSwap(&z2, &z3, swap)
+		swap = int32(b)
+
+		feSub(&tmp0, &x3, &z3)
+		feSub(&tmp1, &x2, &z2)
+		feAdd(&x2, &x2, &z2)
+		feAdd(&z2, &x3, &z3)
+		feMul(&z3, &tmp0, &x2)
+		feMul(&z2, &z2, &tmp1)
+		feSquare(&tmp0, &tmp1)
+		feSquare(&tmp1, &x2)
+		feAdd(&x3, &z3, &z2)
+		feSub(&z2, &z3, &z2)
+		feMul(&x2, &tmp1, &tmp0)
+		feSub(&tmp1, &tmp1, &tmp0)
+		feSquare(&z2, &z2)
+		feMul121666(&z3, &tmp1)
+		feSquare(&x3, &x3)
+		feAdd(&tmp0, &tmp0, &z3)
+		feMul(&z3, &x1, &z2)
+		feMul(&z2, &tmp1, &tmp0)
+	}
+
+	feCSwap(&x2, &x3, swap)
+	feCSwap(&z2, &z3, swap)
+
+	feInvert(&z2, &z2)
+	feMul(&x2, &x2, &z2)
+	feToBytes(out, &x2)
+}
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go
@ -0,0 +1,11 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 gccgo appengine purego
+
+package curve25519
+
+func scalarMult(out, in, base *[32]byte) {
+	scalarMultGeneric(out, in, base)
+}
--- a/vendor/golang.org/x/crypto/curve25519/doc.go
+++ b/vendor/golang.org/x/crypto/curve25519/doc.go
@ -1,23 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package curve25519 provides an implementation of scalar multiplication on
-// the elliptic curve known as curve25519. See https://cr.yp.to/ecdh.html
-package curve25519 // import "golang.org/x/crypto/curve25519"
-
-// basePoint is the x coordinate of the generator of the curve.
-var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
-
-// ScalarMult sets dst to the product in*base where dst and base are the x
-// coordinates of group points and all values are in little-endian form.
-func ScalarMult(dst, in, base *[32]byte) {
-	scalarMult(dst, in, base)
-}
-
-// ScalarBaseMult sets dst to the product in*base where dst and base are the x
-// coordinates of group points, base is the standard generator and all values
-// are in little-endian form.
-func ScalarBaseMult(dst, in *[32]byte) {
-	ScalarMult(dst, in, &basePoint)
-}
--- a/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s
@ -1,73 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func freeze(inout *[5]uint64)
-TEXT ·freeze(SB),7,$0-8
-	MOVQ inout+0(FP), DI
-
-	MOVQ 0(DI),SI
-	MOVQ 8(DI),DX
-	MOVQ 16(DI),CX
-	MOVQ 24(DI),R8
-	MOVQ 32(DI),R9
-	MOVQ $REDMASK51,AX
-	MOVQ AX,R10
-	SUBQ $18,R10
-	MOVQ $3,R11
-REDUCELOOP:
-	MOVQ SI,R12
-	SHRQ $51,R12
-	ANDQ AX,SI
-	ADDQ R12,DX
-	MOVQ DX,R12
-	SHRQ $51,R12
-	ANDQ AX,DX
-	ADDQ R12,CX
-	MOVQ CX,R12
-	SHRQ $51,R12
-	ANDQ AX,CX
-	ADDQ R12,R8
-	MOVQ R8,R12
-	SHRQ $51,R12
-	ANDQ AX,R8
-	ADDQ R12,R9
-	MOVQ R9,R12
-	SHRQ $51,R12
-	ANDQ AX,R9
-	IMUL3Q $19,R12,R12
-	ADDQ R12,SI
-	SUBQ $1,R11
-	JA REDUCELOOP
-	MOVQ $1,R12
-	CMPQ R10,SI
-	CMOVQLT R11,R12
-	CMPQ AX,DX
-	CMOVQNE R11,R12
-	CMPQ AX,CX
-	CMOVQNE R11,R12
-	CMPQ AX,R8
-	CMOVQNE R11,R12
-	CMPQ AX,R9
-	CMOVQNE R11,R12
-	NEGQ R12
-	ANDQ R12,AX
-	ANDQ R12,R10
-	SUBQ R10,SI
-	SUBQ AX,DX
-	SUBQ AX,CX
-	SUBQ AX,R8
-	SUBQ AX,R9
-	MOVQ SI,0(DI)
-	MOVQ DX,8(DI)
-	MOVQ CX,16(DI)
-	MOVQ R8,24(DI)
-	MOVQ R9,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/mul_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/mul_amd64.s
@ -1,169 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func mul(dest, a, b *[5]uint64)
-TEXT ·mul(SB),0,$16-24
-	MOVQ dest+0(FP), DI
-	MOVQ a+8(FP), SI
-	MOVQ b+16(FP), DX
-
-	MOVQ DX,CX
-	MOVQ 24(SI),DX
-	IMUL3Q $19,DX,AX
-	MOVQ AX,0(SP)
-	MULQ 16(CX)
-	MOVQ AX,R8
-	MOVQ DX,R9
-	MOVQ 32(SI),DX
-	IMUL3Q $19,DX,AX
-	MOVQ AX,8(SP)
-	MULQ 8(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 0(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 0(SI),AX
-	MULQ 8(CX)
-	MOVQ AX,R10
-	MOVQ DX,R11
-	MOVQ 0(SI),AX
-	MULQ 16(CX)
-	MOVQ AX,R12
-	MOVQ DX,R13
-	MOVQ 0(SI),AX
-	MULQ 24(CX)
-	MOVQ AX,R14
-	MOVQ DX,R15
-	MOVQ 0(SI),AX
-	MULQ 32(CX)
-	MOVQ AX,BX
-	MOVQ DX,BP
-	MOVQ 8(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 8(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 8(SI),AX
-	MULQ 16(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 8(SI),AX
-	MULQ 24(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 8(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 16(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 16(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 16(SI),AX
-	MULQ 16(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 16(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 24(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 16(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 24(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 24(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 0(SP),AX
-	MULQ 24(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 0(SP),AX
-	MULQ 32(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 32(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 8(SP),AX
-	MULQ 16(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 8(SP),AX
-	MULQ 24(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 8(SP),AX
-	MULQ 32(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ $REDMASK51,SI
-	SHLQ $13,R8,R9
-	ANDQ SI,R8
-	SHLQ $13,R10,R11
-	ANDQ SI,R10
-	ADDQ R9,R10
-	SHLQ $13,R12,R13
-	ANDQ SI,R12
-	ADDQ R11,R12
-	SHLQ $13,R14,R15
-	ANDQ SI,R14
-	ADDQ R13,R14
-	SHLQ $13,BX,BP
-	ANDQ SI,BX
-	ADDQ R15,BX
-	IMUL3Q $19,BP,DX
-	ADDQ DX,R8
-	MOVQ R8,DX
-	SHRQ $51,DX
-	ADDQ R10,DX
-	MOVQ DX,CX
-	SHRQ $51,DX
-	ANDQ SI,R8
-	ADDQ R12,DX
-	MOVQ DX,R9
-	SHRQ $51,DX
-	ANDQ SI,CX
-	ADDQ R14,DX
-	MOVQ DX,AX
-	SHRQ $51,DX
-	ANDQ SI,R9
-	ADDQ BX,DX
-	MOVQ DX,R10
-	SHRQ $51,DX
-	ANDQ SI,AX
-	IMUL3Q $19,DX,DX
-	ADDQ DX,R8
-	ANDQ SI,R10
-	MOVQ R8,0(DI)
-	MOVQ CX,8(DI)
-	MOVQ R9,16(DI)
-	MOVQ AX,24(DI)
-	MOVQ R10,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/square_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/square_amd64.s
@ -1,132 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func square(out, in *[5]uint64)
-TEXT ·square(SB),7,$0-16
-	MOVQ out+0(FP), DI
-	MOVQ in+8(FP), SI
-
-	MOVQ 0(SI),AX
-	MULQ 0(SI)
-	MOVQ AX,CX
-	MOVQ DX,R8
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 8(SI)
-	MOVQ AX,R9
-	MOVQ DX,R10
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 16(SI)
-	MOVQ AX,R11
-	MOVQ DX,R12
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 24(SI)
-	MOVQ AX,R13
-	MOVQ DX,R14
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 32(SI)
-	MOVQ AX,R15
-	MOVQ DX,BX
-	MOVQ 8(SI),AX
-	MULQ 8(SI)
-	ADDQ AX,R11
-	ADCQ DX,R12
-	MOVQ 8(SI),AX
-	SHLQ $1,AX
-	MULQ 16(SI)
-	ADDQ AX,R13
-	ADCQ DX,R14
-	MOVQ 8(SI),AX
-	SHLQ $1,AX
-	MULQ 24(SI)
-	ADDQ AX,R15
-	ADCQ DX,BX
-	MOVQ 8(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,CX
-	ADCQ DX,R8
-	MOVQ 16(SI),AX
-	MULQ 16(SI)
-	ADDQ AX,R15
-	ADCQ DX,BX
-	MOVQ 16(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 24(SI)
-	ADDQ AX,CX
-	ADCQ DX,R8
-	MOVQ 16(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R9
-	ADCQ DX,R10
-	MOVQ 24(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 24(SI)
-	ADDQ AX,R9
-	ADCQ DX,R10
-	MOVQ 24(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R11
-	ADCQ DX,R12
-	MOVQ 32(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R13
-	ADCQ DX,R14
-	MOVQ $REDMASK51,SI
-	SHLQ $13,CX,R8
-	ANDQ SI,CX
-	SHLQ $13,R9,R10
-	ANDQ SI,R9
-	ADDQ R8,R9
-	SHLQ $13,R11,R12
-	ANDQ SI,R11
-	ADDQ R10,R11
-	SHLQ $13,R13,R14
-	ANDQ SI,R13
-	ADDQ R12,R13
-	SHLQ $13,R15,BX
-	ANDQ SI,R15
-	ADDQ R14,R15
-	IMUL3Q $19,BX,DX
-	ADDQ DX,CX
-	MOVQ CX,DX
-	SHRQ $51,DX
-	ADDQ R9,DX
-	ANDQ SI,CX
-	MOVQ DX,R8
-	SHRQ $51,DX
-	ADDQ R11,DX
-	ANDQ SI,R8
-	MOVQ DX,R9
-	SHRQ $51,DX
-	ADDQ R13,DX
-	ANDQ SI,R9
-	MOVQ DX,AX
-	SHRQ $51,DX
-	ADDQ R15,DX
-	ANDQ SI,AX
-	MOVQ DX,R10
-	SHRQ $51,DX
-	IMUL3Q $19,DX,DX
-	ADDQ DX,CX
-	ANDQ SI,R10
-	MOVQ CX,0(DI)
-	MOVQ R8,8(DI)
-	MOVQ R9,16(DI)
-	MOVQ AX,24(DI)
-	MOVQ R10,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/internal/chacha20/asm_ppc64le.s
+++ b/vendor/golang.org/x/crypto/internal/chacha20/asm_ppc64le.s
@ -1,668 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Based on CRYPTOGAMS code with the following comment:
-// # ====================================================================
-// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// # project. The module is, however, dual licensed under OpenSSL and
-// # CRYPTOGAMS licenses depending on where you obtain it. For further
-// # details see http://www.openssl.org/~appro/cryptogams/.
-// # ====================================================================
-
-// Original code can be found at the link below:
-// https://github.com/dot-asm/cryptogams/commit/a60f5b50ed908e91e5c39ca79126a4a876d5d8ff
-
-// There are some differences between CRYPTOGAMS code and this one. The round
-// loop for "_int" isn't the same as the original. Some adjustments were
-// necessary because there are less vector registers available.  For example, some
-// X variables (r12, r13, r14, and r15) share the same register used by the
-// counter. The original code uses ctr to name the counter. Here we use CNT
-// because golang uses CTR as the counter register name.
-
-// +build ppc64le,!gccgo,!appengine
-
-#include "textflag.h"
-
-#define OUT  R3
-#define INP  R4
-#define LEN  R5
-#define KEY  R6
-#define CNT  R7
-
-#define TEMP R8
-
-#define X0   R11
-#define X1   R12
-#define X2   R14
-#define X3   R15
-#define X4   R16
-#define X5   R17
-#define X6   R18
-#define X7   R19
-#define X8   R20
-#define X9   R21
-#define X10  R22
-#define X11  R23
-#define X12  R24
-#define X13  R25
-#define X14  R26
-#define X15  R27
-
-#define CON0 X0
-#define CON1 X1
-#define CON2 X2
-#define CON3 X3
-
-#define KEY0 X4
-#define KEY1 X5
-#define KEY2 X6
-#define KEY3 X7
-#define KEY4 X8
-#define KEY5 X9
-#define KEY6 X10
-#define KEY7 X11
-
-#define CNT0 X12
-#define CNT1 X13
-#define CNT2 X14
-#define CNT3 X15
-
-#define TMP0 R9
-#define TMP1 R10
-#define TMP2 R28
-#define TMP3 R29
-
-#define CONSTS  R8
-
-#define A0      V0
-#define B0      V1
-#define C0      V2
-#define D0      V3
-#define A1      V4
-#define B1      V5
-#define C1      V6
-#define D1      V7
-#define A2      V8
-#define B2      V9
-#define C2      V10
-#define D2      V11
-#define T0      V12
-#define T1      V13
-#define T2      V14
-
-#define K0      V15
-#define K1      V16
-#define K2      V17
-#define K3      V18
-#define K4      V19
-#define K5      V20
-
-#define FOUR    V21
-#define SIXTEEN V22
-#define TWENTY4 V23
-#define TWENTY  V24
-#define TWELVE  V25
-#define TWENTY5 V26
-#define SEVEN   V27
-
-#define INPPERM V28
-#define OUTPERM V29
-#define OUTMASK V30
-
-#define DD0     V31
-#define DD1     SEVEN
-#define DD2     T0
-#define DD3     T1
-#define DD4     T2
-
-DATA  ·consts+0x00(SB)/8, $0x3320646e61707865
-DATA  ·consts+0x08(SB)/8, $0x6b20657479622d32
-DATA  ·consts+0x10(SB)/8, $0x0000000000000001
-DATA  ·consts+0x18(SB)/8, $0x0000000000000000
-DATA  ·consts+0x20(SB)/8, $0x0000000000000004
-DATA  ·consts+0x28(SB)/8, $0x0000000000000000
-DATA  ·consts+0x30(SB)/8, $0x0a0b08090e0f0c0d
-DATA  ·consts+0x38(SB)/8, $0x0203000106070405
-DATA  ·consts+0x40(SB)/8, $0x090a0b080d0e0f0c
-DATA  ·consts+0x48(SB)/8, $0x0102030005060704
-GLOBL ·consts(SB), RODATA, $80
-
-//func chaCha20_ctr32_vmx(out, inp *byte, len int, key *[32]byte, counter *[16]byte)
-TEXT ·chaCha20_ctr32_vmx(SB),NOSPLIT|NOFRAME,$0
-	// Load the arguments inside the registers
-	MOVD out+0(FP), OUT
-	MOVD inp+8(FP), INP
-	MOVD len+16(FP), LEN
-	MOVD key+24(FP), KEY
-	MOVD counter+32(FP), CNT
-
-	MOVD $·consts(SB), CONSTS // point to consts addr
-
-	MOVD $16, X0
-	MOVD $32, X1
-	MOVD $48, X2
-	MOVD $64, X3
-	MOVD $31, X4
-	MOVD $15, X5
-
-	// Load key
-	LVX  (KEY)(R0), K1
-	LVSR (KEY)(R0), T0
-	LVX  (KEY)(X0), K2
-	LVX  (KEY)(X4), DD0
-
-	// Load counter
-	LVX  (CNT)(R0), K3
-	LVSR (CNT)(R0), T1
-	LVX  (CNT)(X5), DD1
-
-	// Load constants
-	LVX (CONSTS)(R0), K0
-	LVX (CONSTS)(X0), K5
-	LVX (CONSTS)(X1), FOUR
-	LVX (CONSTS)(X2), SIXTEEN
-	LVX (CONSTS)(X3), TWENTY4
-
-	// Align key and counter
-	VPERM K2,  K1, T0, K1
-	VPERM DD0, K2, T0, K2
-	VPERM DD1, K3, T1, K3
-
-	// Load counter to GPR
-	MOVWZ 0(CNT), CNT0
-	MOVWZ 4(CNT), CNT1
-	MOVWZ 8(CNT), CNT2
-	MOVWZ 12(CNT), CNT3
-
-	// Adjust vectors for the initial state
-	VADDUWM K3, K5, K3
-	VADDUWM K3, K5, K4
-	VADDUWM K4, K5, K5
-
-	// Synthesized constants
-	VSPLTISW $-12, TWENTY
-	VSPLTISW $12, TWELVE
-	VSPLTISW $-7, TWENTY5
-
-	VXOR T0, T0, T0
-	VSPLTISW $-1, OUTMASK
-	LVSR (INP)(R0), INPPERM
-	LVSL (OUT)(R0), OUTPERM
-	VPERM OUTMASK, T0, OUTPERM, OUTMASK
-
-loop_outer_vmx:
-	// Load constant
-	MOVD $0x61707865, CON0
-	MOVD $0x3320646e, CON1
-	MOVD $0x79622d32, CON2
-	MOVD $0x6b206574, CON3
-
-	VOR K0, K0, A0
-	VOR K0, K0, A1
-	VOR K0, K0, A2
-	VOR K1, K1, B0
-
-	MOVD $10, TEMP
-
-	// Load key to GPR
-	MOVWZ 0(KEY), X4
-	MOVWZ 4(KEY), X5
-	MOVWZ 8(KEY), X6
-	MOVWZ 12(KEY), X7
-	VOR K1, K1, B1
-	VOR K1, K1, B2
-	MOVWZ 16(KEY), X8
-	MOVWZ  0(CNT), X12
-	MOVWZ 20(KEY), X9
-	MOVWZ 4(CNT), X13
-	VOR K2, K2, C0
-	VOR K2, K2, C1
-	MOVWZ 24(KEY), X10
-	MOVWZ 8(CNT), X14
-	VOR K2, K2, C2
-	VOR K3, K3, D0
-	MOVWZ 28(KEY), X11
-	MOVWZ 12(CNT), X15
-	VOR K4, K4, D1
-	VOR K5, K5, D2
-
-	MOVD X4, TMP0
-	MOVD X5, TMP1
-	MOVD X6, TMP2
-	MOVD X7, TMP3
-	VSPLTISW $7, SEVEN
-
-	MOVD TEMP, CTR
-
-loop_vmx:
-	// CRYPTOGAMS uses a macro to create a loop using perl. This isn't possible
-	// using assembly macros.  Therefore, the macro expansion result was used
-	// in order to maintain the algorithm efficiency.
-	// This loop generates three keystream blocks using VMX instructions and,
-	// in parallel, one keystream block using scalar instructions.
-	ADD X4, X0, X0
-	ADD X5, X1, X1
-	VADDUWM A0, B0, A0
-	VADDUWM A1, B1, A1
-	ADD X6, X2, X2
-	ADD X7, X3, X3
-	VADDUWM A2, B2, A2
-	VXOR D0, A0, D0
-	XOR X0, X12, X12
-	XOR X1, X13, X13
-	VXOR D1, A1, D1
-	VXOR D2, A2, D2
-	XOR X2, X14, X14
-	XOR X3, X15, X15
-	VPERM D0, D0, SIXTEEN, D0
-	VPERM D1, D1, SIXTEEN, D1
-	ROTLW $16, X12, X12
-	ROTLW $16, X13, X13
-	VPERM D2, D2, SIXTEEN, D2
-	VADDUWM C0, D0, C0
-	ROTLW $16, X14, X14
-	ROTLW $16, X15, X15
-	VADDUWM C1, D1, C1
-	VADDUWM C2, D2, C2
-	ADD X12, X8, X8
-	ADD X13, X9, X9
-	VXOR B0, C0, T0
-	VXOR B1, C1, T1
-	ADD X14, X10, X10
-	ADD X15, X11, X11
-	VXOR B2, C2, T2
-	VRLW T0, TWELVE, B0
-	XOR X8, X4, X4
-	XOR X9, X5, X5
-	VRLW T1, TWELVE, B1
-	VRLW T2, TWELVE, B2
-	XOR X10, X6, X6
-	XOR X11, X7, X7
-	VADDUWM A0, B0, A0
-	VADDUWM A1, B1, A1
-	ROTLW $12, X4, X4
-	ROTLW $12, X5, X5
-	VADDUWM A2, B2, A2
-	VXOR D0, A0, D0
-	ROTLW $12, X6, X6
-	ROTLW $12, X7, X7
-	VXOR D1, A1, D1
-	VXOR D2, A2, D2
-	ADD X4, X0, X0
-	ADD X5, X1, X1
-	VPERM D0, D0, TWENTY4, D0
-	VPERM D1, D1, TWENTY4, D1
-	ADD X6, X2, X2
-	ADD X7, X3, X3
-	VPERM D2, D2, TWENTY4, D2
-	VADDUWM C0, D0, C0
-	XOR X0, X12, X12
-	XOR X1, X13, X13
-	VADDUWM C1, D1, C1
-	VADDUWM C2, D2, C2
-	XOR X2, X14, X14
-	XOR X3, X15, X15
-	VXOR B0, C0, T0
-	VXOR B1, C1, T1
-	ROTLW $8, X12, X12
-	ROTLW $8, X13, X13
-	VXOR B2, C2, T2
-	VRLW T0, SEVEN, B0
-	ROTLW $8, X14, X14
-	ROTLW $8, X15, X15
-	VRLW T1, SEVEN, B1
-	VRLW T2, SEVEN, B2
-	ADD X12, X8, X8
-	ADD X13, X9, X9
-	VSLDOI $8, C0, C0, C0
-	VSLDOI $8, C1, C1, C1
-	ADD X14, X10, X10
-	ADD X15, X11, X11
-	VSLDOI $8, C2, C2, C2
-	VSLDOI $12, B0, B0, B0
-	XOR X8, X4, X4
-	XOR X9, X5, X5
-	VSLDOI $12, B1, B1, B1
-	VSLDOI $12, B2, B2, B2
-	XOR X10, X6, X6
-	XOR X11, X7, X7
-	VSLDOI $4, D0, D0, D0
-	VSLDOI $4, D1, D1, D1
-	ROTLW $7, X4, X4
-	ROTLW $7, X5, X5
-	VSLDOI $4, D2, D2, D2
-	VADDUWM A0, B0, A0
-	ROTLW $7, X6, X6
-	ROTLW $7, X7, X7
-	VADDUWM A1, B1, A1
-	VADDUWM A2, B2, A2
-	ADD X5, X0, X0
-	ADD X6, X1, X1
-	VXOR D0, A0, D0
-	VXOR D1, A1, D1
-	ADD X7, X2, X2
-	ADD X4, X3, X3
-	VXOR D2, A2, D2
-	VPERM D0, D0, SIXTEEN, D0
-	XOR X0, X15, X15
-	XOR X1, X12, X12
-	VPERM D1, D1, SIXTEEN, D1
-	VPERM D2, D2, SIXTEEN, D2
-	XOR X2, X13, X13
-	XOR X3, X14, X14
-	VADDUWM C0, D0, C0
-	VADDUWM C1, D1, C1
-	ROTLW $16, X15, X15
-	ROTLW $16, X12, X12
-	VADDUWM C2, D2, C2
-	VXOR B0, C0, T0
-	ROTLW $16, X13, X13
-	ROTLW $16, X14, X14
-	VXOR B1, C1, T1
-	VXOR B2, C2, T2
-	ADD X15, X10, X10
-	ADD X12, X11, X11
-	VRLW T0, TWELVE, B0
-	VRLW T1, TWELVE, B1
-	ADD X13, X8, X8
-	ADD X14, X9, X9
-	VRLW T2, TWELVE, B2
-	VADDUWM A0, B0, A0
-	XOR X10, X5, X5
-	XOR X11, X6, X6
-	VADDUWM A1, B1, A1
-	VADDUWM A2, B2, A2
-	XOR X8, X7, X7
-	XOR X9, X4, X4
-	VXOR D0, A0, D0
-	VXOR D1, A1, D1
-	ROTLW $12, X5, X5
-	ROTLW $12, X6, X6
-	VXOR D2, A2, D2
-	VPERM D0, D0, TWENTY4, D0
-	ROTLW $12, X7, X7
-	ROTLW $12, X4, X4
-	VPERM D1, D1, TWENTY4, D1
-	VPERM D2, D2, TWENTY4, D2
-	ADD X5, X0, X0
-	ADD X6, X1, X1
-	VADDUWM C0, D0, C0
-	VADDUWM C1, D1, C1
-	ADD X7, X2, X2
-	ADD X4, X3, X3
-	VADDUWM C2, D2, C2
-	VXOR B0, C0, T0
-	XOR X0, X15, X15
-	XOR X1, X12, X12
-	VXOR B1, C1, T1
-	VXOR B2, C2, T2
-	XOR X2, X13, X13
-	XOR X3, X14, X14
-	VRLW T0, SEVEN, B0
-	VRLW T1, SEVEN, B1
-	ROTLW $8, X15, X15
-	ROTLW $8, X12, X12
-	VRLW T2, SEVEN, B2
-	VSLDOI $8, C0, C0, C0
-	ROTLW $8, X13, X13
-	ROTLW $8, X14, X14
-	VSLDOI $8, C1, C1, C1
-	VSLDOI $8, C2, C2, C2
-	ADD X15, X10, X10
-	ADD X12, X11, X11
-	VSLDOI $4, B0, B0, B0
-	VSLDOI $4, B1, B1, B1
-	ADD X13, X8, X8
-	ADD X14, X9, X9
-	VSLDOI $4, B2, B2, B2
-	VSLDOI $12, D0, D0, D0
-	XOR X10, X5, X5
-	XOR X11, X6, X6
-	VSLDOI $12, D1, D1, D1
-	VSLDOI $12, D2, D2, D2
-	XOR X8, X7, X7
-	XOR X9, X4, X4
-	ROTLW $7, X5, X5
-	ROTLW $7, X6, X6
-	ROTLW $7, X7, X7
-	ROTLW $7, X4, X4
-	BC 0x10, 0, loop_vmx
-
-	SUB $256, LEN, LEN
-
-	// Accumulate key block
-	ADD $0x61707865, X0, X0
-	ADD $0x3320646e, X1, X1
-	ADD $0x79622d32, X2, X2
-	ADD $0x6b206574, X3, X3
-	ADD TMP0, X4, X4
-	ADD TMP1, X5, X5
-	ADD TMP2, X6, X6
-	ADD TMP3, X7, X7
-	MOVWZ 16(KEY), TMP0
-	MOVWZ 20(KEY), TMP1
-	MOVWZ 24(KEY), TMP2
-	MOVWZ 28(KEY), TMP3
-	ADD TMP0, X8, X8
-	ADD TMP1, X9, X9
-	ADD TMP2, X10, X10
-	ADD TMP3, X11, X11
-
-	MOVWZ 12(CNT), TMP0
-	MOVWZ 8(CNT), TMP1
-	MOVWZ 4(CNT), TMP2
-	MOVWZ 0(CNT), TEMP
-	ADD TMP0, X15, X15
-	ADD TMP1, X14, X14
-	ADD TMP2, X13, X13
-	ADD TEMP, X12, X12
-
-	// Accumulate key block
-	VADDUWM A0, K0, A0
-	VADDUWM A1, K0, A1
-	VADDUWM A2, K0, A2
-	VADDUWM B0, K1, B0
-	VADDUWM B1, K1, B1
-	VADDUWM B2, K1, B2
-	VADDUWM C0, K2, C0
-	VADDUWM C1, K2, C1
-	VADDUWM C2, K2, C2
-	VADDUWM D0, K3, D0
-	VADDUWM D1, K4, D1
-	VADDUWM D2, K5, D2
-
-	// Increment counter
-	ADD $4, TEMP, TEMP
-	MOVW TEMP, 0(CNT)
-
-	VADDUWM K3, FOUR, K3
-	VADDUWM K4, FOUR, K4
-	VADDUWM K5, FOUR, K5
-
-	// XOR the input slice (INP) with the keystream, which is stored in GPRs (X0-X3).
-
-	// Load input (aligned or not)
-	MOVWZ 0(INP), TMP0
-	MOVWZ 4(INP), TMP1
-	MOVWZ 8(INP), TMP2
-	MOVWZ 12(INP), TMP3
-
-	// XOR with input
-	XOR TMP0, X0, X0
-	XOR TMP1, X1, X1
-	XOR TMP2, X2, X2
-	XOR TMP3, X3, X3
-	MOVWZ 16(INP), TMP0
-	MOVWZ 20(INP), TMP1
-	MOVWZ 24(INP), TMP2
-	MOVWZ 28(INP), TMP3
-	XOR TMP0, X4, X4
-	XOR TMP1, X5, X5
-	XOR TMP2, X6, X6
-	XOR TMP3, X7, X7
-	MOVWZ 32(INP), TMP0
-	MOVWZ 36(INP), TMP1
-	MOVWZ 40(INP), TMP2
-	MOVWZ 44(INP), TMP3
-	XOR TMP0, X8, X8
-	XOR TMP1, X9, X9
-	XOR TMP2, X10, X10
-	XOR TMP3, X11, X11
-	MOVWZ 48(INP), TMP0
-	MOVWZ 52(INP), TMP1
-	MOVWZ 56(INP), TMP2
-	MOVWZ 60(INP), TMP3
-	XOR TMP0, X12, X12
-	XOR TMP1, X13, X13
-	XOR TMP2, X14, X14
-	XOR TMP3, X15, X15
-
-	// Store output (aligned or not)
-	MOVW X0, 0(OUT)
-	MOVW X1, 4(OUT)
-	MOVW X2, 8(OUT)
-	MOVW X3, 12(OUT)
-
-	ADD $64, INP, INP // INP points to the end of the slice for the alignment code below
-
-	MOVW X4, 16(OUT)
-	MOVD $16, TMP0
-	MOVW X5, 20(OUT)
-	MOVD $32, TMP1
-	MOVW X6, 24(OUT)
-	MOVD $48, TMP2
-	MOVW X7, 28(OUT)
-	MOVD $64, TMP3
-	MOVW X8, 32(OUT)
-	MOVW X9, 36(OUT)
-	MOVW X10, 40(OUT)
-	MOVW X11, 44(OUT)
-	MOVW X12, 48(OUT)
-	MOVW X13, 52(OUT)
-	MOVW X14, 56(OUT)
-	MOVW X15, 60(OUT)
-	ADD $64, OUT, OUT
-
-	// Load input
-	LVX (INP)(R0), DD0
-	LVX (INP)(TMP0), DD1
-	LVX (INP)(TMP1), DD2
-	LVX (INP)(TMP2), DD3
-	LVX (INP)(TMP3), DD4
-	ADD $64, INP, INP
-
-	VPERM DD1, DD0, INPPERM, DD0 // Align input
-	VPERM DD2, DD1, INPPERM, DD1
-	VPERM DD3, DD2, INPPERM, DD2
-	VPERM DD4, DD3, INPPERM, DD3
-	VXOR A0, DD0, A0 // XOR with input
-	VXOR B0, DD1, B0
-	LVX (INP)(TMP0), DD1 // Keep loading input
-	VXOR C0, DD2, C0
-	LVX (INP)(TMP1), DD2
-	VXOR D0, DD3, D0
-	LVX (INP)(TMP2), DD3
-	LVX (INP)(TMP3), DD0
-	ADD $64, INP, INP
-	MOVD $63, TMP3 // 63 is not a typo
-	VPERM A0, A0, OUTPERM, A0
-	VPERM B0, B0, OUTPERM, B0
-	VPERM C0, C0, OUTPERM, C0
-	VPERM D0, D0, OUTPERM, D0
-
-	VPERM DD1, DD4, INPPERM, DD4 // Align input
-	VPERM DD2, DD1, INPPERM, DD1
-	VPERM DD3, DD2, INPPERM, DD2
-	VPERM DD0, DD3, INPPERM, DD3
-	VXOR A1, DD4, A1
-	VXOR B1, DD1, B1
-	LVX (INP)(TMP0), DD1 // Keep loading
-	VXOR C1, DD2, C1
-	LVX (INP)(TMP1), DD2
-	VXOR D1, DD3, D1
-	LVX (INP)(TMP2), DD3
-
-	// Note that the LVX address is always rounded down to the nearest 16-byte
-	// boundary, and that it always points to at most 15 bytes beyond the end of
-	// the slice, so we cannot cross a page boundary.
-	LVX (INP)(TMP3), DD4 // Redundant in aligned case.
-	ADD $64, INP, INP
-	VPERM A1, A1, OUTPERM, A1 // Pre-misalign output
-	VPERM B1, B1, OUTPERM, B1
-	VPERM C1, C1, OUTPERM, C1
-	VPERM D1, D1, OUTPERM, D1
-
-	VPERM DD1, DD0, INPPERM, DD0 // Align Input
-	VPERM DD2, DD1, INPPERM, DD1
-	VPERM DD3, DD2, INPPERM, DD2
-	VPERM DD4, DD3, INPPERM, DD3
-	VXOR A2, DD0, A2
-	VXOR B2, DD1, B2
-	VXOR C2, DD2, C2
-	VXOR D2, DD3, D2
-	VPERM A2, A2, OUTPERM, A2
-	VPERM B2, B2, OUTPERM, B2
-	VPERM C2, C2, OUTPERM, C2
-	VPERM D2, D2, OUTPERM, D2
-
-	ANDCC $15, OUT, X1 // Is out aligned?
-	MOVD OUT, X0
-
-	VSEL A0, B0, OUTMASK, DD0 // Collect pre-misaligned output
-	VSEL B0, C0, OUTMASK, DD1
-	VSEL C0, D0, OUTMASK, DD2
-	VSEL D0, A1, OUTMASK, DD3
-	VSEL A1, B1, OUTMASK, B0
-	VSEL B1, C1, OUTMASK, C0
-	VSEL C1, D1, OUTMASK, D0
-	VSEL D1, A2, OUTMASK, A1
-	VSEL A2, B2, OUTMASK, B1
-	VSEL B2, C2, OUTMASK, C1
-	VSEL C2, D2, OUTMASK, D1
-
-	STVX DD0, (OUT+TMP0)
-	STVX DD1, (OUT+TMP1)
-	STVX DD2, (OUT+TMP2)
-	ADD $64, OUT, OUT
-	STVX DD3, (OUT+R0)
-	STVX B0, (OUT+TMP0)
-	STVX C0, (OUT+TMP1)
-	STVX D0, (OUT+TMP2)
-	ADD $64, OUT, OUT
-	STVX A1, (OUT+R0)
-	STVX B1, (OUT+TMP0)
-	STVX C1, (OUT+TMP1)
-	STVX D1, (OUT+TMP2)
-	ADD $64, OUT, OUT
-
-	BEQ aligned_vmx
-
-	SUB X1, OUT, X2 // in misaligned case edges
-	MOVD $0, X3 // are written byte-by-byte
-
-unaligned_tail_vmx:
-	STVEBX D2, (X2+X3)
-	ADD $1, X3, X3
-	CMPW X3, X1
-	BNE unaligned_tail_vmx
-	SUB X1, X0, X2
-
-unaligned_head_vmx:
-	STVEBX A0, (X2+X1)
-	CMPW X1, $15
-	ADD $1, X1, X1
-	BNE unaligned_head_vmx
-
-	CMPU LEN, $255 // done with 256-byte block yet?
-	BGT loop_outer_vmx
-
-	JMP done_vmx
-
-aligned_vmx:
-	STVX A0, (X0+R0)
-	CMPU LEN, $255 // done with 256-byte block yet?
-	BGT loop_outer_vmx
-
-done_vmx:
-	RET
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_arm64.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_arm64.go
@ -1,31 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build go1.11
-// +build !gccgo
-
-package chacha20
-
-const (
-	haveAsm = true
-	bufSize = 256
-)
-
-//go:noescape
-func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
-
-func (c *Cipher) xorKeyStreamAsm(dst, src []byte) {
-
-	if len(src) >= bufSize {
-		xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter)
-	}
-
-	if len(src)%bufSize != 0 {
-		i := len(src) - len(src)%bufSize
-		c.buf = [bufSize]byte{}
-		copy(c.buf[:], src[i:])
-		xorKeyStreamVX(c.buf[:], c.buf[:], &c.key, &c.nonce, &c.counter)
-		c.len = bufSize - copy(dst[i:], c.buf[:len(src)%bufSize])
-	}
-}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_generic.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_generic.go
@ -1,264 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package ChaCha20 implements the core ChaCha20 function as specified
-// in https://tools.ietf.org/html/rfc7539#section-2.3.
-package chacha20
-
-import (
-	"crypto/cipher"
-	"encoding/binary"
-
-	"golang.org/x/crypto/internal/subtle"
-)
-
-// assert that *Cipher implements cipher.Stream
-var _ cipher.Stream = (*Cipher)(nil)
-
-// Cipher is a stateful instance of ChaCha20 using a particular key
-// and nonce. A *Cipher implements the cipher.Stream interface.
-type Cipher struct {
-	key     [8]uint32
-	counter uint32 // incremented after each block
-	nonce   [3]uint32
-	buf     [bufSize]byte // buffer for unused keystream bytes
-	len     int           // number of unused keystream bytes at end of buf
-}
-
-// New creates a new ChaCha20 stream cipher with the given key and nonce.
-// The initial counter value is set to 0.
-func New(key [8]uint32, nonce [3]uint32) *Cipher {
-	return &Cipher{key: key, nonce: nonce}
-}
-
-// ChaCha20 constants spelling "expand 32-byte k"
-const (
-	j0 uint32 = 0x61707865
-	j1 uint32 = 0x3320646e
-	j2 uint32 = 0x79622d32
-	j3 uint32 = 0x6b206574
-)
-
-func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
-	a += b
-	d ^= a
-	d = (d << 16) | (d >> 16)
-	c += d
-	b ^= c
-	b = (b << 12) | (b >> 20)
-	a += b
-	d ^= a
-	d = (d << 8) | (d >> 24)
-	c += d
-	b ^= c
-	b = (b << 7) | (b >> 25)
-	return a, b, c, d
-}
-
-// XORKeyStream XORs each byte in the given slice with a byte from the
-// cipher's key stream. Dst and src must overlap entirely or not at all.
-//
-// If len(dst) < len(src), XORKeyStream will panic. It is acceptable
-// to pass a dst bigger than src, and in that case, XORKeyStream will
-// only update dst[:len(src)] and will not touch the rest of dst.
-//
-// Multiple calls to XORKeyStream behave as if the concatenation of
-// the src buffers was passed in a single run. That is, Cipher
-// maintains state and does not reset at each XORKeyStream call.
-func (s *Cipher) XORKeyStream(dst, src []byte) {
-	if len(dst) < len(src) {
-		panic("chacha20: output smaller than input")
-	}
-	if subtle.InexactOverlap(dst[:len(src)], src) {
-		panic("chacha20: invalid buffer overlap")
-	}
-
-	// xor src with buffered keystream first
-	if s.len != 0 {
-		buf := s.buf[len(s.buf)-s.len:]
-		if len(src) < len(buf) {
-			buf = buf[:len(src)]
-		}
-		td, ts := dst[:len(buf)], src[:len(buf)] // BCE hint
-		for i, b := range buf {
-			td[i] = ts[i] ^ b
-		}
-		s.len -= len(buf)
-		if s.len != 0 {
-			return
-		}
-		s.buf = [len(s.buf)]byte{} // zero the empty buffer
-		src = src[len(buf):]
-		dst = dst[len(buf):]
-	}
-
-	if len(src) == 0 {
-		return
-	}
-	if haveAsm {
-		if uint64(len(src))+uint64(s.counter)*64 > (1<<38)-64 {
-			panic("chacha20: counter overflow")
-		}
-		s.xorKeyStreamAsm(dst, src)
-		return
-	}
-
-	// set up a 64-byte buffer to pad out the final block if needed
-	// (hoisted out of the main loop to avoid spills)
-	rem := len(src) % 64  // length of final block
-	fin := len(src) - rem // index of final block
-	if rem > 0 {
-		copy(s.buf[len(s.buf)-64:], src[fin:])
-	}
-
-	// pre-calculate most of the first round
-	s1, s5, s9, s13 := quarterRound(j1, s.key[1], s.key[5], s.nonce[0])
-	s2, s6, s10, s14 := quarterRound(j2, s.key[2], s.key[6], s.nonce[1])
-	s3, s7, s11, s15 := quarterRound(j3, s.key[3], s.key[7], s.nonce[2])
-
-	n := len(src)
-	src, dst = src[:n:n], dst[:n:n] // BCE hint
-	for i := 0; i < n; i += 64 {
-		// calculate the remainder of the first round
-		s0, s4, s8, s12 := quarterRound(j0, s.key[0], s.key[4], s.counter)
-
-		// execute the second round
-		x0, x5, x10, x15 := quarterRound(s0, s5, s10, s15)
-		x1, x6, x11, x12 := quarterRound(s1, s6, s11, s12)
-		x2, x7, x8, x13 := quarterRound(s2, s7, s8, s13)
-		x3, x4, x9, x14 := quarterRound(s3, s4, s9, s14)
-
-		// execute the remaining 18 rounds
-		for i := 0; i < 9; i++ {
-			x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
-			x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
-			x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
-			x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
-
-			x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
-			x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
-			x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
-			x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
-		}
-
-		x0 += j0
-		x1 += j1
-		x2 += j2
-		x3 += j3
-
-		x4 += s.key[0]
-		x5 += s.key[1]
-		x6 += s.key[2]
-		x7 += s.key[3]
-		x8 += s.key[4]
-		x9 += s.key[5]
-		x10 += s.key[6]
-		x11 += s.key[7]
-
-		x12 += s.counter
-		x13 += s.nonce[0]
-		x14 += s.nonce[1]
-		x15 += s.nonce[2]
-
-		// increment the counter
-		s.counter += 1
-		if s.counter == 0 {
-			panic("chacha20: counter overflow")
-		}
-
-		// pad to 64 bytes if needed
-		in, out := src[i:], dst[i:]
-		if i == fin {
-			// src[fin:] has already been copied into s.buf before
-			// the main loop
-			in, out = s.buf[len(s.buf)-64:], s.buf[len(s.buf)-64:]
-		}
-		in, out = in[:64], out[:64] // BCE hint
-
-		// XOR the key stream with the source and write out the result
-		xor(out[0:], in[0:], x0)
-		xor(out[4:], in[4:], x1)
-		xor(out[8:], in[8:], x2)
-		xor(out[12:], in[12:], x3)
-		xor(out[16:], in[16:], x4)
-		xor(out[20:], in[20:], x5)
-		xor(out[24:], in[24:], x6)
-		xor(out[28:], in[28:], x7)
-		xor(out[32:], in[32:], x8)
-		xor(out[36:], in[36:], x9)
-		xor(out[40:], in[40:], x10)
-		xor(out[44:], in[44:], x11)
-		xor(out[48:], in[48:], x12)
-		xor(out[52:], in[52:], x13)
-		xor(out[56:], in[56:], x14)
-		xor(out[60:], in[60:], x15)
-	}
-	// copy any trailing bytes out of the buffer and into dst
-	if rem != 0 {
-		s.len = 64 - rem
-		copy(dst[fin:], s.buf[len(s.buf)-64:])
-	}
-}
-
-// Advance discards bytes in the key stream until the next 64 byte block
-// boundary is reached and updates the counter accordingly. If the key
-// stream is already at a block boundary no bytes will be discarded and
-// the counter will be unchanged.
-func (s *Cipher) Advance() {
-	s.len -= s.len % 64
-	if s.len == 0 {
-		s.buf = [len(s.buf)]byte{}
-	}
-}
-
-// XORKeyStream crypts bytes from in to out using the given key and counters.
-// In and out must overlap entirely or not at all. Counter contains the raw
-// ChaCha20 counter bytes (i.e. block counter followed by nonce).
-func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
-	s := Cipher{
-		key: [8]uint32{
-			binary.LittleEndian.Uint32(key[0:4]),
-			binary.LittleEndian.Uint32(key[4:8]),
-			binary.LittleEndian.Uint32(key[8:12]),
-			binary.LittleEndian.Uint32(key[12:16]),
-			binary.LittleEndian.Uint32(key[16:20]),
-			binary.LittleEndian.Uint32(key[20:24]),
-			binary.LittleEndian.Uint32(key[24:28]),
-			binary.LittleEndian.Uint32(key[28:32]),
-		},
-		nonce: [3]uint32{
-			binary.LittleEndian.Uint32(counter[4:8]),
-			binary.LittleEndian.Uint32(counter[8:12]),
-			binary.LittleEndian.Uint32(counter[12:16]),
-		},
-		counter: binary.LittleEndian.Uint32(counter[0:4]),
-	}
-	s.XORKeyStream(out, in)
-}
-
-// HChaCha20 uses the ChaCha20 core to generate a derived key from a key and a
-// nonce. It should only be used as part of the XChaCha20 construction.
-func HChaCha20(key *[8]uint32, nonce *[4]uint32) [8]uint32 {
-	x0, x1, x2, x3 := j0, j1, j2, j3
-	x4, x5, x6, x7 := key[0], key[1], key[2], key[3]
-	x8, x9, x10, x11 := key[4], key[5], key[6], key[7]
-	x12, x13, x14, x15 := nonce[0], nonce[1], nonce[2], nonce[3]
-
-	for i := 0; i < 10; i++ {
-		x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
-		x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
-		x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
-		x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
-
-		x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
-		x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
-		x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
-		x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
-	}
-
-	var out [8]uint32
-	out[0], out[1], out[2], out[3] = x0, x1, x2, x3
-	out[4], out[5], out[6], out[7] = x12, x13, x14, x15
-	return out
-}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_ppc64le.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_ppc64le.go
@ -1,52 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ppc64le,!gccgo,!appengine
-
-package chacha20
-
-import "encoding/binary"
-
-const (
-	bufSize = 256
-	haveAsm = true
-)
-
-//go:noescape
-func chaCha20_ctr32_vmx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
-
-func (c *Cipher) xorKeyStreamAsm(dst, src []byte) {
-	if len(src) >= bufSize {
-		chaCha20_ctr32_vmx(&dst[0], &src[0], len(src)-len(src)%bufSize, &c.key, &c.counter)
-	}
-	if len(src)%bufSize != 0 {
-		chaCha20_ctr32_vmx(&c.buf[0], &c.buf[0], bufSize, &c.key, &c.counter)
-		start := len(src) - len(src)%bufSize
-		ts, td, tb := src[start:], dst[start:], c.buf[:]
-		// Unroll loop to XOR 32 bytes per iteration.
-		for i := 0; i < len(ts)-32; i += 32 {
-			td, tb = td[:len(ts)], tb[:len(ts)] // bounds check elimination
-			s0 := binary.LittleEndian.Uint64(ts[0:8])
-			s1 := binary.LittleEndian.Uint64(ts[8:16])
-			s2 := binary.LittleEndian.Uint64(ts[16:24])
-			s3 := binary.LittleEndian.Uint64(ts[24:32])
-			b0 := binary.LittleEndian.Uint64(tb[0:8])
-			b1 := binary.LittleEndian.Uint64(tb[8:16])
-			b2 := binary.LittleEndian.Uint64(tb[16:24])
-			b3 := binary.LittleEndian.Uint64(tb[24:32])
-			binary.LittleEndian.PutUint64(td[0:8], s0^b0)
-			binary.LittleEndian.PutUint64(td[8:16], s1^b1)
-			binary.LittleEndian.PutUint64(td[16:24], s2^b2)
-			binary.LittleEndian.PutUint64(td[24:32], s3^b3)
-			ts, td, tb = ts[32:], td[32:], tb[32:]
-		}
-		td, tb = td[:len(ts)], tb[:len(ts)] // bounds check elimination
-		for i, v := range ts {
-			td[i] = tb[i] ^ v
-		}
-		c.len = bufSize - (len(src) % bufSize)
-
-	}
-
-}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_s390x.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_s390x.go
@ -1,29 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x,!gccgo,!appengine
-
-package chacha20
-
-import (
-	"golang.org/x/sys/cpu"
-)
-
-var haveAsm = cpu.S390X.HasVX
-
-const bufSize = 256
-
-// xorKeyStreamVX is an assembly implementation of XORKeyStream. It must only
-// be called when the vector facility is available.
-// Implementation in asm_s390x.s.
-//go:noescape
-func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32, buf *[256]byte, len *int)
-
-func (c *Cipher) xorKeyStreamAsm(dst, src []byte) {
-	xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter, &c.buf, &c.len)
-}
-
-// EXRL targets, DO NOT CALL!
-func mvcSrcToBuf()
-func mvcBufToDst()
--- a/vendor/golang.org/x/crypto/poly1305/bits_compat.go
+++ b/vendor/golang.org/x/crypto/poly1305/bits_compat.go
@ -0,0 +1,39 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !go1.13
+
+package poly1305
+
+// Generic fallbacks for the math/bits intrinsics, copied from
+// src/math/bits/bits.go. They were added in Go 1.12, but Add64 and Sum64 had
+// variable time fallbacks until Go 1.13.
+
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
+	sum = x + y + carry
+	carryOut = ((x & y) | ((x | y) &^ sum)) >> 63
+	return
+}
+
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	diff = x - y - borrow
+	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63
+	return
+}
+
+func bitsMul64(x, y uint64) (hi, lo uint64) {
+	const mask32 = 1<<32 - 1
+	x0 := x & mask32
+	x1 := x >> 32
+	y0 := y & mask32
+	y1 := y >> 32
+	w0 := x0 * y0
+	t := x1*y0 + w0>>32
+	w1 := t & mask32
+	w2 := t >> 32
+	w1 += x0 * y1
+	hi = x1*y1 + w2 + w1>>32
+	lo = x * y
+	return
+}
--- a/vendor/golang.org/x/crypto/poly1305/bits_go1.13.go
+++ b/vendor/golang.org/x/crypto/poly1305/bits_go1.13.go
@ -0,0 +1,21 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.13
+
+package poly1305
+
+import "math/bits"
+
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
+	return bits.Add64(x, y, carry)
+}
+
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	return bits.Sub64(x, y, borrow)
+}
+
+func bitsMul64(x, y uint64) (hi, lo uint64) {
+	return bits.Mul64(x, y)
+}
--- a/vendor/golang.org/x/crypto/poly1305/mac_noasm.go
+++ b/vendor/golang.org/x/crypto/poly1305/mac_noasm.go
@ -2,10 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build !amd64,!ppc64le gccgo appengine
+// +build !amd64,!ppc64le,!s390x gccgo purego

 package poly1305

 type mac struct{ macGeneric }
-
-func newMAC(key *[32]byte) mac { return mac{newMACGeneric(key)} }
--- a/vendor/golang.org/x/crypto/poly1305/poly1305.go
+++ b/vendor/golang.org/x/crypto/poly1305/poly1305.go
@ -22,8 +22,16 @@ import "crypto/subtle"
 // TagSize is the size, in bytes, of a poly1305 authenticator.
 const TagSize = 16

-// Verify returns true if mac is a valid authenticator for m with the given
-// key.
+// Sum generates an authenticator for msg using a one-time key and puts the
+// 16-byte result into out. Authenticating two different messages with the same
+// key allows an attacker to forge messages at will.
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
+	h := New(key)
+	h.Write(m)
+	h.Sum(out[:0])
+}
+
+// Verify returns true if mac is a valid authenticator for m with the given key.
 func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
 	var tmp [16]byte
 	Sum(&tmp, m, key)
@ -40,10 +48,9 @@ func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
 // two different messages with the same key allows an attacker
 // to forge messages at will.
 func New(key *[32]byte) *MAC {
-	return &MAC{
-		mac:       newMAC(key),
-		finalized: false,
-	}
+	m := &MAC{}
+	initialize(key, &m.macState)
+	return m
 }

 // MAC is an io.Writer computing an authentication tag
@ -52,7 +59,7 @@ func New(key *[32]byte) *MAC {
 // MAC cannot be used like common hash.Hash implementations,
 // because using a poly1305 key twice breaks its security.
 // Therefore writing data to a running MAC after calling
-// Sum causes it to panic.
+// Sum or Verify causes it to panic.
 type MAC struct {
 	mac // platform-dependent implementation

@ -65,10 +72,10 @@ func (h *MAC) Size() int { return TagSize }
 // Write adds more data to the running message authentication code.
 // It never returns an error.
 //
-// It must not be called after the first call of Sum.
+// It must not be called after the first call of Sum or Verify.
 func (h *MAC) Write(p []byte) (n int, err error) {
 	if h.finalized {
-		panic("poly1305: write to MAC after Sum")
+		panic("poly1305: write to MAC after Sum or Verify")
 	}
 	return h.mac.Write(p)
 }
@ -81,3 +88,12 @@ func (h *MAC) Sum(b []byte) []byte {
 	h.finalized = true
 	return append(b, mac[:]...)
 }
+
+// Verify returns whether the authenticator of all data written to
+// the message authentication code matches the expected value.
+func (h *MAC) Verify(expected []byte) bool {
+	var mac [TagSize]byte
+	h.mac.Sum(&mac)
+	h.finalized = true
+	return subtle.ConstantTimeCompare(expected, mac[:]) == 1
+}
--- a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go
@ -2,67 +2,46 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build amd64,!gccgo,!appengine
+// +build !gccgo,!purego

 package poly1305

 //go:noescape
-func initialize(state *[7]uint64, key *[32]byte)
+func update(state *macState, msg []byte)

-//go:noescape
-func update(state *[7]uint64, msg []byte)
-
-//go:noescape
-func finalize(tag *[TagSize]byte, state *[7]uint64)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	h := newMAC(key)
-	h.Write(m)
-	h.Sum(out)
-}
-
-func newMAC(key *[32]byte) (h mac) {
-	initialize(&h.state, key)
-	return
-}
-
-type mac struct {
-	state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
-
-	buffer [TagSize]byte
-	offset int
-}
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
+// updateGeneric to update.
+//
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
+// using function pointers would carry a major performance cost.
+type mac struct{ macGeneric }

-func (h *mac) Write(p []byte) (n int, err error) {
-	n = len(p)
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
 	if h.offset > 0 {
-		remaining := TagSize - h.offset
-		if n < remaining {
-			h.offset += copy(h.buffer[h.offset:], p)
-			return n, nil
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
 		}
-		copy(h.buffer[h.offset:], p[:remaining])
-		p = p[remaining:]
+		p = p[n:]
 		h.offset = 0
-		update(&h.state, h.buffer[:])
+		update(&h.macState, h.buffer[:])
 	}
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
-		update(&h.state, p[:nn])
-		p = p[nn:]
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		update(&h.macState, p[:n])
+		p = p[n:]
 	}
 	if len(p) > 0 {
 		h.offset += copy(h.buffer[h.offset:], p)
 	}
-	return n, nil
+	return nn, nil
 }

 func (h *mac) Sum(out *[16]byte) {
-	state := h.state
+	state := h.macState
 	if h.offset > 0 {
 		update(&state, h.buffer[:h.offset])
 	}
-	finalize(out, &state)
+	finalize(out, &state.h, &state.s)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build amd64,!gccgo,!appengine
+// +build !gccgo,!purego

 #include "textflag.h"

@ -54,10 +54,6 @@
 	ADCQ  t3, h1;                  \
 	ADCQ  $0, h2

-DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
-DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
-GLOBL ·poly1305Mask<>(SB), RODATA, $16
-
 // func update(state *[7]uint64, msg []byte)
 TEXT ·update(SB), $0-32
 	MOVQ state+0(FP), DI
@ -110,39 +106,3 @@ done:
 	MOVQ R9, 8(DI)
 	MOVQ R10, 16(DI)
 	RET
-
-// func initialize(state *[7]uint64, key *[32]byte)
-TEXT ·initialize(SB), $0-16
-	MOVQ state+0(FP), DI
-	MOVQ key+8(FP), SI
-
-	// state[0...7] is initialized with zero
-	MOVOU 0(SI), X0
-	MOVOU 16(SI), X1
-	MOVOU ·poly1305Mask<>(SB), X2
-	PAND  X2, X0
-	MOVOU X0, 24(DI)
-	MOVOU X1, 40(DI)
-	RET
-
-// func finalize(tag *[TagSize]byte, state *[7]uint64)
-TEXT ·finalize(SB), $0-16
-	MOVQ tag+0(FP), DI
-	MOVQ state+8(FP), SI
-
-	MOVQ    0(SI), AX
-	MOVQ    8(SI), BX
-	MOVQ    16(SI), CX
-	MOVQ    AX, R8
-	MOVQ    BX, R9
-	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
-	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
-	SBBQ    $3, CX
-	CMOVQCS R8, AX
-	CMOVQCS R9, BX
-	ADDQ    40(SI), AX
-	ADCQ    48(SI), BX
-
-	MOVQ AX, 0(DI)
-	MOVQ BX, 8(DI)
-	RET
--- a/vendor/golang.org/x/crypto/poly1305/sum_arm.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.go
@ -1,22 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build arm,!gccgo,!appengine,!nacl
-
-package poly1305
-
-// This function is implemented in sum_arm.s
-//go:noescape
-func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	var mPtr *byte
-	if len(m) > 0 {
-		mPtr = &m[0]
-	}
-	poly1305_auth_armv6(out, mPtr, uint32(len(m)), key)
-}
--- a/vendor/golang.org/x/crypto/poly1305/sum_arm.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.s
@ -1,427 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build arm,!gccgo,!appengine,!nacl
-
-#include "textflag.h"
-
-// This code was translated into a form compatible with 5a from the public
-// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
-
-DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
-DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
-DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
-DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
-DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
-GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20
-
-// Warning: the linker may use R11 to synthesize certain instructions. Please
-// take care and verify that no synthetic instructions use it.
-
-TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0
-	// Needs 16 bytes of stack and 64 bytes of space pointed to by R0.  (It
-	// might look like it's only 60 bytes of space but the final four bytes
-	// will be written by another function.) We need to skip over four
-	// bytes of stack because that's saving the value of 'g'.
-	ADD       $4, R13, R8
-	MOVM.IB   [R4-R7], (R8)
-	MOVM.IA.W (R1), [R2-R5]
-	MOVW      $·poly1305_init_constants_armv6<>(SB), R7
-	MOVW      R2, R8
-	MOVW      R2>>26, R9
-	MOVW      R3>>20, g
-	MOVW      R4>>14, R11
-	MOVW      R5>>8, R12
-	ORR       R3<<6, R9, R9
-	ORR       R4<<12, g, g
-	ORR       R5<<18, R11, R11
-	MOVM.IA   (R7), [R2-R6]
-	AND       R8, R2, R2
-	AND       R9, R3, R3
-	AND       g, R4, R4
-	AND       R11, R5, R5
-	AND       R12, R6, R6
-	MOVM.IA.W [R2-R6], (R0)
-	EOR       R2, R2, R2
-	EOR       R3, R3, R3
-	EOR       R4, R4, R4
-	EOR       R5, R5, R5
-	EOR       R6, R6, R6
-	MOVM.IA.W [R2-R6], (R0)
-	MOVM.IA.W (R1), [R2-R5]
-	MOVM.IA   [R2-R6], (R0)
-	ADD       $20, R13, R0
-	MOVM.DA   (R0), [R4-R7]
-	RET
-
-#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
-	MOVBU (offset+0)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+0)(Rdst); \
-	MOVBU (offset+1)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+1)(Rdst); \
-	MOVBU (offset+2)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+2)(Rdst); \
-	MOVBU (offset+3)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+3)(Rdst)
-
-TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0
-	// Needs 24 bytes of stack for saved registers and then 88 bytes of
-	// scratch space after that. We assume that 24 bytes at (R13) have
-	// already been used: four bytes for the link register saved in the
-	// prelude of poly1305_auth_armv6, four bytes for saving the value of g
-	// in that function and 16 bytes of scratch space used around
-	// poly1305_finish_ext_armv6_skip1.
-	ADD     $24, R13, R12
-	MOVM.IB [R4-R8, R14], (R12)
-	MOVW    R0, 88(R13)
-	MOVW    R1, 92(R13)
-	MOVW    R2, 96(R13)
-	MOVW    R1, R14
-	MOVW    R2, R12
-	MOVW    56(R0), R8
-	WORD    $0xe1180008                // TST R8, R8 not working see issue 5921
-	EOR     R6, R6, R6
-	MOVW.EQ $(1<<24), R6
-	MOVW    R6, 84(R13)
-	ADD     $116, R13, g
-	MOVM.IA (R0), [R0-R9]
-	MOVM.IA [R0-R4], (g)
-	CMP     $16, R12
-	BLO     poly1305_blocks_armv6_done
-
-poly1305_blocks_armv6_mainloop:
-	WORD    $0xe31e0003                            // TST R14, #3 not working see issue 5921
-	BEQ     poly1305_blocks_armv6_mainloop_aligned
-	ADD     $100, R13, g
-	MOVW_UNALIGNED(R14, g, R0, 0)
-	MOVW_UNALIGNED(R14, g, R0, 4)
-	MOVW_UNALIGNED(R14, g, R0, 8)
-	MOVW_UNALIGNED(R14, g, R0, 12)
-	MOVM.IA (g), [R0-R3]
-	ADD     $16, R14
-	B       poly1305_blocks_armv6_mainloop_loaded
-
-poly1305_blocks_armv6_mainloop_aligned:
-	MOVM.IA.W (R14), [R0-R3]
-
-poly1305_blocks_armv6_mainloop_loaded:
-	MOVW    R0>>26, g
-	MOVW    R1>>20, R11
-	MOVW    R2>>14, R12
-	MOVW    R14, 92(R13)
-	MOVW    R3>>8, R4
-	ORR     R1<<6, g, g
-	ORR     R2<<12, R11, R11
-	ORR     R3<<18, R12, R12
-	BIC     $0xfc000000, R0, R0
-	BIC     $0xfc000000, g, g
-	MOVW    84(R13), R3
-	BIC     $0xfc000000, R11, R11
-	BIC     $0xfc000000, R12, R12
-	ADD     R0, R5, R5
-	ADD     g, R6, R6
-	ORR     R3, R4, R4
-	ADD     R11, R7, R7
-	ADD     $116, R13, R14
-	ADD     R12, R8, R8
-	ADD     R4, R9, R9
-	MOVM.IA (R14), [R0-R4]
-	MULLU   R4, R5, (R11, g)
-	MULLU   R3, R5, (R14, R12)
-	MULALU  R3, R6, (R11, g)
-	MULALU  R2, R6, (R14, R12)
-	MULALU  R2, R7, (R11, g)
-	MULALU  R1, R7, (R14, R12)
-	ADD     R4<<2, R4, R4
-	ADD     R3<<2, R3, R3
-	MULALU  R1, R8, (R11, g)
-	MULALU  R0, R8, (R14, R12)
-	MULALU  R0, R9, (R11, g)
-	MULALU  R4, R9, (R14, R12)
-	MOVW    g, 76(R13)
-	MOVW    R11, 80(R13)
-	MOVW    R12, 68(R13)
-	MOVW    R14, 72(R13)
-	MULLU   R2, R5, (R11, g)
-	MULLU   R1, R5, (R14, R12)
-	MULALU  R1, R6, (R11, g)
-	MULALU  R0, R6, (R14, R12)
-	MULALU  R0, R7, (R11, g)
-	MULALU  R4, R7, (R14, R12)
-	ADD     R2<<2, R2, R2
-	ADD     R1<<2, R1, R1
-	MULALU  R4, R8, (R11, g)
-	MULALU  R3, R8, (R14, R12)
-	MULALU  R3, R9, (R11, g)
-	MULALU  R2, R9, (R14, R12)
-	MOVW    g, 60(R13)
-	MOVW    R11, 64(R13)
-	MOVW    R12, 52(R13)
-	MOVW    R14, 56(R13)
-	MULLU   R0, R5, (R11, g)
-	MULALU  R4, R6, (R11, g)
-	MULALU  R3, R7, (R11, g)
-	MULALU  R2, R8, (R11, g)
-	MULALU  R1, R9, (R11, g)
-	ADD     $52, R13, R0
-	MOVM.IA (R0), [R0-R7]
-	MOVW    g>>26, R12
-	MOVW    R4>>26, R14
-	ORR     R11<<6, R12, R12
-	ORR     R5<<6, R14, R14
-	BIC     $0xfc000000, g, g
-	BIC     $0xfc000000, R4, R4
-	ADD.S   R12, R0, R0
-	ADC     $0, R1, R1
-	ADD.S   R14, R6, R6
-	ADC     $0, R7, R7
-	MOVW    R0>>26, R12
-	MOVW    R6>>26, R14
-	ORR     R1<<6, R12, R12
-	ORR     R7<<6, R14, R14
-	BIC     $0xfc000000, R0, R0
-	BIC     $0xfc000000, R6, R6
-	ADD     R14<<2, R14, R14
-	ADD.S   R12, R2, R2
-	ADC     $0, R3, R3
-	ADD     R14, g, g
-	MOVW    R2>>26, R12
-	MOVW    g>>26, R14
-	ORR     R3<<6, R12, R12
-	BIC     $0xfc000000, g, R5
-	BIC     $0xfc000000, R2, R7
-	ADD     R12, R4, R4
-	ADD     R14, R0, R0
-	MOVW    R4>>26, R12
-	BIC     $0xfc000000, R4, R8
-	ADD     R12, R6, R9
-	MOVW    96(R13), R12
-	MOVW    92(R13), R14
-	MOVW    R0, R6
-	CMP     $32, R12
-	SUB     $16, R12, R12
-	MOVW    R12, 96(R13)
-	BHS     poly1305_blocks_armv6_mainloop
-
-poly1305_blocks_armv6_done:
-	MOVW    88(R13), R12
-	MOVW    R5, 20(R12)
-	MOVW    R6, 24(R12)
-	MOVW    R7, 28(R12)
-	MOVW    R8, 32(R12)
-	MOVW    R9, 36(R12)
-	ADD     $48, R13, R0
-	MOVM.DA (R0), [R4-R8, R14]
-	RET
-
-#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
-	MOVBU.P 1(Rsrc), Rtmp; \
-	MOVBU.P Rtmp, 1(Rdst); \
-	MOVBU.P 1(Rsrc), Rtmp; \
-	MOVBU.P Rtmp, 1(Rdst)
-
-#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
-	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
-	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
-
-// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key)
-TEXT ·poly1305_auth_armv6(SB), $196-16
-	// The value 196, just above, is the sum of 64 (the size of the context
-	// structure) and 132 (the amount of stack needed).
-	//
-	// At this point, the stack pointer (R13) has been moved down. It
-	// points to the saved link register and there's 196 bytes of free
-	// space above it.
-	//
-	// The stack for this function looks like:
-	//
-	// +---------------------
-	// |
-	// | 64 bytes of context structure
-	// |
-	// +---------------------
-	// |
-	// | 112 bytes for poly1305_blocks_armv6
-	// |
-	// +---------------------
-	// | 16 bytes of final block, constructed at
-	// | poly1305_finish_ext_armv6_skip8
-	// +---------------------
-	// | four bytes of saved 'g'
-	// +---------------------
-	// | lr, saved by prelude    <- R13 points here
-	// +---------------------
-	MOVW g, 4(R13)
-
-	MOVW out+0(FP), R4
-	MOVW m+4(FP), R5
-	MOVW mlen+8(FP), R6
-	MOVW key+12(FP), R7
-
-	ADD  $136, R13, R0 // 136 = 4 + 4 + 16 + 112
-	MOVW R7, R1
-
-	// poly1305_init_ext_armv6 will write to the stack from R13+4, but
-	// that's ok because none of the other values have been written yet.
-	BL    poly1305_init_ext_armv6<>(SB)
-	BIC.S $15, R6, R2
-	BEQ   poly1305_auth_armv6_noblocks
-	ADD   $136, R13, R0
-	MOVW  R5, R1
-	ADD   R2, R5, R5
-	SUB   R2, R6, R6
-	BL    poly1305_blocks_armv6<>(SB)
-
-poly1305_auth_armv6_noblocks:
-	ADD  $136, R13, R0
-	MOVW R5, R1
-	MOVW R6, R2
-	MOVW R4, R3
-
-	MOVW  R0, R5
-	MOVW  R1, R6
-	MOVW  R2, R7
-	MOVW  R3, R8
-	AND.S R2, R2, R2
-	BEQ   poly1305_finish_ext_armv6_noremaining
-	EOR   R0, R0
-	ADD   $8, R13, R9                           // 8 = offset to 16 byte scratch space
-	MOVW  R0, (R9)
-	MOVW  R0, 4(R9)
-	MOVW  R0, 8(R9)
-	MOVW  R0, 12(R9)
-	WORD  $0xe3110003                           // TST R1, #3 not working see issue 5921
-	BEQ   poly1305_finish_ext_armv6_aligned
-	WORD  $0xe3120008                           // TST R2, #8 not working see issue 5921
-	BEQ   poly1305_finish_ext_armv6_skip8
-	MOVWP_UNALIGNED(R1, R9, g)
-	MOVWP_UNALIGNED(R1, R9, g)
-
-poly1305_finish_ext_armv6_skip8:
-	WORD $0xe3120004                     // TST $4, R2 not working see issue 5921
-	BEQ  poly1305_finish_ext_armv6_skip4
-	MOVWP_UNALIGNED(R1, R9, g)
-
-poly1305_finish_ext_armv6_skip4:
-	WORD $0xe3120002                     // TST $2, R2 not working see issue 5921
-	BEQ  poly1305_finish_ext_armv6_skip2
-	MOVHUP_UNALIGNED(R1, R9, g)
-	B    poly1305_finish_ext_armv6_skip2
-
-poly1305_finish_ext_armv6_aligned:
-	WORD      $0xe3120008                             // TST R2, #8 not working see issue 5921
-	BEQ       poly1305_finish_ext_armv6_skip8_aligned
-	MOVM.IA.W (R1), [g-R11]
-	MOVM.IA.W [g-R11], (R9)
-
-poly1305_finish_ext_armv6_skip8_aligned:
-	WORD   $0xe3120004                             // TST $4, R2 not working see issue 5921
-	BEQ    poly1305_finish_ext_armv6_skip4_aligned
-	MOVW.P 4(R1), g
-	MOVW.P g, 4(R9)
-
-poly1305_finish_ext_armv6_skip4_aligned:
-	WORD    $0xe3120002                     // TST $2, R2 not working see issue 5921
-	BEQ     poly1305_finish_ext_armv6_skip2
-	MOVHU.P 2(R1), g
-	MOVH.P  g, 2(R9)
-
-poly1305_finish_ext_armv6_skip2:
-	WORD    $0xe3120001                     // TST $1, R2 not working see issue 5921
-	BEQ     poly1305_finish_ext_armv6_skip1
-	MOVBU.P 1(R1), g
-	MOVBU.P g, 1(R9)
-
-poly1305_finish_ext_armv6_skip1:
-	MOVW  $1, R11
-	MOVBU R11, 0(R9)
-	MOVW  R11, 56(R5)
-	MOVW  R5, R0
-	ADD   $8, R13, R1
-	MOVW  $16, R2
-	BL    poly1305_blocks_armv6<>(SB)
-
-poly1305_finish_ext_armv6_noremaining:
-	MOVW      20(R5), R0
-	MOVW      24(R5), R1
-	MOVW      28(R5), R2
-	MOVW      32(R5), R3
-	MOVW      36(R5), R4
-	MOVW      R4>>26, R12
-	BIC       $0xfc000000, R4, R4
-	ADD       R12<<2, R12, R12
-	ADD       R12, R0, R0
-	MOVW      R0>>26, R12
-	BIC       $0xfc000000, R0, R0
-	ADD       R12, R1, R1
-	MOVW      R1>>26, R12
-	BIC       $0xfc000000, R1, R1
-	ADD       R12, R2, R2
-	MOVW      R2>>26, R12
-	BIC       $0xfc000000, R2, R2
-	ADD       R12, R3, R3
-	MOVW      R3>>26, R12
-	BIC       $0xfc000000, R3, R3
-	ADD       R12, R4, R4
-	ADD       $5, R0, R6
-	MOVW      R6>>26, R12
-	BIC       $0xfc000000, R6, R6
-	ADD       R12, R1, R7
-	MOVW      R7>>26, R12
-	BIC       $0xfc000000, R7, R7
-	ADD       R12, R2, g
-	MOVW      g>>26, R12
-	BIC       $0xfc000000, g, g
-	ADD       R12, R3, R11
-	MOVW      $-(1<<26), R12
-	ADD       R11>>26, R12, R12
-	BIC       $0xfc000000, R11, R11
-	ADD       R12, R4, R9
-	MOVW      R9>>31, R12
-	SUB       $1, R12
-	AND       R12, R6, R6
-	AND       R12, R7, R7
-	AND       R12, g, g
-	AND       R12, R11, R11
-	AND       R12, R9, R9
-	MVN       R12, R12
-	AND       R12, R0, R0
-	AND       R12, R1, R1
-	AND       R12, R2, R2
-	AND       R12, R3, R3
-	AND       R12, R4, R4
-	ORR       R6, R0, R0
-	ORR       R7, R1, R1
-	ORR       g, R2, R2
-	ORR       R11, R3, R3
-	ORR       R9, R4, R4
-	ORR       R1<<26, R0, R0
-	MOVW      R1>>6, R1
-	ORR       R2<<20, R1, R1
-	MOVW      R2>>12, R2
-	ORR       R3<<14, R2, R2
-	MOVW      R3>>18, R3
-	ORR       R4<<8, R3, R3
-	MOVW      40(R5), R6
-	MOVW      44(R5), R7
-	MOVW      48(R5), g
-	MOVW      52(R5), R11
-	ADD.S     R6, R0, R0
-	ADC.S     R7, R1, R1
-	ADC.S     g, R2, R2
-	ADC.S     R11, R3, R3
-	MOVM.IA   [R0-R3], (R8)
-	MOVW      R5, R12
-	EOR       R0, R0, R0
-	EOR       R1, R1, R1
-	EOR       R2, R2, R2
-	EOR       R3, R3, R3
-	EOR       R4, R4, R4
-	EOR       R5, R5, R5
-	EOR       R6, R6, R6
-	EOR       R7, R7, R7
-	MOVM.IA.W [R0-R7], (R12)
-	MOVM.IA   [R0-R7], (R12)
-	MOVW      4(R13), g
-	RET
--- a/vendor/golang.org/x/crypto/poly1305/sum_generic.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_generic.go
@ -2,171 +2,309 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

+// This file provides the generic implementation of Sum and MAC. Other files
+// might provide optimized assembly implementations of some of this code.
+
 package poly1305

 import "encoding/binary"

-const (
-	msgBlock   = uint32(1 << 24)
-	finalBlock = uint32(0)
-)
+// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag
+// for a 64 bytes message is approximately
+//
+//     s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r  mod  2¹³⁰ - 5
+//
+// for some secret r and s. It can be computed sequentially like
+//
+//     for len(msg) > 0:
+//         h += read(msg, 16)
+//         h *= r
+//         h %= 2¹³⁰ - 5
+//     return h + s
+//
+// All the complexity is about doing performant constant-time math on numbers
+// larger than any available numeric type.

-// sumGeneric generates an authenticator for msg using a one-time key and
-// puts the 16-byte result into out. This is the generic implementation of
-// Sum and should be called if no assembly implementation is available.
 func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
 	h := newMACGeneric(key)
 	h.Write(msg)
 	h.Sum(out)
 }

-func newMACGeneric(key *[32]byte) (h macGeneric) {
-	h.r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff
-	h.r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03
-	h.r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff
-	h.r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff
-	h.r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff
-
-	h.s[0] = binary.LittleEndian.Uint32(key[16:])
-	h.s[1] = binary.LittleEndian.Uint32(key[20:])
-	h.s[2] = binary.LittleEndian.Uint32(key[24:])
-	h.s[3] = binary.LittleEndian.Uint32(key[28:])
-	return
+func newMACGeneric(key *[32]byte) macGeneric {
+	m := macGeneric{}
+	initialize(key, &m.macState)
+	return m
+}
+
+// macState holds numbers in saturated 64-bit little-endian limbs. That is,
+// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸.
+type macState struct {
+	// h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but
+	// can grow larger during and after rounds. It must, however, remain below
+	// 2 * (2¹³⁰ - 5).
+	h [3]uint64
+	// r and s are the private key components.
+	r [2]uint64
+	s [2]uint64
 }

 type macGeneric struct {
-	h, r [5]uint32
-	s    [4]uint32
+	macState

 	buffer [TagSize]byte
 	offset int
 }

-func (h *macGeneric) Write(p []byte) (n int, err error) {
-	n = len(p)
+// Write splits the incoming message into TagSize chunks, and passes them to
+// update. It buffers incomplete chunks.
+func (h *macGeneric) Write(p []byte) (int, error) {
+	nn := len(p)
 	if h.offset > 0 {
-		remaining := TagSize - h.offset
-		if n < remaining {
-			h.offset += copy(h.buffer[h.offset:], p)
-			return n, nil
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
 		}
-		copy(h.buffer[h.offset:], p[:remaining])
-		p = p[remaining:]
+		p = p[n:]
 		h.offset = 0
-		updateGeneric(h.buffer[:], msgBlock, &(h.h), &(h.r))
+		updateGeneric(&h.macState, h.buffer[:])
 	}
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
-		updateGeneric(p, msgBlock, &(h.h), &(h.r))
-		p = p[nn:]
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		updateGeneric(&h.macState, p[:n])
+		p = p[n:]
 	}
 	if len(p) > 0 {
 		h.offset += copy(h.buffer[h.offset:], p)
 	}
-	return n, nil
+	return nn, nil
 }

-func (h *macGeneric) Sum(out *[16]byte) {
-	H, R := h.h, h.r
+// Sum flushes the last incomplete chunk from the buffer, if any, and generates
+// the MAC output. It does not modify its state, in order to allow for multiple
+// calls to Sum, even if no Write is allowed after Sum.
+func (h *macGeneric) Sum(out *[TagSize]byte) {
+	state := h.macState
 	if h.offset > 0 {
-		var buffer [TagSize]byte
-		copy(buffer[:], h.buffer[:h.offset])
-		buffer[h.offset] = 1 // invariant: h.offset < TagSize
-		updateGeneric(buffer[:], finalBlock, &H, &R)
+		updateGeneric(&state, h.buffer[:h.offset])
 	}
-	finalizeGeneric(out, &H, &(h.s))
+	finalize(out, &state.h, &state.s)
+}
+
+// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It
+// clears some bits of the secret coefficient to make it possible to implement
+// multiplication more efficiently.
+const (
+	rMask0 = 0x0FFFFFFC0FFFFFFF
+	rMask1 = 0x0FFFFFFC0FFFFFFC
+)
+
+// initialize loads the 256-bit key into the two 128-bit secret values r and s.
+func initialize(key *[32]byte, m *macState) {
+	m.r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0
+	m.r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1
+	m.s[0] = binary.LittleEndian.Uint64(key[16:24])
+	m.s[1] = binary.LittleEndian.Uint64(key[24:32])
+}
+
+// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
+// bits.Mul64 and bits.Add64 intrinsics.
+type uint128 struct {
+	lo, hi uint64
+}
+
+func mul64(a, b uint64) uint128 {
+	hi, lo := bitsMul64(a, b)
+	return uint128{lo, hi}
 }

-func updateGeneric(msg []byte, flag uint32, h, r *[5]uint32) {
-	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
-	r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
-	R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
-
-	for len(msg) >= TagSize {
-		// h += msg
-		h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
-		h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
-		h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
-		h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
-		h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
-
-		// h *= r
-		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
-		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
-		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
-		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
-		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
-
-		// h %= p
-		h0 = uint32(d0) & 0x3ffffff
-		h1 = uint32(d1) & 0x3ffffff
-		h2 = uint32(d2) & 0x3ffffff
-		h3 = uint32(d3) & 0x3ffffff
-		h4 = uint32(d4) & 0x3ffffff
-
-		h0 += uint32(d4>>26) * 5
-		h1 += h0 >> 26
-		h0 = h0 & 0x3ffffff
-
-		msg = msg[TagSize:]
+func add128(a, b uint128) uint128 {
+	lo, c := bitsAdd64(a.lo, b.lo, 0)
+	hi, c := bitsAdd64(a.hi, b.hi, c)
+	if c != 0 {
+		panic("poly1305: unexpected overflow")
 	}
+	return uint128{lo, hi}
+}

-	h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
+func shiftRightBy2(a uint128) uint128 {
+	a.lo = a.lo>>2 | (a.hi&3)<<62
+	a.hi = a.hi >> 2
+	return a
 }

-func finalizeGeneric(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
-	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
-
-	// h %= p reduction
-	h2 += h1 >> 26
-	h1 &= 0x3ffffff
-	h3 += h2 >> 26
-	h2 &= 0x3ffffff
-	h4 += h3 >> 26
-	h3 &= 0x3ffffff
-	h0 += 5 * (h4 >> 26)
-	h4 &= 0x3ffffff
-	h1 += h0 >> 26
-	h0 &= 0x3ffffff
-
-	// h - p
-	t0 := h0 + 5
-	t1 := h1 + (t0 >> 26)
-	t2 := h2 + (t1 >> 26)
-	t3 := h3 + (t2 >> 26)
-	t4 := h4 + (t3 >> 26) - (1 << 26)
-	t0 &= 0x3ffffff
-	t1 &= 0x3ffffff
-	t2 &= 0x3ffffff
-	t3 &= 0x3ffffff
-
-	// select h if h < p else h - p
-	t_mask := (t4 >> 31) - 1
-	h_mask := ^t_mask
-	h0 = (h0 & h_mask) | (t0 & t_mask)
-	h1 = (h1 & h_mask) | (t1 & t_mask)
-	h2 = (h2 & h_mask) | (t2 & t_mask)
-	h3 = (h3 & h_mask) | (t3 & t_mask)
-	h4 = (h4 & h_mask) | (t4 & t_mask)
-
-	// h %= 2^128
-	h0 |= h1 << 26
-	h1 = ((h1 >> 6) | (h2 << 20))
-	h2 = ((h2 >> 12) | (h3 << 14))
-	h3 = ((h3 >> 18) | (h4 << 8))
-
-	// s: the s part of the key
-	// tag = (h + s) % (2^128)
-	t := uint64(h0) + uint64(s[0])
-	h0 = uint32(t)
-	t = uint64(h1) + uint64(s[1]) + (t >> 32)
-	h1 = uint32(t)
-	t = uint64(h2) + uint64(s[2]) + (t >> 32)
-	h2 = uint32(t)
-	t = uint64(h3) + uint64(s[3]) + (t >> 32)
-	h3 = uint32(t)
-
-	binary.LittleEndian.PutUint32(out[0:], h0)
-	binary.LittleEndian.PutUint32(out[4:], h1)
-	binary.LittleEndian.PutUint32(out[8:], h2)
-	binary.LittleEndian.PutUint32(out[12:], h3)
+// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
+// 128 bits of message, it computes
+//
+//     h₊ = (h + m) * r  mod  2¹³⁰ - 5
+//
+// If the msg length is not a multiple of TagSize, it assumes the last
+// incomplete chunk is the final one.
+func updateGeneric(state *macState, msg []byte) {
+	h0, h1, h2 := state.h[0], state.h[1], state.h[2]
+	r0, r1 := state.r[0], state.r[1]
+
+	for len(msg) > 0 {
+		var c uint64
+
+		// For the first step, h + m, we use a chain of bits.Add64 intrinsics.
+		// The resulting value of h might exceed 2¹³⁰ - 5, but will be partially
+		// reduced at the end of the multiplication below.
+		//
+		// The spec requires us to set a bit just above the message size, not to
+		// hide leading zeroes. For full chunks, that's 1 << 128, so we can just
+		// add 1 to the most significant (2¹²⁸) limb, h2.
+		if len(msg) >= TagSize {
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
+			h2 += c + 1
+
+			msg = msg[TagSize:]
+		} else {
+			var buf [TagSize]byte
+			copy(buf[:], msg)
+			buf[len(msg)] = 1
+
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
+			h2 += c
+
+			msg = nil
+		}
+
+		// Multiplication of big number limbs is similar to elementary school
+		// columnar multiplication. Instead of digits, there are 64-bit limbs.
+		//
+		// We are multiplying a 3 limbs number, h, by a 2 limbs number, r.
+		//
+		//                        h2    h1    h0  x
+		//                              r1    r0  =
+		//                       ----------------
+		//                      h2r0  h1r0  h0r0     <-- individual 128-bit products
+		//            +   h2r1  h1r1  h0r1
+		//               ------------------------
+		//                 m3    m2    m1    m0      <-- result in 128-bit overlapping limbs
+		//               ------------------------
+		//         m3.hi m2.hi m1.hi m0.hi           <-- carry propagation
+		//     +         m3.lo m2.lo m1.lo m0.lo
+		//        -------------------------------
+		//           t4    t3    t2    t1    t0      <-- final result in 64-bit limbs
+		//
+		// The main difference from pen-and-paper multiplication is that we do
+		// carry propagation in a separate step, as if we wrote two digit sums
+		// at first (the 128-bit limbs), and then carried the tens all at once.
+
+		h0r0 := mul64(h0, r0)
+		h1r0 := mul64(h1, r0)
+		h2r0 := mul64(h2, r0)
+		h0r1 := mul64(h0, r1)
+		h1r1 := mul64(h1, r1)
+		h2r1 := mul64(h2, r1)
+
+		// Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their
+		// top 4 bits cleared by rMask{0,1}, we know that their product is not going
+		// to overflow 64 bits, so we can ignore the high part of the products.
+		//
+		// This also means that the product doesn't have a fifth limb (t4).
+		if h2r0.hi != 0 {
+			panic("poly1305: unexpected overflow")
+		}
+		if h2r1.hi != 0 {
+			panic("poly1305: unexpected overflow")
+		}
+
+		m0 := h0r0
+		m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again
+		m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1.
+		m3 := h2r1
+
+		t0 := m0.lo
+		t1, c := bitsAdd64(m1.lo, m0.hi, 0)
+		t2, c := bitsAdd64(m2.lo, m1.hi, c)
+		t3, _ := bitsAdd64(m3.lo, m2.hi, c)
+
+		// Now we have the result as 4 64-bit limbs, and we need to reduce it
+		// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do
+		// a cheap partial reduction according to the reduction identity
+		//
+		//     c * 2¹³⁰ + n  =  c * 5 + n  mod  2¹³⁰ - 5
+		//
+		// because 2¹³⁰ = 5 mod 2¹³⁰ - 5. Partial reduction since the result is
+		// likely to be larger than 2¹³⁰ - 5, but still small enough to fit the
+		// assumptions we make about h in the rest of the code.
+		//
+		// See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23
+
+		// We split the final result at the 2¹³⁰ mark into h and cc, the carry.
+		// Note that the carry bits are effectively shifted left by 2, in other
+		// words, cc = c * 4 for the c in the reduction identity.
+		h0, h1, h2 = t0, t1, t2&maskLow2Bits
+		cc := uint128{t2 & maskNotLow2Bits, t3}
+
+		// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.
+
+		h0, c = bitsAdd64(h0, cc.lo, 0)
+		h1, c = bitsAdd64(h1, cc.hi, c)
+		h2 += c
+
+		cc = shiftRightBy2(cc)
+
+		h0, c = bitsAdd64(h0, cc.lo, 0)
+		h1, c = bitsAdd64(h1, cc.hi, c)
+		h2 += c
+
+		// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most
+		//
+		//     5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1
+	}
+
+	state.h[0], state.h[1], state.h[2] = h0, h1, h2
+}
+
+const (
+	maskLow2Bits    uint64 = 0x0000000000000003
+	maskNotLow2Bits uint64 = ^maskLow2Bits
+)
+
+// select64 returns x if v == 1 and y if v == 0, in constant time.
+func select64(v, x, y uint64) uint64 { return ^(v-1)&x | (v-1)&y }
+
+// [p0, p1, p2] is 2¹³⁰ - 5 in little endian order.
+const (
+	p0 = 0xFFFFFFFFFFFFFFFB
+	p1 = 0xFFFFFFFFFFFFFFFF
+	p2 = 0x0000000000000003
+)
+
+// finalize completes the modular reduction of h and computes
+//
+//     out = h + s  mod  2¹²⁸
+//
+func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
+	h0, h1, h2 := h[0], h[1], h[2]
+
+	// After the partial reduction in updateGeneric, h might be more than
+	// 2¹³⁰ - 5, but will be less than 2 * (2¹³⁰ - 5). To complete the reduction
+	// in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the
+	// result if the subtraction underflows, and t otherwise.
+
+	hMinusP0, b := bitsSub64(h0, p0, 0)
+	hMinusP1, b := bitsSub64(h1, p1, b)
+	_, b = bitsSub64(h2, p2, b)
+
+	// h = h if h < p else h - p
+	h0 = select64(b, h0, hMinusP0)
+	h1 = select64(b, h1, hMinusP1)
+
+	// Finally, we compute the last Poly1305 step
+	//
+	//     tag = h + s  mod  2¹²⁸
+	//
+	// by just doing a wide addition with the 128 low bits of h and discarding
+	// the overflow.
+	h0, c := bitsAdd64(h0, s[0], 0)
+	h1, _ = bitsAdd64(h1, s[1], c)
+
+	binary.LittleEndian.PutUint64(out[0:8], h0)
+	binary.LittleEndian.PutUint64(out[8:16], h1)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_noasm.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_noasm.go
@ -1,16 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x,!go1.11 !arm,!amd64,!s390x,!ppc64le gccgo appengine nacl
-
-package poly1305
-
-// Sum generates an authenticator for msg using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
-	h := newMAC(key)
-	h.Write(msg)
-	h.Sum(out)
-}
--- a/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go
@ -2,67 +2,46 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build ppc64le,!gccgo,!appengine
+// +build !gccgo,!purego

 package poly1305

 //go:noescape
-func initialize(state *[7]uint64, key *[32]byte)
+func update(state *macState, msg []byte)

-//go:noescape
-func update(state *[7]uint64, msg []byte)
-
-//go:noescape
-func finalize(tag *[TagSize]byte, state *[7]uint64)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	h := newMAC(key)
-	h.Write(m)
-	h.Sum(out)
-}
-
-func newMAC(key *[32]byte) (h mac) {
-	initialize(&h.state, key)
-	return
-}
-
-type mac struct {
-	state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
-
-	buffer [TagSize]byte
-	offset int
-}
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
+// updateGeneric to update.
+//
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
+// using function pointers would carry a major performance cost.
+type mac struct{ macGeneric }

-func (h *mac) Write(p []byte) (n int, err error) {
-	n = len(p)
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
 	if h.offset > 0 {
-		remaining := TagSize - h.offset
-		if n < remaining {
-			h.offset += copy(h.buffer[h.offset:], p)
-			return n, nil
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
 		}
-		copy(h.buffer[h.offset:], p[:remaining])
-		p = p[remaining:]
+		p = p[n:]
 		h.offset = 0
-		update(&h.state, h.buffer[:])
+		update(&h.macState, h.buffer[:])
 	}
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
-		update(&h.state, p[:nn])
-		p = p[nn:]
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		update(&h.macState, p[:n])
+		p = p[n:]
 	}
 	if len(p) > 0 {
 		h.offset += copy(h.buffer[h.offset:], p)
 	}
-	return n, nil
+	return nn, nil
 }

 func (h *mac) Sum(out *[16]byte) {
-	state := h.state
+	state := h.macState
 	if h.offset > 0 {
 		update(&state, h.buffer[:h.offset])
 	}
-	finalize(out, &state)
+	finalize(out, &state.h, &state.s)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build ppc64le,!gccgo,!appengine
+// +build !gccgo,!purego

 #include "textflag.h"

@ -58,7 +58,6 @@ DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
 GLOBL ·poly1305Mask<>(SB), RODATA, $16

 // func update(state *[7]uint64, msg []byte)
-
 TEXT ·update(SB), $0-32
 	MOVD state+0(FP), R3
 	MOVD msg_base+8(FP), R4
@ -180,68 +179,3 @@ done:
 	MOVD R9, 8(R3)
 	MOVD R10, 16(R3)
 	RET
-
-// func initialize(state *[7]uint64, key *[32]byte)
-TEXT ·initialize(SB), $0-16
-	MOVD state+0(FP), R3
-	MOVD key+8(FP), R4
-
-	// state[0...7] is initialized with zero
-	// Load key
-	MOVD 0(R4), R5
-	MOVD 8(R4), R6
-	MOVD 16(R4), R7
-	MOVD 24(R4), R8
-
-	// Address of key mask
-	MOVD $·poly1305Mask<>(SB), R9
-
-	// Save original key in state
-	MOVD R7, 40(R3)
-	MOVD R8, 48(R3)
-
-	// Get mask
-	MOVD (R9), R7
-	MOVD 8(R9), R8
-
-	// And with key
-	AND R5, R7, R5
-	AND R6, R8, R6
-
-	// Save masked key in state
-	MOVD R5, 24(R3)
-	MOVD R6, 32(R3)
-	RET
-
-// func finalize(tag *[TagSize]byte, state *[7]uint64)
-TEXT ·finalize(SB), $0-16
-	MOVD tag+0(FP), R3
-	MOVD state+8(FP), R4
-
-	// Get h0, h1, h2 from state
-	MOVD 0(R4), R5
-	MOVD 8(R4), R6
-	MOVD 16(R4), R7
-
-	// Save h0, h1
-	MOVD  R5, R8
-	MOVD  R6, R9
-	MOVD  $3, R20
-	MOVD  $-1, R21
-	SUBC  $-5, R5
-	SUBE  R21, R6
-	SUBE  R20, R7
-	MOVD  $0, R21
-	SUBZE R21
-
-	// Check for carry
-	CMP  $0, R21
-	ISEL $2, R5, R8, R5
-	ISEL $2, R6, R9, R6
-	MOVD 40(R4), R8
-	MOVD 48(R4), R9
-	ADDC R8, R5
-	ADDE R9, R6
-	MOVD R5, 0(R3)
-	MOVD R6, 8(R3)
-	RET
--- a/vendor/golang.org/x/crypto/poly1305/sum_s390x.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_s390x.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build s390x,go1.11,!gccgo,!appengine
+// +build !gccgo,!purego

 package poly1305

@ -10,33 +10,66 @@ import (
 	"golang.org/x/sys/cpu"
 )

-// poly1305vx is an assembly implementation of Poly1305 that uses vector
+// updateVX is an assembly implementation of Poly1305 that uses vector
 // instructions. It must only be called if the vector facility (vx) is
 // available.
 //go:noescape
-func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
+func updateVX(state *macState, msg []byte)

-// poly1305vmsl is an assembly implementation of Poly1305 that uses vector
-// instructions, including VMSL. It must only be called if the vector facility (vx) is
-// available and if VMSL is supported.
-//go:noescape
-func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	if cpu.S390X.HasVX {
-		var mPtr *byte
-		if len(m) > 0 {
-			mPtr = &m[0]
+// mac is a replacement for macGeneric that uses a larger buffer and redirects
+// calls that would have gone to updateGeneric to updateVX if the vector
+// facility is installed.
+//
+// A larger buffer is required for good performance because the vector
+// implementation has a higher fixed cost per call than the generic
+// implementation.
+type mac struct {
+	macState
+
+	buffer [16 * TagSize]byte // size must be a multiple of block size (16)
+	offset int
+}
+
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
+	if h.offset > 0 {
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < len(h.buffer) {
+			h.offset += n
+			return nn, nil
+		}
+		p = p[n:]
+		h.offset = 0
+		if cpu.S390X.HasVX {
+			updateVX(&h.macState, h.buffer[:])
+		} else {
+			updateGeneric(&h.macState, h.buffer[:])
 		}
-		if cpu.S390X.HasVXE && len(m) > 256 {
-			poly1305vmsl(out, mPtr, uint64(len(m)), key)
+	}
+
+	tail := len(p) % len(h.buffer) // number of bytes to copy into buffer
+	body := len(p) - tail          // number of bytes to process now
+	if body > 0 {
+		if cpu.S390X.HasVX {
+			updateVX(&h.macState, p[:body])
 		} else {
-			poly1305vx(out, mPtr, uint64(len(m)), key)
+			updateGeneric(&h.macState, p[:body])
 		}
-	} else {
-		sumGeneric(out, m, key)
 	}
+	h.offset = copy(h.buffer[:], p[body:]) // copy tail bytes - can be 0
+	return nn, nil
+}
+
+func (h *mac) Sum(out *[TagSize]byte) {
+	state := h.macState
+	remainder := h.buffer[:h.offset]
+
+	// Use the generic implementation if we have 2 or fewer blocks left
+	// to sum. The vector implementation has a higher startup time.
+	if cpu.S390X.HasVX && len(remainder) > 2*TagSize {
+		updateVX(&state, remainder)
+	} else if len(remainder) > 0 {
+		updateGeneric(&state, remainder)
+	}
+	finalize(out, &state.h, &state.s)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_s390x.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_s390x.s
@ -2,115 +2,187 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build s390x,go1.11,!gccgo,!appengine
+// +build !gccgo,!purego

 #include "textflag.h"

-// Implementation of Poly1305 using the vector facility (vx).
-
-// constants
-#define MOD26 V0
-#define EX0   V1
-#define EX1   V2
-#define EX2   V3
-
-// temporaries
-#define T_0 V4
-#define T_1 V5
-#define T_2 V6
-#define T_3 V7
-#define T_4 V8
-
-// key (r)
-#define R_0  V9
-#define R_1  V10
-#define R_2  V11
-#define R_3  V12
-#define R_4  V13
-#define R5_1 V14
-#define R5_2 V15
-#define R5_3 V16
-#define R5_4 V17
-#define RSAVE_0 R5
-#define RSAVE_1 R6
-#define RSAVE_2 R7
-#define RSAVE_3 R8
-#define RSAVE_4 R9
-#define R5SAVE_1 V28
-#define R5SAVE_2 V29
-#define R5SAVE_3 V30
-#define R5SAVE_4 V31
-
-// message block
-#define F_0 V18
-#define F_1 V19
-#define F_2 V20
-#define F_3 V21
-#define F_4 V22
-
-// accumulator
-#define H_0 V23
-#define H_1 V24
-#define H_2 V25
-#define H_3 V26
-#define H_4 V27
-
-GLOBL ·keyMask<>(SB), RODATA, $16
-DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f
-DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f
-
-GLOBL ·bswapMask<>(SB), RODATA, $16
-DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908
-DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100
-
-GLOBL ·constants<>(SB), RODATA, $64
-// MOD26
-DATA ·constants<>+0(SB)/8, $0x3ffffff
-DATA ·constants<>+8(SB)/8, $0x3ffffff
+// This implementation of Poly1305 uses the vector facility (vx)
+// to process up to 2 blocks (32 bytes) per iteration using an
+// algorithm based on the one described in:
+//
+// NEON crypto, Daniel J. Bernstein & Peter Schwabe
+// https://cryptojedi.org/papers/neoncrypto-20120320.pdf
+//
+// This algorithm uses 5 26-bit limbs to represent a 130-bit
+// value. These limbs are, for the most part, zero extended and
+// placed into 64-bit vector register elements. Each vector
+// register is 128-bits wide and so holds 2 of these elements.
+// Using 26-bit limbs allows us plenty of headroom to accomodate
+// accumulations before and after multiplication without
+// overflowing either 32-bits (before multiplication) or 64-bits
+// (after multiplication).
+//
+// In order to parallelise the operations required to calculate
+// the sum we use two separate accumulators and then sum those
+// in an extra final step. For compatibility with the generic
+// implementation we perform this summation at the end of every
+// updateVX call.
+//
+// To use two accumulators we must multiply the message blocks
+// by r² rather than r. Only the final message block should be
+// multiplied by r.
+//
+// Example:
+//
+// We want to calculate the sum (h) for a 64 byte message (m):
+//
+//   h = m[0:16]r⁴ + m[16:32]r³ + m[32:48]r² + m[48:64]r
+//
+// To do this we split the calculation into the even indices
+// and odd indices of the message. These form our SIMD 'lanes':
+//
+//   h = m[ 0:16]r⁴ + m[32:48]r² +   <- lane 0
+//       m[16:32]r³ + m[48:64]r      <- lane 1
+//
+// To calculate this iteratively we refactor so that both lanes
+// are written in terms of r² and r:
+//
+//   h = (m[ 0:16]r² + m[32:48])r² + <- lane 0
+//       (m[16:32]r² + m[48:64])r    <- lane 1
+//                ^             ^
+//                |             coefficients for second iteration
+//                coefficients for first iteration
+//
+// So in this case we would have two iterations. In the first
+// both lanes are multiplied by r². In the second only the
+// first lane is multiplied by r² and the second lane is
+// instead multiplied by r. This gives use the odd and even
+// powers of r that we need from the original equation.
+//
+// Notation:
+//
+//   h - accumulator
+//   r - key
+//   m - message
+//
+//   [a, b]       - SIMD register holding two 64-bit values
+//   [a, b, c, d] - SIMD register holding four 32-bit values
+//   xᵢ[n]        - limb n of variable x with bit width i
+//
+// Limbs are expressed in little endian order, so for 26-bit
+// limbs x₂₆[4] will be the most significant limb and x₂₆[0]
+// will be the least significant limb.
+
+// masking constants
+#define MOD24 V0 // [0x0000000000ffffff, 0x0000000000ffffff] - mask low 24-bits
+#define MOD26 V1 // [0x0000000003ffffff, 0x0000000003ffffff] - mask low 26-bits
+
+// expansion constants (see EXPAND macro)
+#define EX0 V2
+#define EX1 V3
+#define EX2 V4
+
+// key (r², r or 1 depending on context)
+#define R_0 V5
+#define R_1 V6
+#define R_2 V7
+#define R_3 V8
+#define R_4 V9
+
+// precalculated coefficients (5r², 5r or 0 depending on context)
+#define R5_1 V10
+#define R5_2 V11
+#define R5_3 V12
+#define R5_4 V13
+
+// message block (m)
+#define M_0 V14
+#define M_1 V15
+#define M_2 V16
+#define M_3 V17
+#define M_4 V18
+
+// accumulator (h)
+#define H_0 V19
+#define H_1 V20
+#define H_2 V21
+#define H_3 V22
+#define H_4 V23
+
+// temporary registers (for short-lived values)
+#define T_0 V24
+#define T_1 V25
+#define T_2 V26
+#define T_3 V27
+#define T_4 V28
+
+GLOBL ·constants<>(SB), RODATA, $0x30
 // EX0
-DATA ·constants<>+16(SB)/8, $0x0006050403020100
-DATA ·constants<>+24(SB)/8, $0x1016151413121110
+DATA ·constants<>+0x00(SB)/8, $0x0006050403020100
+DATA ·constants<>+0x08(SB)/8, $0x1016151413121110
 // EX1
-DATA ·constants<>+32(SB)/8, $0x060c0b0a09080706
-DATA ·constants<>+40(SB)/8, $0x161c1b1a19181716
+DATA ·constants<>+0x10(SB)/8, $0x060c0b0a09080706
+DATA ·constants<>+0x18(SB)/8, $0x161c1b1a19181716
 // EX2
-DATA ·constants<>+48(SB)/8, $0x0d0d0d0d0d0f0e0d
-DATA ·constants<>+56(SB)/8, $0x1d1d1d1d1d1f1e1d
-
-// h = (f*g) % (2**130-5) [partial reduction]
+DATA ·constants<>+0x20(SB)/8, $0x0d0d0d0d0d0f0e0d
+DATA ·constants<>+0x28(SB)/8, $0x1d1d1d1d1d1f1e1d
+
+// MULTIPLY multiplies each lane of f and g, partially reduced
+// modulo 2¹³⁰ - 5. The result, h, consists of partial products
+// in each lane that need to be reduced further to produce the
+// final result.
+//
+//   h₁₃₀ = (f₁₃₀g₁₃₀) % 2¹³⁰ + (5f₁₃₀g₁₃₀) / 2¹³⁰
+//
+// Note that the multiplication by 5 of the high bits is
+// achieved by precalculating the multiplication of four of the
+// g coefficients by 5. These are g51-g54.
 #define MULTIPLY(f0, f1, f2, f3, f4, g0, g1, g2, g3, g4, g51, g52, g53, g54, h0, h1, h2, h3, h4) \
 	VMLOF  f0, g0, h0        \
-	VMLOF  f0, g1, h1        \
-	VMLOF  f0, g2, h2        \
 	VMLOF  f0, g3, h3        \
+	VMLOF  f0, g1, h1        \
 	VMLOF  f0, g4, h4        \
+	VMLOF  f0, g2, h2        \
 	VMLOF  f1, g54, T_0      \
-	VMLOF  f1, g0, T_1       \
-	VMLOF  f1, g1, T_2       \
 	VMLOF  f1, g2, T_3       \
+	VMLOF  f1, g0, T_1       \
 	VMLOF  f1, g3, T_4       \
+	VMLOF  f1, g1, T_2       \
 	VMALOF f2, g53, h0, h0   \
-	VMALOF f2, g54, h1, h1   \
-	VMALOF f2, g0, h2, h2    \
 	VMALOF f2, g1, h3, h3    \
+	VMALOF f2, g54, h1, h1   \
 	VMALOF f2, g2, h4, h4    \
+	VMALOF f2, g0, h2, h2    \
 	VMALOF f3, g52, T_0, T_0 \
-	VMALOF f3, g53, T_1, T_1 \
-	VMALOF f3, g54, T_2, T_2 \
 	VMALOF f3, g0, T_3, T_3  \
+	VMALOF f3, g53, T_1, T_1 \
 	VMALOF f3, g1, T_4, T_4  \
+	VMALOF f3, g54, T_2, T_2 \
 	VMALOF f4, g51, h0, h0   \
-	VMALOF f4, g52, h1, h1   \
-	VMALOF f4, g53, h2, h2   \
 	VMALOF f4, g54, h3, h3   \
+	VMALOF f4, g52, h1, h1   \
 	VMALOF f4, g0, h4, h4    \
+	VMALOF f4, g53, h2, h2   \
 	VAG    T_0, h0, h0       \
-	VAG    T_1, h1, h1       \
-	VAG    T_2, h2, h2       \
 	VAG    T_3, h3, h3       \
-	VAG    T_4, h4, h4
-
-// carry h0->h1 h3->h4, h1->h2 h4->h0, h0->h1 h2->h3, h3->h4
+	VAG    T_1, h1, h1       \
+	VAG    T_4, h4, h4       \
+	VAG    T_2, h2, h2
+
+// REDUCE performs the following carry operations in four
+// stages, as specified in Bernstein & Schwabe:
+//
+//   1: h₂₆[0]->h₂₆[1] h₂₆[3]->h₂₆[4]
+//   2: h₂₆[1]->h₂₆[2] h₂₆[4]->h₂₆[0]
+//   3: h₂₆[0]->h₂₆[1] h₂₆[2]->h₂₆[3]
+//   4: h₂₆[3]->h₂₆[4]
+//
+// The result is that all of the limbs are limited to 26-bits
+// except for h₂₆[1] and h₂₆[4] which are limited to 27-bits.
+//
+// Note that although each limb is aligned at 26-bit intervals
+// they may contain values that exceed 2²⁶ - 1, hence the need
+// to carry the excess bits in each limb.
 #define REDUCE(h0, h1, h2, h3, h4) \
 	VESRLG $26, h0, T_0  \
 	VESRLG $26, h3, T_1  \
@ -136,144 +208,155 @@ DATA ·constants<>+56(SB)/8, $0x1d1d1d1d1d1f1e1d
 	VN     MOD26, h3, h3 \
 	VAG    T_2, h4, h4

-// expand in0 into d[0] and in1 into d[1]
+// EXPAND splits the 128-bit little-endian values in0 and in1
+// into 26-bit big-endian limbs and places the results into
+// the first and second lane of d₂₆[0:4] respectively.
+//
+// The EX0, EX1 and EX2 constants are arrays of byte indices
+// for permutation. The permutation both reverses the bytes
+// in the input and ensures the bytes are copied into the
+// destination limb ready to be shifted into their final
+// position.
 #define EXPAND(in0, in1, d0, d1, d2, d3, d4) \
-	VGBM   $0x0707, d1       \ // d1=tmp
-	VPERM  in0, in1, EX2, d4 \
 	VPERM  in0, in1, EX0, d0 \
 	VPERM  in0, in1, EX1, d2 \
-	VN     d1, d4, d4        \
+	VPERM  in0, in1, EX2, d4 \
 	VESRLG $26, d0, d1       \
 	VESRLG $30, d2, d3       \
 	VESRLG $4, d2, d2        \
-	VN     MOD26, d0, d0     \
-	VN     MOD26, d1, d1     \
-	VN     MOD26, d2, d2     \
-	VN     MOD26, d3, d3
-
-// pack h4:h0 into h1:h0 (no carry)
-#define PACK(h0, h1, h2, h3, h4) \
-	VESLG $26, h1, h1  \
-	VESLG $26, h3, h3  \
-	VO    h0, h1, h0   \
-	VO    h2, h3, h2   \
-	VESLG $4, h2, h2   \
-	VLEIB $7, $48, h1  \
-	VSLB  h1, h2, h2   \
-	VO    h0, h2, h0   \
-	VLEIB $7, $104, h1 \
-	VSLB  h1, h4, h3   \
-	VO    h3, h0, h0   \
-	VLEIB $7, $24, h1  \
-	VSRLB h1, h4, h1
-
-// if h > 2**130-5 then h -= 2**130-5
-#define MOD(h0, h1, t0, t1, t2) \
-	VZERO t0          \
-	VLEIG $1, $5, t0  \
-	VACCQ h0, t0, t1  \
-	VAQ   h0, t0, t0  \
-	VONE  t2          \
-	VLEIG $1, $-4, t2 \
-	VAQ   t2, t1, t1  \
-	VACCQ h1, t1, t1  \
-	VONE  t2          \
-	VAQ   t2, t1, t1  \
-	VN    h0, t1, t2  \
-	VNC   t0, t1, t1  \
-	VO    t1, t2, h0
-
-// func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]key)
-TEXT ·poly1305vx(SB), $0-32
-	// This code processes up to 2 blocks (32 bytes) per iteration
-	// using the algorithm described in:
-	// NEON crypto, Daniel J. Bernstein & Peter Schwabe
-	// https://cryptojedi.org/papers/neoncrypto-20120320.pdf
-	LMG out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key
-
-	// load MOD26, EX0, EX1 and EX2
+	VN     MOD26, d0, d0     \ // [in0₂₆[0], in1₂₆[0]]
+	VN     MOD26, d3, d3     \ // [in0₂₆[3], in1₂₆[3]]
+	VN     MOD26, d1, d1     \ // [in0₂₆[1], in1₂₆[1]]
+	VN     MOD24, d4, d4     \ // [in0₂₆[4], in1₂₆[4]]
+	VN     MOD26, d2, d2     // [in0₂₆[2], in1₂₆[2]]
+
+// func updateVX(state *macState, msg []byte)
+TEXT ·updateVX(SB), NOSPLIT, $0
+	MOVD state+0(FP), R1
+	LMG  msg+8(FP), R2, R3 // R2=msg_base, R3=msg_len
+
+	// load EX0, EX1 and EX2
 	MOVD $·constants<>(SB), R5
-	VLM  (R5), MOD26, EX2
-
-	// setup r
-	VL   (R4), T_0
-	MOVD $·keyMask<>(SB), R6
-	VL   (R6), T_1
-	VN   T_0, T_1, T_0
-	EXPAND(T_0, T_0, R_0, R_1, R_2, R_3, R_4)
-
-	// setup r*5
-	VLEIG $0, $5, T_0
-	VLEIG $1, $5, T_0
-
-	// store r (for final block)
-	VMLOF T_0, R_1, R5SAVE_1
-	VMLOF T_0, R_2, R5SAVE_2
-	VMLOF T_0, R_3, R5SAVE_3
-	VMLOF T_0, R_4, R5SAVE_4
-	VLGVG $0, R_0, RSAVE_0
-	VLGVG $0, R_1, RSAVE_1
-	VLGVG $0, R_2, RSAVE_2
-	VLGVG $0, R_3, RSAVE_3
-	VLGVG $0, R_4, RSAVE_4
-
-	// skip r**2 calculation
+	VLM  (R5), EX0, EX2
+
+	// generate masks
+	VGMG $(64-24), $63, MOD24 // [0x00ffffff, 0x00ffffff]
+	VGMG $(64-26), $63, MOD26 // [0x03ffffff, 0x03ffffff]
+
+	// load h (accumulator) and r (key) from state
+	VZERO T_1               // [0, 0]
+	VL    0(R1), T_0        // [h₆₄[0], h₆₄[1]]
+	VLEG  $0, 16(R1), T_1   // [h₆₄[2], 0]
+	VL    24(R1), T_2       // [r₆₄[0], r₆₄[1]]
+	VPDI  $0, T_0, T_2, T_3 // [h₆₄[0], r₆₄[0]]
+	VPDI  $5, T_0, T_2, T_4 // [h₆₄[1], r₆₄[1]]
+
+	// unpack h and r into 26-bit limbs
+	// note: h₆₄[2] may have the low 3 bits set, so h₂₆[4] is a 27-bit value
+	VN     MOD26, T_3, H_0            // [h₂₆[0], r₂₆[0]]
+	VZERO  H_1                        // [0, 0]
+	VZERO  H_3                        // [0, 0]
+	VGMG   $(64-12-14), $(63-12), T_0 // [0x03fff000, 0x03fff000] - 26-bit mask with low 12 bits masked out
+	VESLG  $24, T_1, T_1              // [h₆₄[2]<<24, 0]
+	VERIMG $-26&63, T_3, MOD26, H_1   // [h₂₆[1], r₂₆[1]]
+	VESRLG $+52&63, T_3, H_2          // [h₂₆[2], r₂₆[2]] - low 12 bits only
+	VERIMG $-14&63, T_4, MOD26, H_3   // [h₂₆[1], r₂₆[1]]
+	VESRLG $40, T_4, H_4              // [h₂₆[4], r₂₆[4]] - low 24 bits only
+	VERIMG $+12&63, T_4, T_0, H_2     // [h₂₆[2], r₂₆[2]] - complete
+	VO     T_1, H_4, H_4              // [h₂₆[4], r₂₆[4]] - complete
+
+	// replicate r across all 4 vector elements
+	VREPF $3, H_0, R_0 // [r₂₆[0], r₂₆[0], r₂₆[0], r₂₆[0]]
+	VREPF $3, H_1, R_1 // [r₂₆[1], r₂₆[1], r₂₆[1], r₂₆[1]]
+	VREPF $3, H_2, R_2 // [r₂₆[2], r₂₆[2], r₂₆[2], r₂₆[2]]
+	VREPF $3, H_3, R_3 // [r₂₆[3], r₂₆[3], r₂₆[3], r₂₆[3]]
+	VREPF $3, H_4, R_4 // [r₂₆[4], r₂₆[4], r₂₆[4], r₂₆[4]]
+
+	// zero out lane 1 of h
+	VLEIG $1, $0, H_0 // [h₂₆[0], 0]
+	VLEIG $1, $0, H_1 // [h₂₆[1], 0]
+	VLEIG $1, $0, H_2 // [h₂₆[2], 0]
+	VLEIG $1, $0, H_3 // [h₂₆[3], 0]
+	VLEIG $1, $0, H_4 // [h₂₆[4], 0]
+
+	// calculate 5r (ignore least significant limb)
+	VREPIF $5, T_0
+	VMLF   T_0, R_1, R5_1 // [5r₂₆[1], 5r₂₆[1], 5r₂₆[1], 5r₂₆[1]]
+	VMLF   T_0, R_2, R5_2 // [5r₂₆[2], 5r₂₆[2], 5r₂₆[2], 5r₂₆[2]]
+	VMLF   T_0, R_3, R5_3 // [5r₂₆[3], 5r₂₆[3], 5r₂₆[3], 5r₂₆[3]]
+	VMLF   T_0, R_4, R5_4 // [5r₂₆[4], 5r₂₆[4], 5r₂₆[4], 5r₂₆[4]]
+
+	// skip r² calculation if we are only calculating one block
 	CMPBLE R3, $16, skip

-	// calculate r**2
-	MULTIPLY(R_0, R_1, R_2, R_3, R_4, R_0, R_1, R_2, R_3, R_4, R5SAVE_1, R5SAVE_2, R5SAVE_3, R5SAVE_4, H_0, H_1, H_2, H_3, H_4)
-	REDUCE(H_0, H_1, H_2, H_3, H_4)
-	VLEIG $0, $5, T_0
-	VLEIG $1, $5, T_0
-	VMLOF T_0, H_1, R5_1
-	VMLOF T_0, H_2, R5_2
-	VMLOF T_0, H_3, R5_3
-	VMLOF T_0, H_4, R5_4
-	VLR   H_0, R_0
-	VLR   H_1, R_1
-	VLR   H_2, R_2
-	VLR   H_3, R_3
-	VLR   H_4, R_4
-
-	// initialize h
-	VZERO H_0
-	VZERO H_1
-	VZERO H_2
-	VZERO H_3
-	VZERO H_4
+	// calculate r²
+	MULTIPLY(R_0, R_1, R_2, R_3, R_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, M_0, M_1, M_2, M_3, M_4)
+	REDUCE(M_0, M_1, M_2, M_3, M_4)
+	VGBM   $0x0f0f, T_0
+	VERIMG $0, M_0, T_0, R_0 // [r₂₆[0], r²₂₆[0], r₂₆[0], r²₂₆[0]]
+	VERIMG $0, M_1, T_0, R_1 // [r₂₆[1], r²₂₆[1], r₂₆[1], r²₂₆[1]]
+	VERIMG $0, M_2, T_0, R_2 // [r₂₆[2], r²₂₆[2], r₂₆[2], r²₂₆[2]]
+	VERIMG $0, M_3, T_0, R_3 // [r₂₆[3], r²₂₆[3], r₂₆[3], r²₂₆[3]]
+	VERIMG $0, M_4, T_0, R_4 // [r₂₆[4], r²₂₆[4], r₂₆[4], r²₂₆[4]]
+
+	// calculate 5r² (ignore least significant limb)
+	VREPIF $5, T_0
+	VMLF   T_0, R_1, R5_1 // [5r₂₆[1], 5r²₂₆[1], 5r₂₆[1], 5r²₂₆[1]]
+	VMLF   T_0, R_2, R5_2 // [5r₂₆[2], 5r²₂₆[2], 5r₂₆[2], 5r²₂₆[2]]
+	VMLF   T_0, R_3, R5_3 // [5r₂₆[3], 5r²₂₆[3], 5r₂₆[3], 5r²₂₆[3]]
+	VMLF   T_0, R_4, R5_4 // [5r₂₆[4], 5r²₂₆[4], 5r₂₆[4], 5r²₂₆[4]]

 loop:
-	CMPBLE R3, $32, b2
-	VLM    (R2), T_0, T_1
-	SUB    $32, R3
-	MOVD   $32(R2), R2
-	EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
-	VLEIB  $4, $1, F_4
-	VLEIB  $12, $1, F_4
+	CMPBLE R3, $32, b2 // 2 or fewer blocks remaining, need to change key coefficients
+
+	// load next 2 blocks from message
+	VLM (R2), T_0, T_1
+
+	// update message slice
+	SUB  $32, R3
+	MOVD $32(R2), R2
+
+	// unpack message blocks into 26-bit big-endian limbs
+	EXPAND(T_0, T_1, M_0, M_1, M_2, M_3, M_4)
+
+	// add 2¹²⁸ to each message block value
+	VLEIB $4, $1, M_4
+	VLEIB $12, $1, M_4

 multiply:
-	VAG    H_0, F_0, F_0
-	VAG    H_1, F_1, F_1
-	VAG    H_2, F_2, F_2
-	VAG    H_3, F_3, F_3
-	VAG    H_4, F_4, F_4
-	MULTIPLY(F_0, F_1, F_2, F_3, F_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, H_0, H_1, H_2, H_3, H_4)
+	// accumulate the incoming message
+	VAG H_0, M_0, M_0
+	VAG H_3, M_3, M_3
+	VAG H_1, M_1, M_1
+	VAG H_4, M_4, M_4
+	VAG H_2, M_2, M_2
+
+	// multiply the accumulator by the key coefficient
+	MULTIPLY(M_0, M_1, M_2, M_3, M_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, H_0, H_1, H_2, H_3, H_4)
+
+	// carry and partially reduce the partial products
 	REDUCE(H_0, H_1, H_2, H_3, H_4)
+
 	CMPBNE R3, $0, loop

 finish:
-	// sum vectors
+	// sum lane 0 and lane 1 and put the result in lane 1
 	VZERO  T_0
 	VSUMQG H_0, T_0, H_0
-	VSUMQG H_1, T_0, H_1
-	VSUMQG H_2, T_0, H_2
 	VSUMQG H_3, T_0, H_3
+	VSUMQG H_1, T_0, H_1
 	VSUMQG H_4, T_0, H_4
+	VSUMQG H_2, T_0, H_2

-	// h may be >= 2*(2**130-5) so we need to reduce it again
+	// reduce again after summation
+	// TODO(mundaym): there might be a more efficient way to do this
+	// now that we only have 1 active lane. For example, we could
+	// simultaneously pack the values as we reduce them.
 	REDUCE(H_0, H_1, H_2, H_3, H_4)

-	// carry h1->h4
+	// carry h[1] through to h[4] so that only h[4] can exceed 2²⁶ - 1
+	// TODO(mundaym): in testing this final carry was unnecessary.
+	// Needs a proof before it can be removed though.
 	VESRLG $26, H_1, T_1
 	VN     MOD26, H_1, H_1
 	VAQ    T_1, H_2, H_2
@ -284,95 +367,137 @@ finish:
 	VN     MOD26, H_3, H_3
 	VAQ    T_3, H_4, H_4

-	// h is now < 2*(2**130-5)
-	// pack h into h1 (hi) and h0 (lo)
-	PACK(H_0, H_1, H_2, H_3, H_4)
-
-	// if h > 2**130-5 then h -= 2**130-5
-	MOD(H_0, H_1, T_0, T_1, T_2)
-
-	// h += s
-	MOVD  $·bswapMask<>(SB), R5
-	VL    (R5), T_1
-	VL    16(R4), T_0
-	VPERM T_0, T_0, T_1, T_0    // reverse bytes (to big)
-	VAQ   T_0, H_0, H_0
-	VPERM H_0, H_0, T_1, H_0    // reverse bytes (to little)
-	VST   H_0, (R1)
-
+	// h is now < 2(2¹³⁰ - 5)
+	// Pack each lane in h₂₆[0:4] into h₁₂₈[0:1].
+	VESLG $26, H_1, H_1
+	VESLG $26, H_3, H_3
+	VO    H_0, H_1, H_0
+	VO    H_2, H_3, H_2
+	VESLG $4, H_2, H_2
+	VLEIB $7, $48, H_1
+	VSLB  H_1, H_2, H_2
+	VO    H_0, H_2, H_0
+	VLEIB $7, $104, H_1
+	VSLB  H_1, H_4, H_3
+	VO    H_3, H_0, H_0
+	VLEIB $7, $24, H_1
+	VSRLB H_1, H_4, H_1
+
+	// update state
+	VSTEG $1, H_0, 0(R1)
+	VSTEG $0, H_0, 8(R1)
+	VSTEG $1, H_1, 16(R1)
 	RET

-b2:
+b2:  // 2 or fewer blocks remaining
 	CMPBLE R3, $16, b1

-	// 2 blocks remaining
-	SUB    $17, R3
-	VL     (R2), T_0
-	VLL    R3, 16(R2), T_1
-	ADD    $1, R3
+	// Load the 2 remaining blocks (17-32 bytes remaining).
+	MOVD $-17(R3), R0    // index of final byte to load modulo 16
+	VL   (R2), T_0       // load full 16 byte block
+	VLL  R0, 16(R2), T_1 // load final (possibly partial) block and pad with zeros to 16 bytes
+
+	// The Poly1305 algorithm requires that a 1 bit be appended to
+	// each message block. If the final block is less than 16 bytes
+	// long then it is easiest to insert the 1 before the message
+	// block is split into 26-bit limbs. If, on the other hand, the
+	// final message block is 16 bytes long then we append the 1 bit
+	// after expansion as normal.
 	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, T_1
-	EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
+	MOVD   $-16(R3), R3   // index of byte in last block to insert 1 at (could be 16)
+	CMPBEQ R3, $16, 2(PC) // skip the insertion if the final block is 16 bytes long
+	VLVGB  R3, R0, T_1    // insert 1 into the byte at index R3
+
+	// Split both blocks into 26-bit limbs in the appropriate lanes.
+	EXPAND(T_0, T_1, M_0, M_1, M_2, M_3, M_4)
+
+	// Append a 1 byte to the end of the second to last block.
+	VLEIB $4, $1, M_4
+
+	// Append a 1 byte to the end of the last block only if it is a
+	// full 16 byte block.
 	CMPBNE R3, $16, 2(PC)
-	VLEIB  $12, $1, F_4
-	VLEIB  $4, $1, F_4
-
-	// setup [r²,r]
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, RSAVE_3, R_3
-	VLVGG $1, RSAVE_4, R_4
-	VPDI  $0, R5_1, R5SAVE_1, R5_1
-	VPDI  $0, R5_2, R5SAVE_2, R5_2
-	VPDI  $0, R5_3, R5SAVE_3, R5_3
-	VPDI  $0, R5_4, R5SAVE_4, R5_4
+	VLEIB  $12, $1, M_4
+
+	// Finally, set up the coefficients for the final multiplication.
+	// We have previously saved r and 5r in the 32-bit even indexes
+	// of the R_[0-4] and R5_[1-4] coefficient registers.
+	//
+	// We want lane 0 to be multiplied by r² so that can be kept the
+	// same. We want lane 1 to be multiplied by r so we need to move
+	// the saved r value into the 32-bit odd index in lane 1 by
+	// rotating the 64-bit lane by 32.
+	VGBM   $0x00ff, T_0         // [0, 0xffffffffffffffff] - mask lane 1 only
+	VERIMG $32, R_0, T_0, R_0   // [_,  r²₂₆[0], _,  r₂₆[0]]
+	VERIMG $32, R_1, T_0, R_1   // [_,  r²₂₆[1], _,  r₂₆[1]]
+	VERIMG $32, R_2, T_0, R_2   // [_,  r²₂₆[2], _,  r₂₆[2]]
+	VERIMG $32, R_3, T_0, R_3   // [_,  r²₂₆[3], _,  r₂₆[3]]
+	VERIMG $32, R_4, T_0, R_4   // [_,  r²₂₆[4], _,  r₂₆[4]]
+	VERIMG $32, R5_1, T_0, R5_1 // [_, 5r²₂₆[1], _, 5r₂₆[1]]
+	VERIMG $32, R5_2, T_0, R5_2 // [_, 5r²₂₆[2], _, 5r₂₆[2]]
+	VERIMG $32, R5_3, T_0, R5_3 // [_, 5r²₂₆[3], _, 5r₂₆[3]]
+	VERIMG $32, R5_4, T_0, R5_4 // [_, 5r²₂₆[4], _, 5r₂₆[4]]

 	MOVD $0, R3
 	BR   multiply

 skip:
-	VZERO H_0
-	VZERO H_1
-	VZERO H_2
-	VZERO H_3
-	VZERO H_4
-
 	CMPBEQ R3, $0, finish

-b1:
-	// 1 block remaining
-	SUB    $1, R3
-	VLL    R3, (R2), T_0
-	ADD    $1, R3
+b1:  // 1 block remaining
+
+	// Load the final block (1-16 bytes). This will be placed into
+	// lane 0.
+	MOVD $-1(R3), R0
+	VLL  R0, (R2), T_0 // pad to 16 bytes with zeros
+
+	// The Poly1305 algorithm requires that a 1 bit be appended to
+	// each message block. If the final block is less than 16 bytes
+	// long then it is easiest to insert the 1 before the message
+	// block is split into 26-bit limbs. If, on the other hand, the
+	// final message block is 16 bytes long then we append the 1 bit
+	// after expansion as normal.
 	MOVBZ  $1, R0
 	CMPBEQ R3, $16, 2(PC)
 	VLVGB  R3, R0, T_0
-	VZERO  T_1
-	EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
+
+	// Set the message block in lane 1 to the value 0 so that it
+	// can be accumulated without affecting the final result.
+	VZERO T_1
+
+	// Split the final message block into 26-bit limbs in lane 0.
+	// Lane 1 will be contain 0.
+	EXPAND(T_0, T_1, M_0, M_1, M_2, M_3, M_4)
+
+	// Append a 1 byte to the end of the last block only if it is a
+	// full 16 byte block.
 	CMPBNE R3, $16, 2(PC)
-	VLEIB  $4, $1, F_4
-	VLEIG  $1, $1, R_0
-	VZERO  R_1
-	VZERO  R_2
-	VZERO  R_3
-	VZERO  R_4
-	VZERO  R5_1
-	VZERO  R5_2
-	VZERO  R5_3
-	VZERO  R5_4
-
-	// setup [r, 1]
-	VLVGG $0, RSAVE_0, R_0
-	VLVGG $0, RSAVE_1, R_1
-	VLVGG $0, RSAVE_2, R_2
-	VLVGG $0, RSAVE_3, R_3
-	VLVGG $0, RSAVE_4, R_4
-	VPDI  $0, R5SAVE_1, R5_1, R5_1
-	VPDI  $0, R5SAVE_2, R5_2, R5_2
-	VPDI  $0, R5SAVE_3, R5_3, R5_3
-	VPDI  $0, R5SAVE_4, R5_4, R5_4
+	VLEIB  $4, $1, M_4
+
+	// We have previously saved r and 5r in the 32-bit even indexes
+	// of the R_[0-4] and R5_[1-4] coefficient registers.
+	//
+	// We want lane 0 to be multiplied by r so we need to move the
+	// saved r value into the 32-bit odd index in lane 0. We want
+	// lane 1 to be set to the value 1. This makes multiplication
+	// a no-op. We do this by setting lane 1 in every register to 0
+	// and then just setting the 32-bit index 3 in R_0 to 1.
+	VZERO T_0
+	MOVD  $0, R0
+	MOVD  $0x10111213, R12
+	VLVGP R12, R0, T_1         // [_, 0x10111213, _, 0x00000000]
+	VPERM T_0, R_0, T_1, R_0   // [_,  r₂₆[0], _, 0]
+	VPERM T_0, R_1, T_1, R_1   // [_,  r₂₆[1], _, 0]
+	VPERM T_0, R_2, T_1, R_2   // [_,  r₂₆[2], _, 0]
+	VPERM T_0, R_3, T_1, R_3   // [_,  r₂₆[3], _, 0]
+	VPERM T_0, R_4, T_1, R_4   // [_,  r₂₆[4], _, 0]
+	VPERM T_0, R5_1, T_1, R5_1 // [_, 5r₂₆[1], _, 0]
+	VPERM T_0, R5_2, T_1, R5_2 // [_, 5r₂₆[2], _, 0]
+	VPERM T_0, R5_3, T_1, R5_3 // [_, 5r₂₆[3], _, 0]
+	VPERM T_0, R5_4, T_1, R5_4 // [_, 5r₂₆[4], _, 0]
+
+	// Set the value of lane 1 to be 1.
+	VLEIF $3, $1, R_0 // [_,  r₂₆[0], _, 1]

 	MOVD $0, R3
 	BR   multiply
--- a/vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s
@ -1,909 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x,go1.11,!gccgo,!appengine
-
-#include "textflag.h"
-
-// Implementation of Poly1305 using the vector facility (vx) and the VMSL instruction.
-
-// constants
-#define EX0   V1
-#define EX1   V2
-#define EX2   V3
-
-// temporaries
-#define T_0 V4
-#define T_1 V5
-#define T_2 V6
-#define T_3 V7
-#define T_4 V8
-#define T_5 V9
-#define T_6 V10
-#define T_7 V11
-#define T_8 V12
-#define T_9 V13
-#define T_10 V14
-
-// r**2 & r**4
-#define R_0  V15
-#define R_1  V16
-#define R_2  V17
-#define R5_1 V18
-#define R5_2 V19
-// key (r)
-#define RSAVE_0 R7
-#define RSAVE_1 R8
-#define RSAVE_2 R9
-#define R5SAVE_1 R10
-#define R5SAVE_2 R11
-
-// message block
-#define M0 V20
-#define M1 V21
-#define M2 V22
-#define M3 V23
-#define M4 V24
-#define M5 V25
-
-// accumulator
-#define H0_0 V26
-#define H1_0 V27
-#define H2_0 V28
-#define H0_1 V29
-#define H1_1 V30
-#define H2_1 V31
-
-GLOBL ·keyMask<>(SB), RODATA, $16
-DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f
-DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f
-
-GLOBL ·bswapMask<>(SB), RODATA, $16
-DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908
-DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100
-
-GLOBL ·constants<>(SB), RODATA, $48
-// EX0
-DATA ·constants<>+0(SB)/8, $0x18191a1b1c1d1e1f
-DATA ·constants<>+8(SB)/8, $0x0000050403020100
-// EX1
-DATA ·constants<>+16(SB)/8, $0x18191a1b1c1d1e1f
-DATA ·constants<>+24(SB)/8, $0x00000a0908070605
-// EX2
-DATA ·constants<>+32(SB)/8, $0x18191a1b1c1d1e1f
-DATA ·constants<>+40(SB)/8, $0x0000000f0e0d0c0b
-
-GLOBL ·c<>(SB), RODATA, $48
-// EX0
-DATA ·c<>+0(SB)/8, $0x0000050403020100
-DATA ·c<>+8(SB)/8, $0x0000151413121110
-// EX1
-DATA ·c<>+16(SB)/8, $0x00000a0908070605
-DATA ·c<>+24(SB)/8, $0x00001a1918171615
-// EX2
-DATA ·c<>+32(SB)/8, $0x0000000f0e0d0c0b
-DATA ·c<>+40(SB)/8, $0x0000001f1e1d1c1b
-
-GLOBL ·reduce<>(SB), RODATA, $32
-// 44 bit
-DATA ·reduce<>+0(SB)/8, $0x0
-DATA ·reduce<>+8(SB)/8, $0xfffffffffff
-// 42 bit
-DATA ·reduce<>+16(SB)/8, $0x0
-DATA ·reduce<>+24(SB)/8, $0x3ffffffffff
-
-// h = (f*g) % (2**130-5) [partial reduction]
-// uses T_0...T_9 temporary registers
-// input: m02_0, m02_1, m02_2, m13_0, m13_1, m13_2, r_0, r_1, r_2, r5_1, r5_2, m4_0, m4_1, m4_2, m5_0, m5_1, m5_2
-// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
-// output: m02_0, m02_1, m02_2, m13_0, m13_1, m13_2
-#define MULTIPLY(m02_0, m02_1, m02_2, m13_0, m13_1, m13_2, r_0, r_1, r_2, r5_1, r5_2, m4_0, m4_1, m4_2, m5_0, m5_1, m5_2, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
-	\ // Eliminate the dependency for the last 2 VMSLs
-	VMSLG m02_0, r_2, m4_2, m4_2                       \
-	VMSLG m13_0, r_2, m5_2, m5_2                       \ // 8 VMSLs pipelined
-	VMSLG m02_0, r_0, m4_0, m4_0                       \
-	VMSLG m02_1, r5_2, V0, T_0                         \
-	VMSLG m02_0, r_1, m4_1, m4_1                       \
-	VMSLG m02_1, r_0, V0, T_1                          \
-	VMSLG m02_1, r_1, V0, T_2                          \
-	VMSLG m02_2, r5_1, V0, T_3                         \
-	VMSLG m02_2, r5_2, V0, T_4                         \
-	VMSLG m13_0, r_0, m5_0, m5_0                       \
-	VMSLG m13_1, r5_2, V0, T_5                         \
-	VMSLG m13_0, r_1, m5_1, m5_1                       \
-	VMSLG m13_1, r_0, V0, T_6                          \
-	VMSLG m13_1, r_1, V0, T_7                          \
-	VMSLG m13_2, r5_1, V0, T_8                         \
-	VMSLG m13_2, r5_2, V0, T_9                         \
-	VMSLG m02_2, r_0, m4_2, m4_2                       \
-	VMSLG m13_2, r_0, m5_2, m5_2                       \
-	VAQ   m4_0, T_0, m02_0                             \
-	VAQ   m4_1, T_1, m02_1                             \
-	VAQ   m5_0, T_5, m13_0                             \
-	VAQ   m5_1, T_6, m13_1                             \
-	VAQ   m02_0, T_3, m02_0                            \
-	VAQ   m02_1, T_4, m02_1                            \
-	VAQ   m13_0, T_8, m13_0                            \
-	VAQ   m13_1, T_9, m13_1                            \
-	VAQ   m4_2, T_2, m02_2                             \
-	VAQ   m5_2, T_7, m13_2                             \
-
-// SQUARE uses three limbs of r and r_2*5 to output square of r
-// uses T_1, T_5 and T_7 temporary registers
-// input: r_0, r_1, r_2, r5_2
-// temp: TEMP0, TEMP1, TEMP2
-// output: p0, p1, p2
-#define SQUARE(r_0, r_1, r_2, r5_2, p0, p1, p2, TEMP0, TEMP1, TEMP2) \
-	VMSLG r_0, r_0, p0, p0     \
-	VMSLG r_1, r5_2, V0, TEMP0 \
-	VMSLG r_2, r5_2, p1, p1    \
-	VMSLG r_0, r_1, V0, TEMP1  \
-	VMSLG r_1, r_1, p2, p2     \
-	VMSLG r_0, r_2, V0, TEMP2  \
-	VAQ   TEMP0, p0, p0        \
-	VAQ   TEMP1, p1, p1        \
-	VAQ   TEMP2, p2, p2        \
-	VAQ   TEMP0, p0, p0        \
-	VAQ   TEMP1, p1, p1        \
-	VAQ   TEMP2, p2, p2        \
-
-// carry h0->h1->h2->h0 || h3->h4->h5->h3
-// uses T_2, T_4, T_5, T_7, T_8, T_9
-//       t6,  t7,  t8,  t9, t10, t11
-// input: h0, h1, h2, h3, h4, h5
-// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11
-// output: h0, h1, h2, h3, h4, h5
-#define REDUCE(h0, h1, h2, h3, h4, h5, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \
-	VLM    (R12), t6, t7  \ // 44 and 42 bit clear mask
-	VLEIB  $7, $0x28, t10 \ // 5 byte shift mask
-	VREPIB $4, t8         \ // 4 bit shift mask
-	VREPIB $2, t11        \ // 2 bit shift mask
-	VSRLB  t10, h0, t0    \ // h0 byte shift
-	VSRLB  t10, h1, t1    \ // h1 byte shift
-	VSRLB  t10, h2, t2    \ // h2 byte shift
-	VSRLB  t10, h3, t3    \ // h3 byte shift
-	VSRLB  t10, h4, t4    \ // h4 byte shift
-	VSRLB  t10, h5, t5    \ // h5 byte shift
-	VSRL   t8, t0, t0     \ // h0 bit shift
-	VSRL   t8, t1, t1     \ // h2 bit shift
-	VSRL   t11, t2, t2    \ // h2 bit shift
-	VSRL   t8, t3, t3     \ // h3 bit shift
-	VSRL   t8, t4, t4     \ // h4 bit shift
-	VESLG  $2, t2, t9     \ // h2 carry x5
-	VSRL   t11, t5, t5    \ // h5 bit shift
-	VN     t6, h0, h0     \ // h0 clear carry
-	VAQ    t2, t9, t2     \ // h2 carry x5
-	VESLG  $2, t5, t9     \ // h5 carry x5
-	VN     t6, h1, h1     \ // h1 clear carry
-	VN     t7, h2, h2     \ // h2 clear carry
-	VAQ    t5, t9, t5     \ // h5 carry x5
-	VN     t6, h3, h3     \ // h3 clear carry
-	VN     t6, h4, h4     \ // h4 clear carry
-	VN     t7, h5, h5     \ // h5 clear carry
-	VAQ    t0, h1, h1     \ // h0->h1
-	VAQ    t3, h4, h4     \ // h3->h4
-	VAQ    t1, h2, h2     \ // h1->h2
-	VAQ    t4, h5, h5     \ // h4->h5
-	VAQ    t2, h0, h0     \ // h2->h0
-	VAQ    t5, h3, h3     \ // h5->h3
-	VREPG  $1, t6, t6     \ // 44 and 42 bit masks across both halves
-	VREPG  $1, t7, t7     \
-	VSLDB  $8, h0, h0, h0 \ // set up [h0/1/2, h3/4/5]
-	VSLDB  $8, h1, h1, h1 \
-	VSLDB  $8, h2, h2, h2 \
-	VO     h0, h3, h3     \
-	VO     h1, h4, h4     \
-	VO     h2, h5, h5     \
-	VESRLG $44, h3, t0    \ // 44 bit shift right
-	VESRLG $44, h4, t1    \
-	VESRLG $42, h5, t2    \
-	VN     t6, h3, h3     \ // clear carry bits
-	VN     t6, h4, h4     \
-	VN     t7, h5, h5     \
-	VESLG  $2, t2, t9     \ // multiply carry by 5
-	VAQ    t9, t2, t2     \
-	VAQ    t0, h4, h4     \
-	VAQ    t1, h5, h5     \
-	VAQ    t2, h3, h3     \
-
-// carry h0->h1->h2->h0
-// input: h0, h1, h2
-// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8
-// output: h0, h1, h2
-#define REDUCE2(h0, h1, h2, t0, t1, t2, t3, t4, t5, t6, t7, t8) \
-	VLEIB  $7, $0x28, t3 \ // 5 byte shift mask
-	VREPIB $4, t4        \ // 4 bit shift mask
-	VREPIB $2, t7        \ // 2 bit shift mask
-	VGBM   $0x003F, t5   \ // mask to clear carry bits
-	VSRLB  t3, h0, t0    \
-	VSRLB  t3, h1, t1    \
-	VSRLB  t3, h2, t2    \
-	VESRLG $4, t5, t5    \ // 44 bit clear mask
-	VSRL   t4, t0, t0    \
-	VSRL   t4, t1, t1    \
-	VSRL   t7, t2, t2    \
-	VESRLG $2, t5, t6    \ // 42 bit clear mask
-	VESLG  $2, t2, t8    \
-	VAQ    t8, t2, t2    \
-	VN     t5, h0, h0    \
-	VN     t5, h1, h1    \
-	VN     t6, h2, h2    \
-	VAQ    t0, h1, h1    \
-	VAQ    t1, h2, h2    \
-	VAQ    t2, h0, h0    \
-	VSRLB  t3, h0, t0    \
-	VSRLB  t3, h1, t1    \
-	VSRLB  t3, h2, t2    \
-	VSRL   t4, t0, t0    \
-	VSRL   t4, t1, t1    \
-	VSRL   t7, t2, t2    \
-	VN     t5, h0, h0    \
-	VN     t5, h1, h1    \
-	VESLG  $2, t2, t8    \
-	VN     t6, h2, h2    \
-	VAQ    t0, h1, h1    \
-	VAQ    t8, t2, t2    \
-	VAQ    t1, h2, h2    \
-	VAQ    t2, h0, h0    \
-
-// expands two message blocks into the lower halfs of the d registers
-// moves the contents of the d registers into upper halfs
-// input: in1, in2, d0, d1, d2, d3, d4, d5
-// temp: TEMP0, TEMP1, TEMP2, TEMP3
-// output: d0, d1, d2, d3, d4, d5
-#define EXPACC(in1, in2, d0, d1, d2, d3, d4, d5, TEMP0, TEMP1, TEMP2, TEMP3) \
-	VGBM   $0xff3f, TEMP0      \
-	VGBM   $0xff1f, TEMP1      \
-	VESLG  $4, d1, TEMP2       \
-	VESLG  $4, d4, TEMP3       \
-	VESRLG $4, TEMP0, TEMP0    \
-	VPERM  in1, d0, EX0, d0    \
-	VPERM  in2, d3, EX0, d3    \
-	VPERM  in1, d2, EX2, d2    \
-	VPERM  in2, d5, EX2, d5    \
-	VPERM  in1, TEMP2, EX1, d1 \
-	VPERM  in2, TEMP3, EX1, d4 \
-	VN     TEMP0, d0, d0       \
-	VN     TEMP0, d3, d3       \
-	VESRLG $4, d1, d1          \
-	VESRLG $4, d4, d4          \
-	VN     TEMP1, d2, d2       \
-	VN     TEMP1, d5, d5       \
-	VN     TEMP0, d1, d1       \
-	VN     TEMP0, d4, d4       \
-
-// expands one message block into the lower halfs of the d registers
-// moves the contents of the d registers into upper halfs
-// input: in, d0, d1, d2
-// temp: TEMP0, TEMP1, TEMP2
-// output: d0, d1, d2
-#define EXPACC2(in, d0, d1, d2, TEMP0, TEMP1, TEMP2) \
-	VGBM   $0xff3f, TEMP0     \
-	VESLG  $4, d1, TEMP2      \
-	VGBM   $0xff1f, TEMP1     \
-	VPERM  in, d0, EX0, d0    \
-	VESRLG $4, TEMP0, TEMP0   \
-	VPERM  in, d2, EX2, d2    \
-	VPERM  in, TEMP2, EX1, d1 \
-	VN     TEMP0, d0, d0      \
-	VN     TEMP1, d2, d2      \
-	VESRLG $4, d1, d1         \
-	VN     TEMP0, d1, d1      \
-
-// pack h2:h0 into h1:h0 (no carry)
-// input: h0, h1, h2
-// output: h0, h1, h2
-#define PACK(h0, h1, h2) \
-	VMRLG  h1, h2, h2  \ // copy h1 to upper half h2
-	VESLG  $44, h1, h1 \ // shift limb 1 44 bits, leaving 20
-	VO     h0, h1, h0  \ // combine h0 with 20 bits from limb 1
-	VESRLG $20, h2, h1 \ // put top 24 bits of limb 1 into h1
-	VLEIG  $1, $0, h1  \ // clear h2 stuff from lower half of h1
-	VO     h0, h1, h0  \ // h0 now has 88 bits (limb 0 and 1)
-	VLEIG  $0, $0, h2  \ // clear upper half of h2
-	VESRLG $40, h2, h1 \ // h1 now has upper two bits of result
-	VLEIB  $7, $88, h1 \ // for byte shift (11 bytes)
-	VSLB   h1, h2, h2  \ // shift h2 11 bytes to the left
-	VO     h0, h2, h0  \ // combine h0 with 20 bits from limb 1
-	VLEIG  $0, $0, h1  \ // clear upper half of h1
-
-// if h > 2**130-5 then h -= 2**130-5
-// input: h0, h1
-// temp: t0, t1, t2
-// output: h0
-#define MOD(h0, h1, t0, t1, t2) \
-	VZERO t0          \
-	VLEIG $1, $5, t0  \
-	VACCQ h0, t0, t1  \
-	VAQ   h0, t0, t0  \
-	VONE  t2          \
-	VLEIG $1, $-4, t2 \
-	VAQ   t2, t1, t1  \
-	VACCQ h1, t1, t1  \
-	VONE  t2          \
-	VAQ   t2, t1, t1  \
-	VN    h0, t1, t2  \
-	VNC   t0, t1, t1  \
-	VO    t1, t2, h0  \
-
-// func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]key)
-TEXT ·poly1305vmsl(SB), $0-32
-	// This code processes 6 + up to 4 blocks (32 bytes) per iteration
-	// using the algorithm described in:
-	// NEON crypto, Daniel J. Bernstein & Peter Schwabe
-	// https://cryptojedi.org/papers/neoncrypto-20120320.pdf
-	// And as moddified for VMSL as described in
-	// Accelerating Poly1305 Cryptographic Message Authentication on the z14
-	// O'Farrell et al, CASCON 2017, p48-55
-	// https://ibm.ent.box.com/s/jf9gedj0e9d2vjctfyh186shaztavnht
-
-	LMG   out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key
-	VZERO V0                // c
-
-	// load EX0, EX1 and EX2
-	MOVD $·constants<>(SB), R5
-	VLM  (R5), EX0, EX2        // c
-
-	// setup r
-	VL    (R4), T_0
-	MOVD  $·keyMask<>(SB), R6
-	VL    (R6), T_1
-	VN    T_0, T_1, T_0
-	VZERO T_2                 // limbs for r
-	VZERO T_3
-	VZERO T_4
-	EXPACC2(T_0, T_2, T_3, T_4, T_1, T_5, T_7)
-
-	// T_2, T_3, T_4: [0, r]
-
-	// setup r*20
-	VLEIG $0, $0, T_0
-	VLEIG $1, $20, T_0       // T_0: [0, 20]
-	VZERO T_5
-	VZERO T_6
-	VMSLG T_0, T_3, T_5, T_5
-	VMSLG T_0, T_4, T_6, T_6
-
-	// store r for final block in GR
-	VLGVG $1, T_2, RSAVE_0  // c
-	VLGVG $1, T_3, RSAVE_1  // c
-	VLGVG $1, T_4, RSAVE_2  // c
-	VLGVG $1, T_5, R5SAVE_1 // c
-	VLGVG $1, T_6, R5SAVE_2 // c
-
-	// initialize h
-	VZERO H0_0
-	VZERO H1_0
-	VZERO H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	// initialize pointer for reduce constants
-	MOVD $·reduce<>(SB), R12
-
-	// calculate r**2 and 20*(r**2)
-	VZERO R_0
-	VZERO R_1
-	VZERO R_2
-	SQUARE(T_2, T_3, T_4, T_6, R_0, R_1, R_2, T_1, T_5, T_7)
-	REDUCE2(R_0, R_1, R_2, M0, M1, M2, M3, M4, R5_1, R5_2, M5, T_1)
-	VZERO R5_1
-	VZERO R5_2
-	VMSLG T_0, R_1, R5_1, R5_1
-	VMSLG T_0, R_2, R5_2, R5_2
-
-	// skip r**4 calculation if 3 blocks or less
-	CMPBLE R3, $48, b4
-
-	// calculate r**4 and 20*(r**4)
-	VZERO T_8
-	VZERO T_9
-	VZERO T_10
-	SQUARE(R_0, R_1, R_2, R5_2, T_8, T_9, T_10, T_1, T_5, T_7)
-	REDUCE2(T_8, T_9, T_10, M0, M1, M2, M3, M4, T_2, T_3, M5, T_1)
-	VZERO T_2
-	VZERO T_3
-	VMSLG T_0, T_9, T_2, T_2
-	VMSLG T_0, T_10, T_3, T_3
-
-	// put r**2 to the right and r**4 to the left of R_0, R_1, R_2
-	VSLDB $8, T_8, T_8, T_8
-	VSLDB $8, T_9, T_9, T_9
-	VSLDB $8, T_10, T_10, T_10
-	VSLDB $8, T_2, T_2, T_2
-	VSLDB $8, T_3, T_3, T_3
-
-	VO T_8, R_0, R_0
-	VO T_9, R_1, R_1
-	VO T_10, R_2, R_2
-	VO T_2, R5_1, R5_1
-	VO T_3, R5_2, R5_2
-
-	CMPBLE R3, $80, load // less than or equal to 5 blocks in message
-
-	// 6(or 5+1) blocks
-	SUB    $81, R3
-	VLM    (R2), M0, M4
-	VLL    R3, 80(R2), M5
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBGE R3, $16, 2(PC)
-	VLVGB  R3, R0, M5
-	MOVD   $96(R2), R2
-	EXPACC(M0, M1, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
-	EXPACC(M2, M3, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
-	VLEIB  $2, $1, H2_0
-	VLEIB  $2, $1, H2_1
-	VLEIB  $10, $1, H2_0
-	VLEIB  $10, $1, H2_1
-
-	VZERO  M0
-	VZERO  M1
-	VZERO  M2
-	VZERO  M3
-	VZERO  T_4
-	VZERO  T_10
-	EXPACC(M4, M5, M0, M1, M2, M3, T_4, T_10, T_0, T_1, T_2, T_3)
-	VLR    T_4, M4
-	VLEIB  $10, $1, M2
-	CMPBLT R3, $16, 2(PC)
-	VLEIB  $10, $1, T_10
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M2, M3, M4, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG  V0, H0_1, H0_0
-	VMRHG  V0, H1_1, H1_0
-	VMRHG  V0, H2_1, H2_0
-	VMRLG  V0, H0_1, H0_1
-	VMRLG  V0, H1_1, H1_1
-	VMRLG  V0, H2_1, H2_1
-
-	SUB    $16, R3
-	CMPBLE R3, $0, square
-
-load:
-	// load EX0, EX1 and EX2
-	MOVD $·c<>(SB), R5
-	VLM  (R5), EX0, EX2
-
-loop:
-	CMPBLE R3, $64, add // b4	// last 4 or less blocks left
-
-	// next 4 full blocks
-	VLM  (R2), M2, M5
-	SUB  $64, R3
-	MOVD $64(R2), R2
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, T_0, T_1, T_3, T_4, T_5, T_2, T_7, T_8, T_9)
-
-	// expacc in-lined to create [m2, m3] limbs
-	VGBM   $0x3f3f, T_0     // 44 bit clear mask
-	VGBM   $0x1f1f, T_1     // 40 bit clear mask
-	VPERM  M2, M3, EX0, T_3
-	VESRLG $4, T_0, T_0     // 44 bit clear mask ready
-	VPERM  M2, M3, EX1, T_4
-	VPERM  M2, M3, EX2, T_5
-	VN     T_0, T_3, T_3
-	VESRLG $4, T_4, T_4
-	VN     T_1, T_5, T_5
-	VN     T_0, T_4, T_4
-	VMRHG  H0_1, T_3, H0_0
-	VMRHG  H1_1, T_4, H1_0
-	VMRHG  H2_1, T_5, H2_0
-	VMRLG  H0_1, T_3, H0_1
-	VMRLG  H1_1, T_4, H1_1
-	VMRLG  H2_1, T_5, H2_1
-	VLEIB  $10, $1, H2_0
-	VLEIB  $10, $1, H2_1
-	VPERM  M4, M5, EX0, T_3
-	VPERM  M4, M5, EX1, T_4
-	VPERM  M4, M5, EX2, T_5
-	VN     T_0, T_3, T_3
-	VESRLG $4, T_4, T_4
-	VN     T_1, T_5, T_5
-	VN     T_0, T_4, T_4
-	VMRHG  V0, T_3, M0
-	VMRHG  V0, T_4, M1
-	VMRHG  V0, T_5, M2
-	VMRLG  V0, T_3, M3
-	VMRLG  V0, T_4, M4
-	VMRLG  V0, T_5, M5
-	VLEIB  $10, $1, M2
-	VLEIB  $10, $1, M5
-
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	CMPBNE R3, $0, loop
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG  V0, H0_1, H0_0
-	VMRHG  V0, H1_1, H1_0
-	VMRHG  V0, H2_1, H2_0
-	VMRLG  V0, H0_1, H0_1
-	VMRLG  V0, H1_1, H1_1
-	VMRLG  V0, H2_1, H2_1
-
-	// load EX0, EX1, EX2
-	MOVD $·constants<>(SB), R5
-	VLM  (R5), EX0, EX2
-
-	// sum vectors
-	VAQ H0_0, H0_1, H0_0
-	VAQ H1_0, H1_1, H1_0
-	VAQ H2_0, H2_1, H2_0
-
-	// h may be >= 2*(2**130-5) so we need to reduce it again
-	// M0...M4 are used as temps here
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
-
-next:  // carry h1->h2
-	VLEIB  $7, $0x28, T_1
-	VREPIB $4, T_2
-	VGBM   $0x003F, T_3
-	VESRLG $4, T_3
-
-	// byte shift
-	VSRLB T_1, H1_0, T_4
-
-	// bit shift
-	VSRL T_2, T_4, T_4
-
-	// clear h1 carry bits
-	VN T_3, H1_0, H1_0
-
-	// add carry
-	VAQ T_4, H2_0, H2_0
-
-	// h is now < 2*(2**130-5)
-	// pack h into h1 (hi) and h0 (lo)
-	PACK(H0_0, H1_0, H2_0)
-
-	// if h > 2**130-5 then h -= 2**130-5
-	MOD(H0_0, H1_0, T_0, T_1, T_2)
-
-	// h += s
-	MOVD  $·bswapMask<>(SB), R5
-	VL    (R5), T_1
-	VL    16(R4), T_0
-	VPERM T_0, T_0, T_1, T_0    // reverse bytes (to big)
-	VAQ   T_0, H0_0, H0_0
-	VPERM H0_0, H0_0, T_1, H0_0 // reverse bytes (to little)
-	VST   H0_0, (R1)
-	RET
-
-add:
-	// load EX0, EX1, EX2
-	MOVD $·constants<>(SB), R5
-	VLM  (R5), EX0, EX2
-
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG  V0, H0_1, H0_0
-	VMRHG  V0, H1_1, H1_0
-	VMRHG  V0, H2_1, H2_0
-	VMRLG  V0, H0_1, H0_1
-	VMRLG  V0, H1_1, H1_1
-	VMRLG  V0, H2_1, H2_1
-	CMPBLE R3, $64, b4
-
-b4:
-	CMPBLE R3, $48, b3 // 3 blocks or less
-
-	// 4(3+1) blocks remaining
-	SUB    $49, R3
-	VLM    (R2), M0, M2
-	VLL    R3, 48(R2), M3
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, M3
-	MOVD   $64(R2), R2
-	EXPACC(M0, M1, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
-	VLEIB  $10, $1, H2_0
-	VLEIB  $10, $1, H2_1
-	VZERO  M0
-	VZERO  M1
-	VZERO  M4
-	VZERO  M5
-	VZERO  T_4
-	VZERO  T_10
-	EXPACC(M2, M3, M0, M1, M4, M5, T_4, T_10, T_0, T_1, T_2, T_3)
-	VLR    T_4, M2
-	VLEIB  $10, $1, M4
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $10, $1, T_10
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M4, M5, M2, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG  V0, H0_1, H0_0
-	VMRHG  V0, H1_1, H1_0
-	VMRHG  V0, H2_1, H2_0
-	VMRLG  V0, H0_1, H0_1
-	VMRLG  V0, H1_1, H1_1
-	VMRLG  V0, H2_1, H2_1
-	SUB    $16, R3
-	CMPBLE R3, $0, square // this condition must always hold true!
-
-b3:
-	CMPBLE R3, $32, b2
-
-	// 3 blocks remaining
-
-	// setup [r²,r]
-	VSLDB $8, R_0, R_0, R_0
-	VSLDB $8, R_1, R_1, R_1
-	VSLDB $8, R_2, R_2, R_2
-	VSLDB $8, R5_1, R5_1, R5_1
-	VSLDB $8, R5_2, R5_2, R5_2
-
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, R5SAVE_1, R5_1
-	VLVGG $1, R5SAVE_2, R5_2
-
-	// setup [h0, h1]
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-	VO    H0_1, H0_0, H0_0
-	VO    H1_1, H1_0, H1_0
-	VO    H2_1, H2_0, H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-
-	// H*[r**2, r]
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, H0_1, H1_1, T_10, M5)
-
-	SUB    $33, R3
-	VLM    (R2), M0, M1
-	VLL    R3, 32(R2), M2
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, M2
-
-	// H += m0
-	VZERO T_1
-	VZERO T_2
-	VZERO T_3
-	EXPACC2(M0, T_1, T_2, T_3, T_4, T_5, T_6)
-	VLEIB $10, $1, T_3
-	VAG   H0_0, T_1, H0_0
-	VAG   H1_0, T_2, H1_0
-	VAG   H2_0, T_3, H2_0
-
-	VZERO M0
-	VZERO M3
-	VZERO M4
-	VZERO M5
-	VZERO T_10
-
-	// (H+m0)*r
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M3, M4, M5, V0, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M3, M4, M5, T_10, H0_1, H1_1, H2_1, T_9)
-
-	// H += m1
-	VZERO V0
-	VZERO T_1
-	VZERO T_2
-	VZERO T_3
-	EXPACC2(M1, T_1, T_2, T_3, T_4, T_5, T_6)
-	VLEIB $10, $1, T_3
-	VAQ   H0_0, T_1, H0_0
-	VAQ   H1_0, T_2, H1_0
-	VAQ   H2_0, T_3, H2_0
-	REDUCE2(H0_0, H1_0, H2_0, M0, M3, M4, M5, T_9, H0_1, H1_1, H2_1, T_10)
-
-	// [H, m2] * [r**2, r]
-	EXPACC2(M2, H0_0, H1_0, H2_0, T_1, T_2, T_3)
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $10, $1, H2_0
-	VZERO  M0
-	VZERO  M1
-	VZERO  M2
-	VZERO  M3
-	VZERO  M4
-	VZERO  M5
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, H0_1, H1_1, M5, T_10)
-	SUB    $16, R3
-	CMPBLE R3, $0, next   // this condition must always hold true!
-
-b2:
-	CMPBLE R3, $16, b1
-
-	// 2 blocks remaining
-
-	// setup [r²,r]
-	VSLDB $8, R_0, R_0, R_0
-	VSLDB $8, R_1, R_1, R_1
-	VSLDB $8, R_2, R_2, R_2
-	VSLDB $8, R5_1, R5_1, R5_1
-	VSLDB $8, R5_2, R5_2, R5_2
-
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, R5SAVE_1, R5_1
-	VLVGG $1, R5SAVE_2, R5_2
-
-	// setup [h0, h1]
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-	VO    H0_1, H0_0, H0_0
-	VO    H1_1, H1_0, H1_0
-	VO    H2_1, H2_0, H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-
-	// H*[r**2, r]
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M2, M3, M4, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG V0, H0_1, H0_0
-	VMRHG V0, H1_1, H1_0
-	VMRHG V0, H2_1, H2_0
-	VMRLG V0, H0_1, H0_1
-	VMRLG V0, H1_1, H1_1
-	VMRLG V0, H2_1, H2_1
-
-	// move h to the left and 0s at the right
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-
-	// get message blocks and append 1 to start
-	SUB    $17, R3
-	VL     (R2), M0
-	VLL    R3, 16(R2), M1
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, M1
-	VZERO  T_6
-	VZERO  T_7
-	VZERO  T_8
-	EXPACC2(M0, T_6, T_7, T_8, T_1, T_2, T_3)
-	EXPACC2(M1, T_6, T_7, T_8, T_1, T_2, T_3)
-	VLEIB  $2, $1, T_8
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $10, $1, T_8
-
-	// add [m0, m1] to h
-	VAG H0_0, T_6, H0_0
-	VAG H1_0, T_7, H1_0
-	VAG H2_0, T_8, H2_0
-
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-	VZERO T_10
-	VZERO M0
-
-	// at this point R_0 .. R5_2 look like [r**2, r]
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M2, M3, M4, M5, T_10, M0, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M2, M3, M4, M5, T_9, H0_1, H1_1, H2_1, T_10)
-	SUB    $16, R3, R3
-	CMPBLE R3, $0, next
-
-b1:
-	CMPBLE R3, $0, next
-
-	// 1 block remaining
-
-	// setup [r²,r]
-	VSLDB $8, R_0, R_0, R_0
-	VSLDB $8, R_1, R_1, R_1
-	VSLDB $8, R_2, R_2, R_2
-	VSLDB $8, R5_1, R5_1, R5_1
-	VSLDB $8, R5_2, R5_2, R5_2
-
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, R5SAVE_1, R5_1
-	VLVGG $1, R5SAVE_2, R5_2
-
-	// setup [h0, h1]
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-	VO    H0_1, H0_0, H0_0
-	VO    H1_1, H1_0, H1_0
-	VO    H2_1, H2_0, H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-
-	// H*[r**2, r]
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
-
-	// set up [0, m0] limbs
-	SUB    $1, R3
-	VLL    R3, (R2), M0
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, M0
-	VZERO  T_1
-	VZERO  T_2
-	VZERO  T_3
-	EXPACC2(M0, T_1, T_2, T_3, T_4, T_5, T_6)// limbs: [0, m]
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $10, $1, T_3
-
-	// h+m0
-	VAQ H0_0, T_1, H0_0
-	VAQ H1_0, T_2, H1_0
-	VAQ H2_0, T_3, H2_0
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
-
-	BR next
-
-square:
-	// setup [r²,r]
-	VSLDB $8, R_0, R_0, R_0
-	VSLDB $8, R_1, R_1, R_1
-	VSLDB $8, R_2, R_2, R_2
-	VSLDB $8, R5_1, R5_1, R5_1
-	VSLDB $8, R5_2, R5_2, R5_2
-
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, R5SAVE_1, R5_1
-	VLVGG $1, R5SAVE_2, R5_2
-
-	// setup [h0, h1]
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-	VO    H0_1, H0_0, H0_0
-	VO    H1_1, H1_0, H1_0
-	VO    H2_1, H2_0, H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-
-	// (h0*r**2) + (h1*r)
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
-	BR next
--- a/vendor/golang.org/x/crypto/ssh/certs.go
+++ b/vendor/golang.org/x/crypto/ssh/certs.go
@ -17,12 +17,14 @@ import (
 // These constants from [PROTOCOL.certkeys] represent the algorithm names
 // for certificate types supported by this package.
 const (
-	CertAlgoRSAv01      = "ssh-rsa-cert-v01@openssh.com"
-	CertAlgoDSAv01      = "ssh-dss-cert-v01@openssh.com"
-	CertAlgoECDSA256v01 = "ecdsa-sha2-nistp256-cert-v01@openssh.com"
-	CertAlgoECDSA384v01 = "ecdsa-sha2-nistp384-cert-v01@openssh.com"
-	CertAlgoECDSA521v01 = "ecdsa-sha2-nistp521-cert-v01@openssh.com"
-	CertAlgoED25519v01  = "ssh-ed25519-cert-v01@openssh.com"
+	CertAlgoRSAv01        = "ssh-rsa-cert-v01@openssh.com"
+	CertAlgoDSAv01        = "ssh-dss-cert-v01@openssh.com"
+	CertAlgoECDSA256v01   = "ecdsa-sha2-nistp256-cert-v01@openssh.com"
+	CertAlgoECDSA384v01   = "ecdsa-sha2-nistp384-cert-v01@openssh.com"
+	CertAlgoECDSA521v01   = "ecdsa-sha2-nistp521-cert-v01@openssh.com"
+	CertAlgoSKECDSA256v01 = "sk-ecdsa-sha2-nistp256-cert-v01@openssh.com"
+	CertAlgoED25519v01    = "ssh-ed25519-cert-v01@openssh.com"
+	CertAlgoSKED25519v01  = "sk-ssh-ed25519-cert-v01@openssh.com"
 )

 // Certificate types distinguish between host and user
@ -37,6 +39,7 @@ const (
 type Signature struct {
 	Format string
 	Blob   []byte
+	Rest   []byte `ssh:"rest"`
 }

 // CertTimeInfinity can be used for OpenSSHCertV01.ValidBefore to indicate that
@ -411,8 +414,8 @@ func (c *CertChecker) CheckCert(principal string, cert *Certificate) error {
 	return nil
 }

-// SignCert sets c.SignatureKey to the authority's public key and stores a
-// Signature, by authority, in the certificate.
+// SignCert signs the certificate with an authority, setting the Nonce,
+// SignatureKey, and Signature fields.
 func (c *Certificate) SignCert(rand io.Reader, authority Signer) error {
 	c.Nonce = make([]byte, 32)
 	if _, err := io.ReadFull(rand, c.Nonce); err != nil {
@ -429,12 +432,14 @@ func (c *Certificate) SignCert(rand io.Reader, authority Signer) error {
 }

 var certAlgoNames = map[string]string{
-	KeyAlgoRSA:      CertAlgoRSAv01,
-	KeyAlgoDSA:      CertAlgoDSAv01,
-	KeyAlgoECDSA256: CertAlgoECDSA256v01,
-	KeyAlgoECDSA384: CertAlgoECDSA384v01,
-	KeyAlgoECDSA521: CertAlgoECDSA521v01,
-	KeyAlgoED25519:  CertAlgoED25519v01,
+	KeyAlgoRSA:        CertAlgoRSAv01,
+	KeyAlgoDSA:        CertAlgoDSAv01,
+	KeyAlgoECDSA256:   CertAlgoECDSA256v01,
+	KeyAlgoECDSA384:   CertAlgoECDSA384v01,
+	KeyAlgoECDSA521:   CertAlgoECDSA521v01,
+	KeyAlgoSKECDSA256: CertAlgoSKECDSA256v01,
+	KeyAlgoED25519:    CertAlgoED25519v01,
+	KeyAlgoSKED25519:  CertAlgoSKED25519v01,
 }

 // certToPrivAlgo returns the underlying algorithm for a certificate algorithm.
@ -518,6 +523,12 @@ func parseSignatureBody(in []byte) (out *Signature, rest []byte, ok bool) {
 		return
 	}

+	switch out.Format {
+	case KeyAlgoSKECDSA256, CertAlgoSKECDSA256v01, KeyAlgoSKED25519, CertAlgoSKED25519v01:
+		out.Rest = in
+		return out, nil, ok
+	}
+
 	return out, in, ok
 }

--- a/vendor/golang.org/x/crypto/ssh/cipher.go
+++ b/vendor/golang.org/x/crypto/ssh/cipher.go
@ -16,9 +16,8 @@ import (
 	"hash"
 	"io"
 	"io/ioutil"
-	"math/bits"

-	"golang.org/x/crypto/internal/chacha20"
+	"golang.org/x/crypto/chacha20"
 	"golang.org/x/crypto/poly1305"
 )

@ -120,7 +119,7 @@ var cipherModes = map[string]*cipherMode{
 	chacha20Poly1305ID: {64, 0, newChaCha20Cipher},

 	// CBC mode is insecure and so is not included in the default config.
-	// (See http://www.isg.rhul.ac.uk/~kp/SandPfinal.pdf). If absolutely
+	// (See https://www.ieee-security.org/TC/SP2013/papers/4977a526.pdf). If absolutely
 	// needed, it's possible to specify a custom Config to enable it.
 	// You should expect that an active attacker can recover plaintext if
 	// you do.
@ -642,8 +641,8 @@ const chacha20Poly1305ID = "chacha20-poly1305@openssh.com"
 // the methods here also implement padding, which RFC4253 Section 6
 // also requires of stream ciphers.
 type chacha20Poly1305Cipher struct {
-	lengthKey  [8]uint32
-	contentKey [8]uint32
+	lengthKey  [32]byte
+	contentKey [32]byte
 	buf        []byte
 }

@ -656,21 +655,21 @@ func newChaCha20Cipher(key, unusedIV, unusedMACKey []byte, unusedAlgs directionA
 		buf: make([]byte, 256),
 	}

-	for i := range c.contentKey {
-		c.contentKey[i] = binary.LittleEndian.Uint32(key[i*4 : (i+1)*4])
-	}
-	for i := range c.lengthKey {
-		c.lengthKey[i] = binary.LittleEndian.Uint32(key[(i+8)*4 : (i+9)*4])
-	}
+	copy(c.contentKey[:], key[:32])
+	copy(c.lengthKey[:], key[32:])
 	return c, nil
 }

 func (c *chacha20Poly1305Cipher) readCipherPacket(seqNum uint32, r io.Reader) ([]byte, error) {
-	nonce := [3]uint32{0, 0, bits.ReverseBytes32(seqNum)}
-	s := chacha20.New(c.contentKey, nonce)
-	var polyKey [32]byte
+	nonce := make([]byte, 12)
+	binary.BigEndian.PutUint32(nonce[8:], seqNum)
+	s, err := chacha20.NewUnauthenticatedCipher(c.contentKey[:], nonce)
+	if err != nil {
+		return nil, err
+	}
+	var polyKey, discardBuf [32]byte
 	s.XORKeyStream(polyKey[:], polyKey[:])
-	s.Advance() // skip next 32 bytes
+	s.XORKeyStream(discardBuf[:], discardBuf[:]) // skip the next 32 bytes

 	encryptedLength := c.buf[:4]
 	if _, err := io.ReadFull(r, encryptedLength); err != nil {
@ -678,7 +677,11 @@ func (c *chacha20Poly1305Cipher) readCipherPacket(seqNum uint32, r io.Reader) ([
 	}

 	var lenBytes [4]byte
-	chacha20.New(c.lengthKey, nonce).XORKeyStream(lenBytes[:], encryptedLength)
+	ls, err := chacha20.NewUnauthenticatedCipher(c.lengthKey[:], nonce)
+	if err != nil {
+		return nil, err
+	}
+	ls.XORKeyStream(lenBytes[:], encryptedLength)

 	length := binary.BigEndian.Uint32(lenBytes[:])
 	if length > maxPacket {
@ -724,11 +727,15 @@ func (c *chacha20Poly1305Cipher) readCipherPacket(seqNum uint32, r io.Reader) ([
 }

 func (c *chacha20Poly1305Cipher) writeCipherPacket(seqNum uint32, w io.Writer, rand io.Reader, payload []byte) error {
-	nonce := [3]uint32{0, 0, bits.ReverseBytes32(seqNum)}
-	s := chacha20.New(c.contentKey, nonce)
-	var polyKey [32]byte
+	nonce := make([]byte, 12)
+	binary.BigEndian.PutUint32(nonce[8:], seqNum)
+	s, err := chacha20.NewUnauthenticatedCipher(c.contentKey[:], nonce)
+	if err != nil {
+		return err
+	}
+	var polyKey, discardBuf [32]byte
 	s.XORKeyStream(polyKey[:], polyKey[:])
-	s.Advance() // skip next 32 bytes
+	s.XORKeyStream(discardBuf[:], discardBuf[:]) // skip the next 32 bytes

 	// There is no blocksize, so fall back to multiple of 8 byte
 	// padding, as described in RFC 4253, Sec 6.
@ -748,7 +755,11 @@ func (c *chacha20Poly1305Cipher) writeCipherPacket(seqNum uint32, w io.Writer, r
 	}

 	binary.BigEndian.PutUint32(c.buf, uint32(1+len(payload)+padding))
-	chacha20.New(c.lengthKey, nonce).XORKeyStream(c.buf, c.buf[:4])
+	ls, err := chacha20.NewUnauthenticatedCipher(c.lengthKey[:], nonce)
+	if err != nil {
+		return err
+	}
+	ls.XORKeyStream(c.buf, c.buf[:4])
 	c.buf[4] = byte(padding)
 	copy(c.buf[5:], payload)
 	packetEnd := 5 + len(payload) + padding
--- a/vendor/golang.org/x/crypto/ssh/client_auth.go
+++ b/vendor/golang.org/x/crypto/ssh/client_auth.go
@ -36,7 +36,7 @@ func (c *connection) clientAuthenticate(config *ClientConfig) error {

 	// during the authentication phase the client first attempts the "none" method
 	// then any untried methods suggested by the server.
-	tried := make(map[string]bool)
+	var tried []string
 	var lastMethods []string

 	sessionID := c.transport.getSessionID()
@ -49,7 +49,9 @@ func (c *connection) clientAuthenticate(config *ClientConfig) error {
 			// success
 			return nil
 		} else if ok == authFailure {
-			tried[auth.method()] = true
+			if m := auth.method(); !contains(tried, m) {
+				tried = append(tried, m)
+			}
 		}
 		if methods == nil {
 			methods = lastMethods
@ -61,7 +63,7 @@ func (c *connection) clientAuthenticate(config *ClientConfig) error {
 	findNext:
 		for _, a := range config.Auth {
 			candidateMethod := a.method()
-			if tried[candidateMethod] {
+			if contains(tried, candidateMethod) {
 				continue
 			}
 			for _, meth := range methods {
@ -72,16 +74,16 @@ func (c *connection) clientAuthenticate(config *ClientConfig) error {
 			}
 		}
 	}
-	return fmt.Errorf("ssh: unable to authenticate, attempted methods %v, no supported methods remain", keys(tried))
+	return fmt.Errorf("ssh: unable to authenticate, attempted methods %v, no supported methods remain", tried)
 }

-func keys(m map[string]bool) []string {
-	s := make([]string, 0, len(m))
-
-	for key := range m {
-		s = append(s, key)
+func contains(list []string, e string) bool {
+	for _, s := range list {
+		if s == e {
+			return true
+		}
 	}
-	return s
+	return false
 }

 // An AuthMethod represents an instance of an RFC 4252 authentication method.
--- a/vendor/golang.org/x/crypto/ssh/common.go
+++ b/vendor/golang.org/x/crypto/ssh/common.go
@ -58,6 +58,14 @@ var serverForbiddenKexAlgos = map[string]struct{}{
 	kexAlgoDHGEXSHA256: {}, // server half implementation is only minimal to satisfy the automated tests
 }

+// preferredKexAlgos specifies the default preference for key-exchange algorithms
+// in preference order.
+var preferredKexAlgos = []string{
+	kexAlgoCurve25519SHA256,
+	kexAlgoECDH256, kexAlgoECDH384, kexAlgoECDH521,
+	kexAlgoDH14SHA1,
+}
+
 // supportedHostKeyAlgos specifies the supported host-key algorithms (i.e. methods
 // of authenticating servers) in preference order.
 var supportedHostKeyAlgos = []string{
@ -246,7 +254,7 @@ func (c *Config) SetDefaults() {
 	c.Ciphers = ciphers

 	if c.KeyExchanges == nil {
-		c.KeyExchanges = supportedKexAlgos
+		c.KeyExchanges = preferredKexAlgos
 	}

 	if c.MACs == nil {
--- a/vendor/golang.org/x/crypto/ssh/internal/bcrypt_pbkdf/bcrypt_pbkdf.go
+++ b/vendor/golang.org/x/crypto/ssh/internal/bcrypt_pbkdf/bcrypt_pbkdf.go
@ -0,0 +1,93 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package bcrypt_pbkdf implements bcrypt_pbkdf(3) from OpenBSD.
+//
+// See https://flak.tedunangst.com/post/bcrypt-pbkdf and
+// https://cvsweb.openbsd.org/cgi-bin/cvsweb/src/lib/libutil/bcrypt_pbkdf.c.
+package bcrypt_pbkdf
+
+import (
+	"crypto/sha512"
+	"errors"
+	"golang.org/x/crypto/blowfish"
+)
+
+const blockSize = 32
+
+// Key derives a key from the password, salt and rounds count, returning a
+// []byte of length keyLen that can be used as cryptographic key.
+func Key(password, salt []byte, rounds, keyLen int) ([]byte, error) {
+	if rounds < 1 {
+		return nil, errors.New("bcrypt_pbkdf: number of rounds is too small")
+	}
+	if len(password) == 0 {
+		return nil, errors.New("bcrypt_pbkdf: empty password")
+	}
+	if len(salt) == 0 || len(salt) > 1<<20 {
+		return nil, errors.New("bcrypt_pbkdf: bad salt length")
+	}
+	if keyLen > 1024 {
+		return nil, errors.New("bcrypt_pbkdf: keyLen is too large")
+	}
+
+	numBlocks := (keyLen + blockSize - 1) / blockSize
+	key := make([]byte, numBlocks*blockSize)
+
+	h := sha512.New()
+	h.Write(password)
+	shapass := h.Sum(nil)
+
+	shasalt := make([]byte, 0, sha512.Size)
+	cnt, tmp := make([]byte, 4), make([]byte, blockSize)
+	for block := 1; block <= numBlocks; block++ {
+		h.Reset()
+		h.Write(salt)
+		cnt[0] = byte(block >> 24)
+		cnt[1] = byte(block >> 16)
+		cnt[2] = byte(block >> 8)
+		cnt[3] = byte(block)
+		h.Write(cnt)
+		bcryptHash(tmp, shapass, h.Sum(shasalt))
+
+		out := make([]byte, blockSize)
+		copy(out, tmp)
+		for i := 2; i <= rounds; i++ {
+			h.Reset()
+			h.Write(tmp)
+			bcryptHash(tmp, shapass, h.Sum(shasalt))
+			for j := 0; j < len(out); j++ {
+				out[j] ^= tmp[j]
+			}
+		}
+
+		for i, v := range out {
+			key[i*numBlocks+(block-1)] = v
+		}
+	}
+	return key[:keyLen], nil
+}
+
+var magic = []byte("OxychromaticBlowfishSwatDynamite")
+
+func bcryptHash(out, shapass, shasalt []byte) {
+	c, err := blowfish.NewSaltedCipher(shapass, shasalt)
+	if err != nil {
+		panic(err)
+	}
+	for i := 0; i < 64; i++ {
+		blowfish.ExpandKey(shasalt, c)
+		blowfish.ExpandKey(shapass, c)
+	}
+	copy(out, magic)
+	for i := 0; i < 32; i += 8 {
+		for j := 0; j < 64; j++ {
+			c.Encrypt(out[i:i+8], out[i:i+8])
+		}
+	}
+	// Swap bytes due to different endianness.
+	for i := 0; i < 32; i += 4 {
+		out[i+3], out[i+2], out[i+1], out[i] = out[i], out[i+1], out[i+2], out[i+3]
+	}
+}
--- a/vendor/golang.org/x/crypto/ssh/kex.go
+++ b/vendor/golang.org/x/crypto/ssh/kex.go
@ -212,7 +212,7 @@ func (group *dhGroup) Server(c packetConn, randSource io.Reader, magics *handsha
 		HostKey:   hostKeyBytes,
 		Signature: sig,
 		Hash:      crypto.SHA1,
-	}, nil
+	}, err
 }

 // ecdh performs Elliptic Curve Diffie-Hellman key exchange as
@ -572,7 +572,7 @@ func (gex *dhGEXSHA) diffieHellman(theirPublic, myPrivate *big.Int) (*big.Int, e
 	return new(big.Int).Exp(theirPublic, myPrivate, gex.p), nil
 }

-func (gex *dhGEXSHA) Client(c packetConn, randSource io.Reader, magics *handshakeMagics) (*kexResult, error) {
+func (gex dhGEXSHA) Client(c packetConn, randSource io.Reader, magics *handshakeMagics) (*kexResult, error) {
 	// Send GexRequest
 	kexDHGexRequest := kexDHGexRequestMsg{
 		MinBits:      dhGroupExchangeMinimumBits,
@ -677,7 +677,7 @@ func (gex *dhGEXSHA) Client(c packetConn, randSource io.Reader, magics *handshak
 // Server half implementation of the Diffie Hellman Key Exchange with SHA1 and SHA256.
 //
 // This is a minimal implementation to satisfy the automated tests.
-func (gex *dhGEXSHA) Server(c packetConn, randSource io.Reader, magics *handshakeMagics, priv Signer) (result *kexResult, err error) {
+func (gex dhGEXSHA) Server(c packetConn, randSource io.Reader, magics *handshakeMagics, priv Signer) (result *kexResult, err error) {
 	// Receive GexRequest
 	packet, err := c.readPacket()
 	if err != nil {
--- a/vendor/golang.org/x/crypto/ssh/keys.go
+++ b/vendor/golang.org/x/crypto/ssh/keys.go
@ -7,6 +7,8 @@ package ssh
 import (
 	"bytes"
 	"crypto"
+	"crypto/aes"
+	"crypto/cipher"
 	"crypto/dsa"
 	"crypto/ecdsa"
 	"crypto/elliptic"
@ -25,17 +27,20 @@ import (
 	"strings"

 	"golang.org/x/crypto/ed25519"
+	"golang.org/x/crypto/ssh/internal/bcrypt_pbkdf"
 )

 // These constants represent the algorithm names for key types supported by this
 // package.
 const (
-	KeyAlgoRSA      = "ssh-rsa"
-	KeyAlgoDSA      = "ssh-dss"
-	KeyAlgoECDSA256 = "ecdsa-sha2-nistp256"
-	KeyAlgoECDSA384 = "ecdsa-sha2-nistp384"
-	KeyAlgoECDSA521 = "ecdsa-sha2-nistp521"
-	KeyAlgoED25519  = "ssh-ed25519"
+	KeyAlgoRSA        = "ssh-rsa"
+	KeyAlgoDSA        = "ssh-dss"
+	KeyAlgoECDSA256   = "ecdsa-sha2-nistp256"
+	KeyAlgoSKECDSA256 = "sk-ecdsa-sha2-nistp256@openssh.com"
+	KeyAlgoECDSA384   = "ecdsa-sha2-nistp384"
+	KeyAlgoECDSA521   = "ecdsa-sha2-nistp521"
+	KeyAlgoED25519    = "ssh-ed25519"
+	KeyAlgoSKED25519  = "sk-ssh-ed25519@openssh.com"
 )

 // These constants represent non-default signature algorithms that are supported
@ -58,9 +63,13 @@ func parsePubKey(in []byte, algo string) (pubKey PublicKey, rest []byte, err err
 		return parseDSA(in)
 	case KeyAlgoECDSA256, KeyAlgoECDSA384, KeyAlgoECDSA521:
 		return parseECDSA(in)
+	case KeyAlgoSKECDSA256:
+		return parseSKECDSA(in)
 	case KeyAlgoED25519:
 		return parseED25519(in)
-	case CertAlgoRSAv01, CertAlgoDSAv01, CertAlgoECDSA256v01, CertAlgoECDSA384v01, CertAlgoECDSA521v01, CertAlgoED25519v01:
+	case KeyAlgoSKED25519:
+		return parseSKEd25519(in)
+	case CertAlgoRSAv01, CertAlgoDSAv01, CertAlgoECDSA256v01, CertAlgoECDSA384v01, CertAlgoECDSA521v01, CertAlgoSKECDSA256v01, CertAlgoED25519v01, CertAlgoSKED25519v01:
 		cert, err := parseCert(in, certToPrivAlgo(algo))
 		if err != nil {
 			return nil, nil, err
@ -553,9 +562,11 @@ func parseED25519(in []byte) (out PublicKey, rest []byte, err error) {
 		return nil, nil, err
 	}

-	key := ed25519.PublicKey(w.KeyBytes)
+	if l := len(w.KeyBytes); l != ed25519.PublicKeySize {
+		return nil, nil, fmt.Errorf("invalid size %d for Ed25519 public key", l)
+	}

-	return (ed25519PublicKey)(key), w.Rest, nil
+	return ed25519PublicKey(w.KeyBytes), w.Rest, nil
 }

 func (k ed25519PublicKey) Marshal() []byte {
@ -573,9 +584,11 @@ func (k ed25519PublicKey) Verify(b []byte, sig *Signature) error {
 	if sig.Format != k.Type() {
 		return fmt.Errorf("ssh: signature type %s for key type %s", sig.Format, k.Type())
 	}
+	if l := len(k); l != ed25519.PublicKeySize {
+		return fmt.Errorf("ssh: invalid size %d for Ed25519 public key", l)
+	}

-	edKey := (ed25519.PublicKey)(k)
-	if ok := ed25519.Verify(edKey, b, sig.Blob); !ok {
+	if ok := ed25519.Verify(ed25519.PublicKey(k), b, sig.Blob); !ok {
 		return errors.New("ssh: signature did not verify")
 	}

@ -685,6 +698,224 @@ func (k *ecdsaPublicKey) CryptoPublicKey() crypto.PublicKey {
 	return (*ecdsa.PublicKey)(k)
 }

+// skFields holds the additional fields present in U2F/FIDO2 signatures.
+// See openssh/PROTOCOL.u2f 'SSH U2F Signatures' for details.
+type skFields struct {
+	// Flags contains U2F/FIDO2 flags such as 'user present'
+	Flags byte
+	// Counter is a monotonic signature counter which can be
+	// used to detect concurrent use of a private key, should
+	// it be extracted from hardware.
+	Counter uint32
+}
+
+type skECDSAPublicKey struct {
+	// application is a URL-like string, typically "ssh:" for SSH.
+	// see openssh/PROTOCOL.u2f for details.
+	application string
+	ecdsa.PublicKey
+}
+
+func (k *skECDSAPublicKey) Type() string {
+	return KeyAlgoSKECDSA256
+}
+
+func (k *skECDSAPublicKey) nistID() string {
+	return "nistp256"
+}
+
+func parseSKECDSA(in []byte) (out PublicKey, rest []byte, err error) {
+	var w struct {
+		Curve       string
+		KeyBytes    []byte
+		Application string
+		Rest        []byte `ssh:"rest"`
+	}
+
+	if err := Unmarshal(in, &w); err != nil {
+		return nil, nil, err
+	}
+
+	key := new(skECDSAPublicKey)
+	key.application = w.Application
+
+	if w.Curve != "nistp256" {
+		return nil, nil, errors.New("ssh: unsupported curve")
+	}
+	key.Curve = elliptic.P256()
+
+	key.X, key.Y = elliptic.Unmarshal(key.Curve, w.KeyBytes)
+	if key.X == nil || key.Y == nil {
+		return nil, nil, errors.New("ssh: invalid curve point")
+	}
+
+	return key, w.Rest, nil
+}
+
+func (k *skECDSAPublicKey) Marshal() []byte {
+	// See RFC 5656, section 3.1.
+	keyBytes := elliptic.Marshal(k.Curve, k.X, k.Y)
+	w := struct {
+		Name        string
+		ID          string
+		Key         []byte
+		Application string
+	}{
+		k.Type(),
+		k.nistID(),
+		keyBytes,
+		k.application,
+	}
+
+	return Marshal(&w)
+}
+
+func (k *skECDSAPublicKey) Verify(data []byte, sig *Signature) error {
+	if sig.Format != k.Type() {
+		return fmt.Errorf("ssh: signature type %s for key type %s", sig.Format, k.Type())
+	}
+
+	h := ecHash(k.Curve).New()
+	h.Write([]byte(k.application))
+	appDigest := h.Sum(nil)
+
+	h.Reset()
+	h.Write(data)
+	dataDigest := h.Sum(nil)
+
+	var ecSig struct {
+		R *big.Int
+		S *big.Int
+	}
+	if err := Unmarshal(sig.Blob, &ecSig); err != nil {
+		return err
+	}
+
+	var skf skFields
+	if err := Unmarshal(sig.Rest, &skf); err != nil {
+		return err
+	}
+
+	blob := struct {
+		ApplicationDigest []byte `ssh:"rest"`
+		Flags             byte
+		Counter           uint32
+		MessageDigest     []byte `ssh:"rest"`
+	}{
+		appDigest,
+		skf.Flags,
+		skf.Counter,
+		dataDigest,
+	}
+
+	original := Marshal(blob)
+
+	h.Reset()
+	h.Write(original)
+	digest := h.Sum(nil)
+
+	if ecdsa.Verify((*ecdsa.PublicKey)(&k.PublicKey), digest, ecSig.R, ecSig.S) {
+		return nil
+	}
+	return errors.New("ssh: signature did not verify")
+}
+
+type skEd25519PublicKey struct {
+	// application is a URL-like string, typically "ssh:" for SSH.
+	// see openssh/PROTOCOL.u2f for details.
+	application string
+	ed25519.PublicKey
+}
+
+func (k *skEd25519PublicKey) Type() string {
+	return KeyAlgoSKED25519
+}
+
+func parseSKEd25519(in []byte) (out PublicKey, rest []byte, err error) {
+	var w struct {
+		KeyBytes    []byte
+		Application string
+		Rest        []byte `ssh:"rest"`
+	}
+
+	if err := Unmarshal(in, &w); err != nil {
+		return nil, nil, err
+	}
+
+	if l := len(w.KeyBytes); l != ed25519.PublicKeySize {
+		return nil, nil, fmt.Errorf("invalid size %d for Ed25519 public key", l)
+	}
+
+	key := new(skEd25519PublicKey)
+	key.application = w.Application
+	key.PublicKey = ed25519.PublicKey(w.KeyBytes)
+
+	return key, w.Rest, nil
+}
+
+func (k *skEd25519PublicKey) Marshal() []byte {
+	w := struct {
+		Name        string
+		KeyBytes    []byte
+		Application string
+	}{
+		KeyAlgoSKED25519,
+		[]byte(k.PublicKey),
+		k.application,
+	}
+	return Marshal(&w)
+}
+
+func (k *skEd25519PublicKey) Verify(data []byte, sig *Signature) error {
+	if sig.Format != k.Type() {
+		return fmt.Errorf("ssh: signature type %s for key type %s", sig.Format, k.Type())
+	}
+	if l := len(k.PublicKey); l != ed25519.PublicKeySize {
+		return fmt.Errorf("invalid size %d for Ed25519 public key", l)
+	}
+
+	h := sha256.New()
+	h.Write([]byte(k.application))
+	appDigest := h.Sum(nil)
+
+	h.Reset()
+	h.Write(data)
+	dataDigest := h.Sum(nil)
+
+	var edSig struct {
+		Signature []byte `ssh:"rest"`
+	}
+
+	if err := Unmarshal(sig.Blob, &edSig); err != nil {
+		return err
+	}
+
+	var skf skFields
+	if err := Unmarshal(sig.Rest, &skf); err != nil {
+		return err
+	}
+
+	blob := struct {
+		ApplicationDigest []byte `ssh:"rest"`
+		Flags             byte
+		Counter           uint32
+		MessageDigest     []byte `ssh:"rest"`
+	}{
+		appDigest,
+		skf.Flags,
+		skf.Counter,
+		dataDigest,
+	}
+
+	original := Marshal(blob)
+
+	if ok := ed25519.Verify(k.PublicKey, original, edSig.Signature); !ok {
+		return errors.New("ssh: signature did not verify")
+	}
+
+	return nil
+}
+
 // NewSignerFromKey takes an *rsa.PrivateKey, *dsa.PrivateKey,
 // *ecdsa.PrivateKey or any other crypto.Signer and returns a
 // corresponding Signer instance. ECDSA keys must use P-256, P-384 or
@ -830,14 +1061,18 @@ func NewPublicKey(key interface{}) (PublicKey, error) {
 	case *dsa.PublicKey:
 		return (*dsaPublicKey)(key), nil
 	case ed25519.PublicKey:
-		return (ed25519PublicKey)(key), nil
+		if l := len(key); l != ed25519.PublicKeySize {
+			return nil, fmt.Errorf("ssh: invalid size %d for Ed25519 public key", l)
+		}
+		return ed25519PublicKey(key), nil
 	default:
 		return nil, fmt.Errorf("ssh: unsupported key type %T", key)
 	}
 }

 // ParsePrivateKey returns a Signer from a PEM encoded private key. It supports
-// the same keys as ParseRawPrivateKey.
+// the same keys as ParseRawPrivateKey. If the private key is encrypted, it
+// will return a PassphraseMissingError.
 func ParsePrivateKey(pemBytes []byte) (Signer, error) {
 	key, err := ParseRawPrivateKey(pemBytes)
 	if err != nil {
@ -850,8 +1085,8 @@ func ParsePrivateKey(pemBytes []byte) (Signer, error) {
 // ParsePrivateKeyWithPassphrase returns a Signer from a PEM encoded private
 // key and passphrase. It supports the same keys as
 // ParseRawPrivateKeyWithPassphrase.
-func ParsePrivateKeyWithPassphrase(pemBytes, passPhrase []byte) (Signer, error) {
-	key, err := ParseRawPrivateKeyWithPassphrase(pemBytes, passPhrase)
+func ParsePrivateKeyWithPassphrase(pemBytes, passphrase []byte) (Signer, error) {
+	key, err := ParseRawPrivateKeyWithPassphrase(pemBytes, passphrase)
 	if err != nil {
 		return nil, err
 	}
@ -867,8 +1102,21 @@ func encryptedBlock(block *pem.Block) bool {
 	return strings.Contains(block.Headers["Proc-Type"], "ENCRYPTED")
 }

+// A PassphraseMissingError indicates that parsing this private key requires a
+// passphrase. Use ParsePrivateKeyWithPassphrase.
+type PassphraseMissingError struct {
+	// PublicKey will be set if the private key format includes an unencrypted
+	// public key along with the encrypted private key.
+	PublicKey PublicKey
+}
+
+func (*PassphraseMissingError) Error() string {
+	return "ssh: this private key is passphrase protected"
+}
+
 // ParseRawPrivateKey returns a private key from a PEM encoded private key. It
-// supports RSA (PKCS#1), PKCS#8, DSA (OpenSSL), and ECDSA private keys.
+// supports RSA (PKCS#1), PKCS#8, DSA (OpenSSL), and ECDSA private keys. If the
+// private key is encrypted, it will return a PassphraseMissingError.
 func ParseRawPrivateKey(pemBytes []byte) (interface{}, error) {
 	block, _ := pem.Decode(pemBytes)
 	if block == nil {
@ -876,7 +1124,7 @@ func ParseRawPrivateKey(pemBytes []byte) (interface{}, error) {
 	}

 	if encryptedBlock(block) {
-		return nil, errors.New("ssh: cannot decode encrypted private keys")
+		return nil, &PassphraseMissingError{}
 	}

 	switch block.Type {
@ -890,33 +1138,35 @@ func ParseRawPrivateKey(pemBytes []byte) (interface{}, error) {
 	case "DSA PRIVATE KEY":
 		return ParseDSAPrivateKey(block.Bytes)
 	case "OPENSSH PRIVATE KEY":
-		return parseOpenSSHPrivateKey(block.Bytes)
+		return parseOpenSSHPrivateKey(block.Bytes, unencryptedOpenSSHKey)
 	default:
 		return nil, fmt.Errorf("ssh: unsupported key type %q", block.Type)
 	}
 }

 // ParseRawPrivateKeyWithPassphrase returns a private key decrypted with
-// passphrase from a PEM encoded private key. If wrong passphrase, return
-// x509.IncorrectPasswordError.
-func ParseRawPrivateKeyWithPassphrase(pemBytes, passPhrase []byte) (interface{}, error) {
+// passphrase from a PEM encoded private key. If the passphrase is wrong, it
+// will return x509.IncorrectPasswordError.
+func ParseRawPrivateKeyWithPassphrase(pemBytes, passphrase []byte) (interface{}, error) {
 	block, _ := pem.Decode(pemBytes)
 	if block == nil {
 		return nil, errors.New("ssh: no key found")
 	}
-	buf := block.Bytes

-	if encryptedBlock(block) {
-		if x509.IsEncryptedPEMBlock(block) {
-			var err error
-			buf, err = x509.DecryptPEMBlock(block, passPhrase)
-			if err != nil {
-				if err == x509.IncorrectPasswordError {
-					return nil, err
-				}
-				return nil, fmt.Errorf("ssh: cannot decode encrypted private keys: %v", err)
-			}
+	if block.Type == "OPENSSH PRIVATE KEY" {
+		return parseOpenSSHPrivateKey(block.Bytes, passphraseProtectedOpenSSHKey(passphrase))
+	}
+
+	if !encryptedBlock(block) || !x509.IsEncryptedPEMBlock(block) {
+		return nil, errors.New("ssh: not an encrypted key")
+	}
+
+	buf, err := x509.DecryptPEMBlock(block, passphrase)
+	if err != nil {
+		if err == x509.IncorrectPasswordError {
+			return nil, err
 		}
+		return nil, fmt.Errorf("ssh: cannot decode encrypted private keys: %v", err)
 	}

 	switch block.Type {
@ -926,8 +1176,6 @@ func ParseRawPrivateKeyWithPassphrase(pemBytes, passPhrase []byte) (interface{},
 		return x509.ParseECPrivateKey(buf)
 	case "DSA PRIVATE KEY":
 		return ParseDSAPrivateKey(buf)
-	case "OPENSSH PRIVATE KEY":
-		return parseOpenSSHPrivateKey(buf)
 	default:
 		return nil, fmt.Errorf("ssh: unsupported key type %q", block.Type)
 	}
@ -965,9 +1213,68 @@ func ParseDSAPrivateKey(der []byte) (*dsa.PrivateKey, error) {
 	}, nil
 }

-// Implemented based on the documentation at
-// https://github.com/openssh/openssh-portable/blob/master/PROTOCOL.key
-func parseOpenSSHPrivateKey(key []byte) (crypto.PrivateKey, error) {
+func unencryptedOpenSSHKey(cipherName, kdfName, kdfOpts string, privKeyBlock []byte) ([]byte, error) {
+	if kdfName != "none" || cipherName != "none" {
+		return nil, &PassphraseMissingError{}
+	}
+	if kdfOpts != "" {
+		return nil, errors.New("ssh: invalid openssh private key")
+	}
+	return privKeyBlock, nil
+}
+
+func passphraseProtectedOpenSSHKey(passphrase []byte) openSSHDecryptFunc {
+	return func(cipherName, kdfName, kdfOpts string, privKeyBlock []byte) ([]byte, error) {
+		if kdfName == "none" || cipherName == "none" {
+			return nil, errors.New("ssh: key is not password protected")
+		}
+		if kdfName != "bcrypt" {
+			return nil, fmt.Errorf("ssh: unknown KDF %q, only supports %q", kdfName, "bcrypt")
+		}
+
+		var opts struct {
+			Salt   string
+			Rounds uint32
+		}
+		if err := Unmarshal([]byte(kdfOpts), &opts); err != nil {
+			return nil, err
+		}
+
+		k, err := bcrypt_pbkdf.Key(passphrase, []byte(opts.Salt), int(opts.Rounds), 32+16)
+		if err != nil {
+			return nil, err
+		}
+		key, iv := k[:32], k[32:]
+
+		c, err := aes.NewCipher(key)
+		if err != nil {
+			return nil, err
+		}
+		switch cipherName {
+		case "aes256-ctr":
+			ctr := cipher.NewCTR(c, iv)
+			ctr.XORKeyStream(privKeyBlock, privKeyBlock)
+		case "aes256-cbc":
+			if len(privKeyBlock)%c.BlockSize() != 0 {
+				return nil, fmt.Errorf("ssh: invalid encrypted private key length, not a multiple of the block size")
+			}
+			cbc := cipher.NewCBCDecrypter(c, iv)
+			cbc.CryptBlocks(privKeyBlock, privKeyBlock)
+		default:
+			return nil, fmt.Errorf("ssh: unknown cipher %q, only supports %q or %q", cipherName, "aes256-ctr", "aes256-cbc")
+		}
+
+		return privKeyBlock, nil
+	}
+}
+
+type openSSHDecryptFunc func(CipherName, KdfName, KdfOpts string, PrivKeyBlock []byte) ([]byte, error)
+
+// parseOpenSSHPrivateKey parses an OpenSSH private key, using the decrypt
+// function to unwrap the encrypted portion. unencryptedOpenSSHKey can be used
+// as the decrypt function to parse an unencrypted private key. See
+// https://github.com/openssh/openssh-portable/blob/master/PROTOCOL.key.
+func parseOpenSSHPrivateKey(key []byte, decrypt openSSHDecryptFunc) (crypto.PrivateKey, error) {
 	const magic = "openssh-key-v1\x00"
 	if len(key) < len(magic) || string(key[:len(magic)]) != magic {
 		return nil, errors.New("ssh: invalid openssh private key format")
@ -986,9 +1293,22 @@ func parseOpenSSHPrivateKey(key []byte) (crypto.PrivateKey, error) {
 	if err := Unmarshal(remaining, &w); err != nil {
 		return nil, err
 	}
+	if w.NumKeys != 1 {
+		// We only support single key files, and so does OpenSSH.
+		// https://github.com/openssh/openssh-portable/blob/4103a3ec7/sshkey.c#L4171
+		return nil, errors.New("ssh: multi-key files are not supported")
+	}

-	if w.KdfName != "none" || w.CipherName != "none" {
-		return nil, errors.New("ssh: cannot decode encrypted private keys")
+	privKeyBlock, err := decrypt(w.CipherName, w.KdfName, w.KdfOpts, w.PrivKeyBlock)
+	if err != nil {
+		if err, ok := err.(*PassphraseMissingError); ok {
+			pub, errPub := ParsePublicKey(w.PubKey)
+			if errPub != nil {
+				return nil, fmt.Errorf("ssh: failed to parse embedded public key: %v", errPub)
+			}
+			err.PublicKey = pub
+		}
+		return nil, err
 	}

 	pk1 := struct {
@ -998,15 +1318,13 @@ func parseOpenSSHPrivateKey(key []byte) (crypto.PrivateKey, error) {
 		Rest    []byte `ssh:"rest"`
 	}{}

-	if err := Unmarshal(w.PrivKeyBlock, &pk1); err != nil {
-		return nil, err
-	}
-
-	if pk1.Check1 != pk1.Check2 {
-		return nil, errors.New("ssh: checkint mismatch")
+	if err := Unmarshal(privKeyBlock, &pk1); err != nil || pk1.Check1 != pk1.Check2 {
+		if w.CipherName != "none" {
+			return nil, x509.IncorrectPasswordError
+		}
+		return nil, errors.New("ssh: malformed OpenSSH key")
 	}

-	// we only handle ed25519 and rsa keys currently
 	switch pk1.Keytype {
 	case KeyAlgoRSA:
 		// https://github.com/openssh/openssh-portable/blob/master/sshkey.c#L2760-L2773
@ -1025,10 +1343,8 @@ func parseOpenSSHPrivateKey(key []byte) (crypto.PrivateKey, error) {
 			return nil, err
 		}

-		for i, b := range key.Pad {
-			if int(b) != i+1 {
-				return nil, errors.New("ssh: padding not as expected")
-			}
+		if err := checkOpenSSHKeyPadding(key.Pad); err != nil {
+			return nil, err
 		}

 		pk := &rsa.PrivateKey{
@ -1063,20 +1379,78 @@ func parseOpenSSHPrivateKey(key []byte) (crypto.PrivateKey, error) {
 			return nil, errors.New("ssh: private key unexpected length")
 		}

-		for i, b := range key.Pad {
-			if int(b) != i+1 {
-				return nil, errors.New("ssh: padding not as expected")
-			}
+		if err := checkOpenSSHKeyPadding(key.Pad); err != nil {
+			return nil, err
 		}

 		pk := ed25519.PrivateKey(make([]byte, ed25519.PrivateKeySize))
 		copy(pk, key.Priv)
 		return &pk, nil
+	case KeyAlgoECDSA256, KeyAlgoECDSA384, KeyAlgoECDSA521:
+		key := struct {
+			Curve   string
+			Pub     []byte
+			D       *big.Int
+			Comment string
+			Pad     []byte `ssh:"rest"`
+		}{}
+
+		if err := Unmarshal(pk1.Rest, &key); err != nil {
+			return nil, err
+		}
+
+		if err := checkOpenSSHKeyPadding(key.Pad); err != nil {
+			return nil, err
+		}
+
+		var curve elliptic.Curve
+		switch key.Curve {
+		case "nistp256":
+			curve = elliptic.P256()
+		case "nistp384":
+			curve = elliptic.P384()
+		case "nistp521":
+			curve = elliptic.P521()
+		default:
+			return nil, errors.New("ssh: unhandled elliptic curve: " + key.Curve)
+		}
+
+		X, Y := elliptic.Unmarshal(curve, key.Pub)
+		if X == nil || Y == nil {
+			return nil, errors.New("ssh: failed to unmarshal public key")
+		}
+
+		if key.D.Cmp(curve.Params().N) >= 0 {
+			return nil, errors.New("ssh: scalar is out of range")
+		}
+
+		x, y := curve.ScalarBaseMult(key.D.Bytes())
+		if x.Cmp(X) != 0 || y.Cmp(Y) != 0 {
+			return nil, errors.New("ssh: public key does not match private key")
+		}
+
+		return &ecdsa.PrivateKey{
+			PublicKey: ecdsa.PublicKey{
+				Curve: curve,
+				X:     X,
+				Y:     Y,
+			},
+			D: key.D,
+		}, nil
 	default:
 		return nil, errors.New("ssh: unhandled key type")
 	}
 }

+func checkOpenSSHKeyPadding(pad []byte) error {
+	for i, b := range pad {
+		if int(b) != i+1 {
+			return errors.New("ssh: padding not as expected")
+		}
+	}
+	return nil
+}
+
 // FingerprintLegacyMD5 returns the user presentation of the key's
 // fingerprint as described by RFC 4716 section 4.
 func FingerprintLegacyMD5(pubKey PublicKey) string {
--- a/vendor/golang.org/x/crypto/ssh/mux.go
+++ b/vendor/golang.org/x/crypto/ssh/mux.go
@ -240,7 +240,7 @@ func (m *mux) onePacket() error {
 	id := binary.BigEndian.Uint32(packet[1:])
 	ch := m.chanList.getChan(id)
 	if ch == nil {
-		return fmt.Errorf("ssh: invalid channel %d", id)
+		return m.handleUnknownChannelPacket(id, packet)
 	}

 	return ch.handlePacket(packet)
@ -328,3 +328,24 @@ func (m *mux) openChannel(chanType string, extra []byte) (*channel, error) {
 		return nil, fmt.Errorf("ssh: unexpected packet in response to channel open: %T", msg)
 	}
 }
+
+func (m *mux) handleUnknownChannelPacket(id uint32, packet []byte) error {
+	msg, err := decode(packet)
+	if err != nil {
+		return err
+	}
+
+	switch msg := msg.(type) {
+	// RFC 4254 section 5.4 says unrecognized channel requests should
+	// receive a failure response.
+	case *channelRequestMsg:
+		if msg.WantReply {
+			return m.sendMessage(channelRequestFailureMsg{
+				PeersID: msg.PeersID,
+			})
+		}
+		return nil
+	default:
+		return fmt.Errorf("ssh: invalid channel %d", id)
+	}
+}
--- a/vendor/golang.org/x/crypto/ssh/server.go
+++ b/vendor/golang.org/x/crypto/ssh/server.go
@ -284,8 +284,8 @@ func (s *connection) serverHandshake(config *ServerConfig) (*Permissions, error)

 func isAcceptableAlgo(algo string) bool {
 	switch algo {
-	case KeyAlgoRSA, KeyAlgoDSA, KeyAlgoECDSA256, KeyAlgoECDSA384, KeyAlgoECDSA521, KeyAlgoED25519,
-		CertAlgoRSAv01, CertAlgoDSAv01, CertAlgoECDSA256v01, CertAlgoECDSA384v01, CertAlgoECDSA521v01, CertAlgoED25519v01:
+	case KeyAlgoRSA, KeyAlgoDSA, KeyAlgoECDSA256, KeyAlgoECDSA384, KeyAlgoECDSA521, KeyAlgoSKECDSA256, KeyAlgoED25519, KeyAlgoSKED25519,
+		CertAlgoRSAv01, CertAlgoDSAv01, CertAlgoECDSA256v01, CertAlgoECDSA384v01, CertAlgoECDSA521v01, CertAlgoSKECDSA256v01, CertAlgoED25519v01, CertAlgoSKED25519v01:
 		return true
 	}
 	return false
--- a/vendor/golang.org/x/net/html/const.go
+++ b/vendor/golang.org/x/net/html/const.go
@ -52,7 +52,6 @@ var isSpecialElementMap = map[string]bool{
 	"iframe":     true,
 	"img":        true,
 	"input":      true,
-	"isindex":    true, // The 'isindex' element has been removed, but keep it for backwards compatibility.
 	"keygen":     true,
 	"li":         true,
 	"link":       true,
--- a/vendor/golang.org/x/net/html/foreign.go
+++ b/vendor/golang.org/x/net/html/foreign.go
@ -172,7 +172,6 @@ var svgAttributeAdjustments = map[string]string{
 	"diffuseconstant":           "diffuseConstant",
 	"edgemode":                  "edgeMode",
 	"externalresourcesrequired": "externalResourcesRequired",
-	"filterres":                 "filterRes",
 	"filterunits":               "filterUnits",
 	"glyphref":                  "glyphRef",
 	"gradienttransform":         "gradientTransform",
--- a/vendor/golang.org/x/net/html/node.go
+++ b/vendor/golang.org/x/net/html/node.go
@ -18,6 +18,11 @@ const (
 	ElementNode
 	CommentNode
 	DoctypeNode
+	// RawNode nodes are not returned by the parser, but can be part of the
+	// Node tree passed to func Render to insert raw HTML (without escaping).
+	// If so, this package makes no guarantee that the rendered HTML is secure
+	// (from e.g. Cross Site Scripting attacks) or well-formed.
+	RawNode
 	scopeMarkerNode
 )

--- a/vendor/golang.org/x/net/html/parse.go
+++ b/vendor/golang.org/x/net/html/parse.go
@ -184,6 +184,17 @@ func (p *parser) clearStackToContext(s scope) {
 	}
 }

+// parseGenericRawTextElements implements the generic raw text element parsing
+// algorithm defined in 12.2.6.2.
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
+// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
+// officially, need to make tokenizer consider both states.
+func (p *parser) parseGenericRawTextElement() {
+	p.addElement()
+	p.originalIM = p.im
+	p.im = textIM
+}
+
 // generateImpliedEndTags pops nodes off the stack of open elements as long as
 // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
 // If exceptions are specified, nodes with that name will not be popped off.
@ -192,16 +203,17 @@ func (p *parser) generateImpliedEndTags(exceptions ...string) {
 loop:
 	for i = len(p.oe) - 1; i >= 0; i-- {
 		n := p.oe[i]
-		if n.Type == ElementNode {
-			switch n.DataAtom {
-			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
-				for _, except := range exceptions {
-					if n.Data == except {
-						break loop
-					}
+		if n.Type != ElementNode {
+			break
+		}
+		switch n.DataAtom {
+		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
+			for _, except := range exceptions {
+				if n.Data == except {
+					break loop
 				}
-				continue
 			}
+			continue
 		}
 		break
 	}
@ -369,8 +381,7 @@ findIdenticalElements:
 // Section 12.2.4.3.
 func (p *parser) clearActiveFormattingElements() {
 	for {
-		n := p.afe.pop()
-		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
+		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
 			return
 		}
 	}
@ -625,25 +636,29 @@ func inHeadIM(p *parser) bool {
 		switch p.tok.DataAtom {
 		case a.Html:
 			return inBodyIM(p)
-		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
+		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
 			p.addElement()
 			p.oe.pop()
 			p.acknowledgeSelfClosingTag()
 			return true
 		case a.Noscript:
-			p.addElement()
 			if p.scripting {
-				p.setOriginalIM()
-				p.im = textIM
-			} else {
-				p.im = inHeadNoscriptIM
+				p.parseGenericRawTextElement()
+				return true
 			}
+			p.addElement()
+			p.im = inHeadNoscriptIM
+			// Don't let the tokenizer go into raw text mode when scripting is disabled.
+			p.tokenizer.NextIsNotRawText()
 			return true
-		case a.Script, a.Title, a.Noframes, a.Style:
+		case a.Script, a.Title:
 			p.addElement()
 			p.setOriginalIM()
 			p.im = textIM
 			return true
+		case a.Noframes, a.Style:
+			p.parseGenericRawTextElement()
+			return true
 		case a.Head:
 			// Ignore the token.
 			return true
@ -855,7 +870,7 @@ func inBodyIM(p *parser) bool {
 				return true
 			}
 			copyAttributes(p.oe[0], p.tok)
-		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
+		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 			return inHeadIM(p)
 		case a.Body:
 			if p.oe.contains(a.Template) {
@ -881,7 +896,7 @@ func inBodyIM(p *parser) bool {
 			p.addElement()
 			p.im = inFramesetIM
 			return true
-		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
+		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
 			p.popUntil(buttonScope, a.P)
 			p.addElement()
 		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
@ -1014,53 +1029,6 @@ func inBodyIM(p *parser) bool {
 			p.tok.DataAtom = a.Img
 			p.tok.Data = a.Img.String()
 			return false
-		case a.Isindex:
-			if p.form != nil {
-				// Ignore the token.
-				return true
-			}
-			action := ""
-			prompt := "This is a searchable index. Enter search keywords: "
-			attr := []Attribute{{Key: "name", Val: "isindex"}}
-			for _, t := range p.tok.Attr {
-				switch t.Key {
-				case "action":
-					action = t.Val
-				case "name":
-					// Ignore the attribute.
-				case "prompt":
-					prompt = t.Val
-				default:
-					attr = append(attr, t)
-				}
-			}
-			p.acknowledgeSelfClosingTag()
-			p.popUntil(buttonScope, a.P)
-			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
-			if p.form == nil {
-				// NOTE: The 'isindex' element has been removed,
-				// and the 'template' element has not been designed to be
-				// collaborative with the index element.
-				//
-				// Ignore the token.
-				return true
-			}
-			if action != "" {
-				p.form.Attr = []Attribute{{Key: "action", Val: action}}
-			}
-			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
-			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
-			p.addText(prompt)
-			p.addChild(&Node{
-				Type:     ElementNode,
-				DataAtom: a.Input,
-				Data:     a.Input.String(),
-				Attr:     attr,
-			})
-			p.oe.pop()
-			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
-			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
-			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
 		case a.Textarea:
 			p.addElement()
 			p.setOriginalIM()
@ -1070,18 +1038,21 @@ func inBodyIM(p *parser) bool {
 			p.popUntil(buttonScope, a.P)
 			p.reconstructActiveFormattingElements()
 			p.framesetOK = false
-			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
+			p.parseGenericRawTextElement()
 		case a.Iframe:
 			p.framesetOK = false
+			p.parseGenericRawTextElement()
+		case a.Noembed:
+			p.parseGenericRawTextElement()
+		case a.Noscript:
+			if p.scripting {
+				p.parseGenericRawTextElement()
+				return true
+			}
+			p.reconstructActiveFormattingElements()
 			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
-		case a.Noembed, a.Noscript:
-			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
+			// Don't let the tokenizer go into raw text mode when scripting is disabled.
+			p.tokenizer.NextIsNotRawText()
 		case a.Select:
 			p.reconstructActiveFormattingElements()
 			p.addElement()
@ -1137,7 +1108,7 @@ func inBodyIM(p *parser) bool {
 				return false
 			}
 			return true
-		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
+		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
 			p.popUntil(defaultScope, p.tok.DataAtom)
 		case a.Form:
 			if p.oe.contains(a.Template) {
@ -1198,14 +1169,13 @@ func inBodyIM(p *parser) bool {
 		if len(p.templateStack) > 0 {
 			p.im = inTemplateIM
 			return false
-		} else {
-			for _, e := range p.oe {
-				switch e.DataAtom {
-				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
-					a.Thead, a.Tr, a.Body, a.Html:
-				default:
-					return true
-				}
+		}
+		for _, e := range p.oe {
+			switch e.DataAtom {
+			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
+				a.Thead, a.Tr, a.Body, a.Html:
+			default:
+				return true
 			}
 		}
 	}
@ -1221,9 +1191,15 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 	// Once the code successfully parses the comprehensive test suite, we should
 	// refactor this code to be more idiomatic.

-	// Steps 1-4. The outer loop.
+	// Steps 1-2
+	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
+		p.oe.pop()
+		return
+	}
+
+	// Steps 3-5. The outer loop.
 	for i := 0; i < 8; i++ {
-		// Step 5. Find the formatting element.
+		// Step 6. Find the formatting element.
 		var formattingElement *Node
 		for j := len(p.afe) - 1; j >= 0; j-- {
 			if p.afe[j].Type == scopeMarkerNode {
@ -1238,17 +1214,22 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 			p.inBodyEndTagOther(tagAtom, tagName)
 			return
 		}
+
+		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
 		feIndex := p.oe.index(formattingElement)
 		if feIndex == -1 {
 			p.afe.remove(formattingElement)
 			return
 		}
+		// Step 8. Ignore the tag if formatting element is not in the scope.
 		if !p.elementInScope(defaultScope, tagAtom) {
 			// Ignore the tag.
 			return
 		}

-		// Steps 9-10. Find the furthest block.
+		// Step 9. This step is omitted because it's just a parse error but no need to return.
+
+		// Steps 10-11. Find the furthest block.
 		var furthestBlock *Node
 		for _, e := range p.oe[feIndex:] {
 			if isSpecialElement(e) {
@ -1265,47 +1246,65 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 			return
 		}

-		// Steps 11-12. Find the common ancestor and bookmark node.
+		// Steps 12-13. Find the common ancestor and bookmark node.
 		commonAncestor := p.oe[feIndex-1]
 		bookmark := p.afe.index(formattingElement)

-		// Step 13. The inner loop. Find the lastNode to reparent.
+		// Step 14. The inner loop. Find the lastNode to reparent.
 		lastNode := furthestBlock
 		node := furthestBlock
 		x := p.oe.index(node)
-		// Steps 13.1-13.2
-		for j := 0; j < 3; j++ {
-			// Step 13.3.
+		// Step 14.1.
+		j := 0
+		for {
+			// Step 14.2.
+			j++
+			// Step. 14.3.
 			x--
 			node = p.oe[x]
-			// Step 13.4 - 13.5.
+			// Step 14.4. Go to the next step if node is formatting element.
+			if node == formattingElement {
+				break
+			}
+			// Step 14.5. Remove node from the list of active formatting elements if
+			// inner loop counter is greater than three and node is in the list of
+			// active formatting elements.
+			if ni := p.afe.index(node); j > 3 && ni > -1 {
+				p.afe.remove(node)
+				// If any element of the list of active formatting elements is removed,
+				// we need to take care whether bookmark should be decremented or not.
+				// This is because the value of bookmark may exceed the size of the
+				// list by removing elements from the list.
+				if ni <= bookmark {
+					bookmark--
+				}
+				continue
+			}
+			// Step 14.6. Continue the next inner loop if node is not in the list of
+			// active formatting elements.
 			if p.afe.index(node) == -1 {
 				p.oe.remove(node)
 				continue
 			}
-			// Step 13.6.
-			if node == formattingElement {
-				break
-			}
-			// Step 13.7.
+			// Step 14.7.
 			clone := node.clone()
 			p.afe[p.afe.index(node)] = clone
 			p.oe[p.oe.index(node)] = clone
 			node = clone
-			// Step 13.8.
+			// Step 14.8.
 			if lastNode == furthestBlock {
 				bookmark = p.afe.index(node) + 1
 			}
-			// Step 13.9.
+			// Step 14.9.
 			if lastNode.Parent != nil {
 				lastNode.Parent.RemoveChild(lastNode)
 			}
 			node.AppendChild(lastNode)
-			// Step 13.10.
+			// Step 14.10.
 			lastNode = node
 		}

-		// Step 14. Reparent lastNode to the common ancestor,
+		// Step 15. Reparent lastNode to the common ancestor,
 		// or for misnested table nodes, to the foster parent.
 		if lastNode.Parent != nil {
 			lastNode.Parent.RemoveChild(lastNode)
@ -1317,13 +1316,13 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 			commonAncestor.AppendChild(lastNode)
 		}

-		// Steps 15-17. Reparent nodes from the furthest block's children
+		// Steps 16-18. Reparent nodes from the furthest block's children
 		// to a clone of the formatting element.
 		clone := formattingElement.clone()
 		reparentChildren(clone, furthestBlock)
 		furthestBlock.AppendChild(clone)

-		// Step 18. Fix up the list of active formatting elements.
+		// Step 19. Fix up the list of active formatting elements.
 		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
 			// Move the bookmark with the rest of the list.
 			bookmark--
@ -1331,7 +1330,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
 		p.afe.remove(formattingElement)
 		p.afe.insert(bookmark, clone)

-		// Step 19. Fix up the stack of open elements.
+		// Step 20. Fix up the stack of open elements.
 		p.oe.remove(formattingElement)
 		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
 	}
@ -1502,14 +1501,13 @@ func inCaptionIM(p *parser) bool {
 	case StartTagToken:
 		switch p.tok.DataAtom {
 		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
-			if p.popUntil(tableScope, a.Caption) {
-				p.clearActiveFormattingElements()
-				p.im = inTableIM
-				return false
-			} else {
+			if !p.popUntil(tableScope, a.Caption) {
 				// Ignore the token.
 				return true
 			}
+			p.clearActiveFormattingElements()
+			p.im = inTableIM
+			return false
 		case a.Select:
 			p.reconstructActiveFormattingElements()
 			p.addElement()
@ -1526,14 +1524,13 @@ func inCaptionIM(p *parser) bool {
 			}
 			return true
 		case a.Table:
-			if p.popUntil(tableScope, a.Caption) {
-				p.clearActiveFormattingElements()
-				p.im = inTableIM
-				return false
-			} else {
+			if !p.popUntil(tableScope, a.Caption) {
 				// Ignore the token.
 				return true
 			}
+			p.clearActiveFormattingElements()
+			p.im = inTableIM
+			return false
 		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
 			// Ignore the token.
 			return true
@ -1777,12 +1774,11 @@ func inSelectIM(p *parser) bool {
 			}
 			p.addElement()
 		case a.Select:
-			if p.popUntil(selectScope, a.Select) {
-				p.resetInsertionMode()
-			} else {
+			if !p.popUntil(selectScope, a.Select) {
 				// Ignore the token.
 				return true
 			}
+			p.resetInsertionMode()
 		case a.Input, a.Keygen, a.Textarea:
 			if p.elementInScope(selectScope, a.Select) {
 				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@ -1810,12 +1806,11 @@ func inSelectIM(p *parser) bool {
 				p.oe = p.oe[:i]
 			}
 		case a.Select:
-			if p.popUntil(selectScope, a.Select) {
-				p.resetInsertionMode()
-			} else {
+			if !p.popUntil(selectScope, a.Select) {
 				// Ignore the token.
 				return true
 			}
+			p.resetInsertionMode()
 		case a.Template:
 			return inHeadIM(p)
 		}
@ -2136,28 +2131,31 @@ func parseForeignContent(p *parser) bool {
 			Data: p.tok.Data,
 		})
 	case StartTagToken:
-		b := breakout[p.tok.Data]
-		if p.tok.DataAtom == a.Font {
-		loop:
-			for _, attr := range p.tok.Attr {
-				switch attr.Key {
-				case "color", "face", "size":
-					b = true
-					break loop
+		if !p.fragment {
+			b := breakout[p.tok.Data]
+			if p.tok.DataAtom == a.Font {
+			loop:
+				for _, attr := range p.tok.Attr {
+					switch attr.Key {
+					case "color", "face", "size":
+						b = true
+						break loop
+					}
 				}
 			}
-		}
-		if b {
-			for i := len(p.oe) - 1; i >= 0; i-- {
-				n := p.oe[i]
-				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
-					p.oe = p.oe[:i+1]
-					break
+			if b {
+				for i := len(p.oe) - 1; i >= 0; i-- {
+					n := p.oe[i]
+					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
+						p.oe = p.oe[:i+1]
+						break
+					}
 				}
+				return false
 			}
-			return false
 		}
-		switch p.top().Namespace {
+		current := p.adjustedCurrentNode()
+		switch current.Namespace {
 		case "math":
 			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
 		case "svg":
@ -2172,7 +2170,7 @@ func parseForeignContent(p *parser) bool {
 			panic("html: bad parser state: unexpected namespace")
 		}
 		adjustForeignAttributes(p.tok.Attr)
-		namespace := p.top().Namespace
+		namespace := current.Namespace
 		p.addElement()
 		p.top().Namespace = namespace
 		if namespace != "" {
@ -2201,12 +2199,20 @@ func parseForeignContent(p *parser) bool {
 	return true
 }

+// Section 12.2.4.2.
+func (p *parser) adjustedCurrentNode() *Node {
+	if len(p.oe) == 1 && p.fragment && p.context != nil {
+		return p.context
+	}
+	return p.oe.top()
+}
+
 // Section 12.2.6.
 func (p *parser) inForeignContent() bool {
 	if len(p.oe) == 0 {
 		return false
 	}
-	n := p.oe[len(p.oe)-1]
+	n := p.adjustedCurrentNode()
 	if n.Namespace == "" {
 		return false
 	}
@ -2341,8 +2347,7 @@ func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
 		f(p)
 	}

-	err := p.parse()
-	if err != nil {
+	if err := p.parse(); err != nil {
 		return nil, err
 	}
 	return p.doc, nil
@ -2364,7 +2369,6 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
 		contextTag = context.DataAtom.String()
 	}
 	p := &parser{
-		tokenizer: NewTokenizerFragment(r, contextTag),
 		doc: &Node{
 			Type: DocumentNode,
 		},
@ -2372,6 +2376,11 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
 		fragment:  true,
 		context:   context,
 	}
+	if context != nil && context.Namespace != "" {
+		p.tokenizer = NewTokenizer(r)
+	} else {
+		p.tokenizer = NewTokenizerFragment(r, contextTag)
+	}

 	for _, f := range opts {
 		f(p)
@ -2396,8 +2405,7 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
 		}
 	}

-	err := p.parse()
-	if err != nil {
+	if err := p.parse(); err != nil {
 		return nil, err
 	}

--- a/vendor/golang.org/x/net/html/render.go
+++ b/vendor/golang.org/x/net/html/render.go
@ -134,6 +134,9 @@ func render1(w writer, n *Node) error {
 			}
 		}
 		return w.WriteByte('>')
+	case RawNode:
+		_, err := w.WriteString(n.Data)
+		return err
 	default:
 		return errors.New("html: unknown node type")
 	}
@ -252,20 +255,19 @@ func writeQuoted(w writer, s string) error {
 // Section 12.1.2, "Elements", gives this list of void elements. Void elements
 // are those that can't have any contents.
 var voidElements = map[string]bool{
-	"area":    true,
-	"base":    true,
-	"br":      true,
-	"col":     true,
-	"command": true,
-	"embed":   true,
-	"hr":      true,
-	"img":     true,
-	"input":   true,
-	"keygen":  true,
-	"link":    true,
-	"meta":    true,
-	"param":   true,
-	"source":  true,
-	"track":   true,
-	"wbr":     true,
+	"area":   true,
+	"base":   true,
+	"br":     true,
+	"col":    true,
+	"embed":  true,
+	"hr":     true,
+	"img":    true,
+	"input":  true,
+	"keygen": true,
+	"link":   true,
+	"meta":   true,
+	"param":  true,
+	"source": true,
+	"track":  true,
+	"wbr":    true,
 }
--- a/vendor/golang.org/x/net/html/token.go
+++ b/vendor/golang.org/x/net/html/token.go
@ -296,8 +296,7 @@ func (z *Tokenizer) Buffered() []byte {
 // too many times in succession.
 func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
 	for i := 0; i < 100; i++ {
-		n, err := r.Read(b)
-		if n != 0 || err != nil {
+		if n, err := r.Read(b); n != 0 || err != nil {
 			return n, err
 		}
 	}
@ -347,6 +346,7 @@ loop:
 			break loop
 		}
 		if c != '/' {
+			z.raw.end--
 			continue loop
 		}
 		if z.readRawEndTag() || z.err != nil {
@ -1067,6 +1067,11 @@ loop:

 // Raw returns the unmodified text of the current token. Calling Next, Token,
 // Text, TagName or TagAttr may change the contents of the returned slice.
+//
+// The token stream's raw bytes partition the byte stream (up until an
+// ErrorToken). There are no overlaps or gaps between two consecutive token's
+// raw bytes. One implication is that the byte offset of the current token is
+// the sum of the lengths of all previous tokens' raw bytes.
 func (z *Tokenizer) Raw() []byte {
 	return z.buf[z.raw.start:z.raw.end]
 }
--- a/vendor/golang.org/x/sys/cpu/byteorder.go
+++ b/vendor/golang.org/x/sys/cpu/byteorder.go
@ -5,26 +5,61 @@
 package cpu

 import (
-	"encoding/binary"
 	"runtime"
 )

-// hostByteOrder returns binary.LittleEndian on little-endian machines and
-// binary.BigEndian on big-endian machines.
-func hostByteOrder() binary.ByteOrder {
+// byteOrder is a subset of encoding/binary.ByteOrder.
+type byteOrder interface {
+	Uint32([]byte) uint32
+	Uint64([]byte) uint64
+}
+
+type littleEndian struct{}
+type bigEndian struct{}
+
+func (littleEndian) Uint32(b []byte) uint32 {
+	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func (littleEndian) Uint64(b []byte) uint64 {
+	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func (bigEndian) Uint32(b []byte) uint32 {
+	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
+}
+
+func (bigEndian) Uint64(b []byte) uint64 {
+	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
+		uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
+}
+
+// hostByteOrder returns littleEndian on little-endian machines and
+// bigEndian on big-endian machines.
+func hostByteOrder() byteOrder {
 	switch runtime.GOARCH {
 	case "386", "amd64", "amd64p32",
+		"alpha",
 		"arm", "arm64",
 		"mipsle", "mips64le", "mips64p32le",
+		"nios2",
 		"ppc64le",
-		"riscv", "riscv64":
-		return binary.LittleEndian
+		"riscv", "riscv64",
+		"sh":
+		return littleEndian{}
 	case "armbe", "arm64be",
+		"m68k",
 		"mips", "mips64", "mips64p32",
 		"ppc", "ppc64",
 		"s390", "s390x",
+		"shbe",
 		"sparc", "sparc64":
-		return binary.BigEndian
+		return bigEndian{}
 	}
 	panic("unknown architecture")
 }
--- a/vendor/golang.org/x/sys/cpu/cpu.go
+++ b/vendor/golang.org/x/sys/cpu/cpu.go
@ -78,6 +78,51 @@ var ARM64 struct {
 	_           CacheLinePad
 }

+// ARM contains the supported CPU features of the current ARM (32-bit) platform.
+// All feature flags are false if:
+//   1. the current platform is not arm, or
+//   2. the current operating system is not Linux.
+var ARM struct {
+	_           CacheLinePad
+	HasSWP      bool // SWP instruction support
+	HasHALF     bool // Half-word load and store support
+	HasTHUMB    bool // ARM Thumb instruction set
+	Has26BIT    bool // Address space limited to 26-bits
+	HasFASTMUL  bool // 32-bit operand, 64-bit result multiplication support
+	HasFPA      bool // Floating point arithmetic support
+	HasVFP      bool // Vector floating point support
+	HasEDSP     bool // DSP Extensions support
+	HasJAVA     bool // Java instruction set
+	HasIWMMXT   bool // Intel Wireless MMX technology support
+	HasCRUNCH   bool // MaverickCrunch context switching and handling
+	HasTHUMBEE  bool // Thumb EE instruction set
+	HasNEON     bool // NEON instruction set
+	HasVFPv3    bool // Vector floating point version 3 support
+	HasVFPv3D16 bool // Vector floating point version 3 D8-D15
+	HasTLS      bool // Thread local storage support
+	HasVFPv4    bool // Vector floating point version 4 support
+	HasIDIVA    bool // Integer divide instruction support in ARM mode
+	HasIDIVT    bool // Integer divide instruction support in Thumb mode
+	HasVFPD32   bool // Vector floating point version 3 D15-D31
+	HasLPAE     bool // Large Physical Address Extensions
+	HasEVTSTRM  bool // Event stream support
+	HasAES      bool // AES hardware implementation
+	HasPMULL    bool // Polynomial multiplication instruction set
+	HasSHA1     bool // SHA1 hardware implementation
+	HasSHA2     bool // SHA2 hardware implementation
+	HasCRC32    bool // CRC32 hardware implementation
+	_           CacheLinePad
+}
+
+// MIPS64X contains the supported CPU features of the current mips64/mips64le
+// platforms. If the current platform is not mips64/mips64le or the current
+// operating system is not Linux then all feature flags are false.
+var MIPS64X struct {
+	_      CacheLinePad
+	HasMSA bool // MIPS SIMD architecture
+	_      CacheLinePad
+}
+
 // PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms.
 // If the current platform is not ppc64/ppc64le then all feature flags are false.
 //
--- a/vendor/golang.org/x/sys/cpu/cpu_aix_ppc64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_aix_ppc64.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build aix,ppc64
+// +build aix

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_arm.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm.go
@ -6,4 +6,35 @@ package cpu

 const cacheLineSize = 32

-func doinit() {}
+// HWCAP/HWCAP2 bits.
+// These are specific to Linux.
+const (
+	hwcap_SWP       = 1 << 0
+	hwcap_HALF      = 1 << 1
+	hwcap_THUMB     = 1 << 2
+	hwcap_26BIT     = 1 << 3
+	hwcap_FAST_MULT = 1 << 4
+	hwcap_FPA       = 1 << 5
+	hwcap_VFP       = 1 << 6
+	hwcap_EDSP      = 1 << 7
+	hwcap_JAVA      = 1 << 8
+	hwcap_IWMMXT    = 1 << 9
+	hwcap_CRUNCH    = 1 << 10
+	hwcap_THUMBEE   = 1 << 11
+	hwcap_NEON      = 1 << 12
+	hwcap_VFPv3     = 1 << 13
+	hwcap_VFPv3D16  = 1 << 14
+	hwcap_TLS       = 1 << 15
+	hwcap_VFPv4     = 1 << 16
+	hwcap_IDIVA     = 1 << 17
+	hwcap_IDIVT     = 1 << 18
+	hwcap_VFPD32    = 1 << 19
+	hwcap_LPAE      = 1 << 20
+	hwcap_EVTSTRM   = 1 << 21
+
+	hwcap2_AES   = 1 << 0
+	hwcap2_PMULL = 1 << 1
+	hwcap2_SHA1  = 1 << 2
+	hwcap2_SHA2  = 1 << 3
+	hwcap2_CRC32 = 1 << 4
+)
--- a/vendor/golang.org/x/sys/cpu/cpu_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
@ -0,0 +1,144 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import "runtime"
+
+const cacheLineSize = 64
+
+func init() {
+	switch runtime.GOOS {
+	case "android", "darwin", "netbsd":
+		// Android and iOS don't seem to allow reading these registers.
+		//
+		// NetBSD:
+		// ID_AA64ISAR0_EL1 is a privileged register and cannot be read from EL0.
+		// It can be read via sysctl(3). Example for future implementers:
+		// https://nxr.netbsd.org/xref/src/usr.sbin/cpuctl/arch/aarch64.c
+		//
+		// Fake the minimal features expected by
+		// TestARM64minimalFeatures.
+		ARM64.HasASIMD = true
+		ARM64.HasFP = true
+	case "linux":
+		doinit()
+	default:
+		readARM64Registers()
+	}
+}
+
+func readARM64Registers() {
+	Initialized = true
+
+	// ID_AA64ISAR0_EL1
+	isar0 := getisar0()
+
+	switch extractBits(isar0, 4, 7) {
+	case 1:
+		ARM64.HasAES = true
+	case 2:
+		ARM64.HasAES = true
+		ARM64.HasPMULL = true
+	}
+
+	switch extractBits(isar0, 8, 11) {
+	case 1:
+		ARM64.HasSHA1 = true
+	}
+
+	switch extractBits(isar0, 12, 15) {
+	case 1:
+		ARM64.HasSHA2 = true
+	case 2:
+		ARM64.HasSHA2 = true
+		ARM64.HasSHA512 = true
+	}
+
+	switch extractBits(isar0, 16, 19) {
+	case 1:
+		ARM64.HasCRC32 = true
+	}
+
+	switch extractBits(isar0, 20, 23) {
+	case 2:
+		ARM64.HasATOMICS = true
+	}
+
+	switch extractBits(isar0, 28, 31) {
+	case 1:
+		ARM64.HasASIMDRDM = true
+	}
+
+	switch extractBits(isar0, 32, 35) {
+	case 1:
+		ARM64.HasSHA3 = true
+	}
+
+	switch extractBits(isar0, 36, 39) {
+	case 1:
+		ARM64.HasSM3 = true
+	}
+
+	switch extractBits(isar0, 40, 43) {
+	case 1:
+		ARM64.HasSM4 = true
+	}
+
+	switch extractBits(isar0, 44, 47) {
+	case 1:
+		ARM64.HasASIMDDP = true
+	}
+
+	// ID_AA64ISAR1_EL1
+	isar1 := getisar1()
+
+	switch extractBits(isar1, 0, 3) {
+	case 1:
+		ARM64.HasDCPOP = true
+	}
+
+	switch extractBits(isar1, 12, 15) {
+	case 1:
+		ARM64.HasJSCVT = true
+	}
+
+	switch extractBits(isar1, 16, 19) {
+	case 1:
+		ARM64.HasFCMA = true
+	}
+
+	switch extractBits(isar1, 20, 23) {
+	case 1:
+		ARM64.HasLRCPC = true
+	}
+
+	// ID_AA64PFR0_EL1
+	pfr0 := getpfr0()
+
+	switch extractBits(pfr0, 16, 19) {
+	case 0:
+		ARM64.HasFP = true
+	case 1:
+		ARM64.HasFP = true
+		ARM64.HasFPHP = true
+	}
+
+	switch extractBits(pfr0, 20, 23) {
+	case 0:
+		ARM64.HasASIMD = true
+	case 1:
+		ARM64.HasASIMD = true
+		ARM64.HasASIMDHP = true
+	}
+
+	switch extractBits(pfr0, 32, 35) {
+	case 1:
+		ARM64.HasSVE = true
+	}
+}
+
+func extractBits(data uint64, start, end uint) uint {
+	return (uint)(data>>start) & ((1 << (end - start + 1)) - 1)
+}
--- a/vendor/golang.org/x/sys/cpu/cpu_arm64.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.s
@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !gccgo
+
+#include "textflag.h"
+
+// func getisar0() uint64
+TEXT ·getisar0(SB),NOSPLIT,$0-8
+	// get Instruction Set Attributes 0 into x0
+	// mrs x0, ID_AA64ISAR0_EL1 = d5380600
+	WORD	$0xd5380600
+	MOVD	R0, ret+0(FP)
+	RET
+
+// func getisar1() uint64
+TEXT ·getisar1(SB),NOSPLIT,$0-8
+	// get Instruction Set Attributes 1 into x0
+	// mrs x0, ID_AA64ISAR1_EL1 = d5380620
+	WORD	$0xd5380620
+	MOVD	R0, ret+0(FP)
+	RET
+
+// func getpfr0() uint64
+TEXT ·getpfr0(SB),NOSPLIT,$0-8
+	// get Processor Feature Register 0 into x0
+	// mrs x0, ID_AA64PFR0_EL1 = d5380400
+	WORD	$0xd5380400
+	MOVD	R0, ret+0(FP)
+	RET
--- a/Show More
+++ b/Show More