Browse Source

fixing more goatcounter issues

sni-wip
forest 8 months ago
parent
commit
2aef4f5cab
11 changed files with 170 additions and 56 deletions
  1. +20
    -16
      Dockerfile
  2. +1
    -1
      build-docker.sh
  3. +66
    -17
      docker-compose.yml
  4. +1
    -1
      dockerbuild_goatcounter/build-docker.sh
  5. +2
    -0
      dockerbuild_goatcounter/goatcounter-caddy-log-adapter/config.json
  6. +1
    -0
      dockerbuild_goatcounter/goatcounter-caddy-log-adapter/go.mod
  7. +2
    -0
      dockerbuild_goatcounter/goatcounter-caddy-log-adapter/go.sum
  8. +61
    -12
      dockerbuild_goatcounter/goatcounter-caddy-log-adapter/main.go
  9. +4
    -0
      go.mod
  10. +2
    -0
      go.sum
  11. +10
    -9
      main.go

+ 20
- 16
Dockerfile View File

@ -1,18 +1,22 @@
FROM golang:1.15.2-alpine as build
ARG GOARCH=
ARG GO_BUILD_ARGS=
FROM golang:1.16-alpine as build
ARG GOARCH=
ARG GO_BUILD_ARGS=
RUN mkdir /build
WORKDIR /build
RUN apk add --update --no-cache ca-certificates git \
&& go get git.sequentialread.com/forest/pkg-errors
COPY . .
RUN go build -v $GO_BUILD_ARGS -o /build/sequentialread-caddy-config main.go
RUN mkdir /build
WORKDIR /build
RUN apk add --update --no-cache ca-certificates git
COPY go.mod go.mod
COPY go.sum go.sum
COPY main.go main.go
RUN go get
RUN go build -v $GO_BUILD_ARGS -o /build/sequentialread-caddy-config .
FROM alpine
WORKDIR /app
# COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=build /build/sequentialread-caddy-config /app/sequentialread-caddy-config
RUN chmod +x /app/sequentialread-caddy-config
ENTRYPOINT ["/app/sequentialread-caddy-config"]
FROM alpine
WORKDIR /app
# COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=build /build/sequentialread-caddy-config /app/sequentialread-caddy-config
RUN chmod +x /app/sequentialread-caddy-config
ENTRYPOINT ["/app/sequentialread-caddy-config"]

+ 1
- 1
build-docker.sh View File

@ -1,6 +1,6 @@
#!/bin/bash -e
VERSION="0.0.8"
VERSION="0.0.11"
rm -rf dockerbuild || true
mkdir dockerbuild


+ 66
- 17
docker-compose.yml View File

@ -27,7 +27,7 @@ services:
target: /caddysocket
caddy-config:
image: sequentialread/caddy-config:0.0.8
image: sequentialread/caddy-config:0.0.11
userns_mode: "host"
networks:
- sequentialread
@ -58,16 +58,17 @@ services:
labels:
sequentialread-8080-public-port: 443
sequentialread-8080-public-protocol: https
sequentialread-8080-public-hostnames: "goatcounter.beta.sequentialread.com"
sequentialread-8080-public-hostnames: "goatcounter.sequentialread.com,goatcounter.beta.sequentialread.com"
sequentialread-8080-container-protocol: http
goatcounter-log-publisher:
image: sequentialread/goatcounter:1.4.2-23
image: sequentialread/goatcounter:1.4.2-28
restart: always
entrypoint: ["/bin/sh"]
command: ["-c", "tail -f /caddylog/caddy-goatcounter.log | ./goatcounter-caddy-log-adapter | ./goatcounter import -site https://goatcounter.beta.sequentialread.com -format combined-vhost -- -"]
command: ["-c", "tail -f /caddylog/caddy-goatcounter.log | ./goatcounter-caddy-log-adapter | ./goatcounter import -site https://goatcounter.sequentialread.com -format combined-vhost -- -"]
extra_hosts:
- "goatcounter.beta.sequentialread.com:172.17.0.1"
- "goatcounter.sequentialread.com:172.17.0.1"
volumes:
- type: bind
source: ./goatcounter/db
@ -80,12 +81,56 @@ services:
environment:
- GOATCOUNTER_API_KEY=${GOATCOUNTER_API_KEY}
- LOGADAPTER_INCLUDESUCCESSORFAILUREINKEY=false
- LOGADAPTER_DEBUG=false
- LOGADAPTER_DOMAINS_0_MATCHHOSTNAMEREGEX=^((git|stream|pwm|captcha|comments)\.)?(beta\.)?sequentialread.com
- LOGADAPTER_DEBUG=true
- LOGADAPTER_DOMAINS_0_MATCHHOSTNAMEREGEX=^(www\.)?((git|stream|pwm|captcha|comments)\.)?(beta\.)?sequentialread.com
- LOGADAPTER_DOMAINS_0_CONTENTTYPEWHITELISTREGEX=[^/]+/html
- LOGADAPTER_DOMAINS_1_MATCHHOSTNAMEREGEX=goatcounter
- LOGADAPTER_DOMAINS_1_CONTENTTYPEWHITELISTREGEX=DROP_ALL
influxdb:
image: influxdb:1.8.4
restart: always
networks:
- sequentialread
volumes:
- type: bind
source: ./influxdb/data/
target: /var/lib/influxdb2
environment:
- DOCKER_INFLUXDB_INIT_MODE=setup
- DOCKER_INFLUXDB_INIT_USERNAME=admin
- DOCKER_INFLUXDB_INIT_PASSWORD=${INFLUXDB_ADMIN_PASSWORD}
- DOCKER_INFLUXDB_INIT_ORG=sequentialread
- DOCKER_INFLUXDB_INIT_BUCKET=metrics
labels:
sequentialread-8086-public-port: 443
sequentialread-8086-public-protocol: https
sequentialread-8086-public-hostnames: "influxdb.sequentialread.com,influxdb.beta.sequentialread.com"
sequentialread-8086-container-protocol: http
grafana:
image: grafana/grafana:7.4.3
networks:
- sequentialread
volumes:
- type: bind
source: ./grafana/data/
target: /var/lib/grafana
environment:
- GF_SERVER_ROOT_URL=https://grafana.sequentialread.com
- GF_SERVER_ENABLE_GZIP=true
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD}
- GF_SECURITY_DISABLE_GRAVATAR=true
- GF_SECURITY_COOKIE_SECURE=true
- GF_SECURITY_SECRET_KEY=${GRAFANA_SECRET_KEY}
labels:
sequentialread-3000-public-port: 443
sequentialread-3000-public-protocol: https
sequentialread-3000-public-hostnames: "grafana.sequentialread.com,grafana.beta.sequentialread.com"
sequentialread-3000-container-protocol: http
pwm:
image: sequentialread/sequentialread-password-manager:2.0.6
restart: always
@ -103,7 +148,7 @@ services:
labels:
sequentialread-8073-public-port: 443
sequentialread-8073-public-protocol: https
sequentialread-8073-public-hostnames: "pwm.beta.sequentialread.com"
sequentialread-8073-public-hostnames: "pwm.sequentialread.com,pwm.beta.sequentialread.com"
sequentialread-8073-container-protocol: http
picopublish:
@ -120,7 +165,7 @@ services:
labels:
sequentialread-8080-public-port: 443
sequentialread-8080-public-protocol: https
sequentialread-8080-public-hostnames: "picopublish.beta.sequentialread.com"
sequentialread-8080-public-hostnames: "picopublish.sequentialread.com,picopublish.beta.sequentialread.com"
sequentialread-8080-container-protocol: http
webclip:
@ -131,7 +176,7 @@ services:
labels:
sequentialread-8080-public-port: 443
sequentialread-8080-public-protocol: https
sequentialread-8080-public-hostnames: "webclip.beta.sequentialread.com"
sequentialread-8080-public-hostnames: "webclip.sequentialread.com,webclip.beta.sequentialread.com"
sequentialread-8080-container-protocol: http
@ -164,7 +209,7 @@ services:
labels:
sequentialread-3000-public-port: 443
sequentialread-3000-public-protocol: https
sequentialread-3000-public-hostnames: "git.beta.sequentialread.com"
sequentialread-3000-public-hostnames: "www.git.sequentialread.com,git.sequentialread.com,git.beta.sequentialread.com"
sequentialread-3000-container-protocol: http
gitea-mariadb:
@ -196,7 +241,7 @@ services:
labels:
sequentialread-8080-public-port: 443
sequentialread-8080-public-protocol: https
sequentialread-8080-public-hostnames: "stream.beta.sequentialread.com"
sequentialread-8080-public-hostnames: "stream.sequentialread.com,stream.beta.sequentialread.com"
sequentialread-8080-container-protocol: http
ghost:
@ -210,7 +255,7 @@ services:
target: /var/lib/ghost/content
environment:
- NODE_ENV=production
- url=https://beta.sequentialread.com
- url=https://sequentialread.com
- database__client=sqlite3
- database__connection__filename=content/data/ghost-prod.db
- database__useNullAsDefault=true
@ -224,7 +269,7 @@ services:
labels:
sequentialread-2368-public-port: 443
sequentialread-2368-public-protocol: https
sequentialread-2368-public-hostnames: "beta.sequentialread.com,www.beta.sequentialread.com"
sequentialread-2368-public-hostnames: "sequentialread.com,www.sequentialread.com,beta.sequentialread.com,www.beta.sequentialread.com"
sequentialread-2368-container-protocol: http
@ -239,7 +284,7 @@ services:
target: /app/data
environment:
- COMMENTS_LISTEN_PORT=8080
- COMMENTS_BASE_URL=https://comments.beta.sequentialread.com
- COMMENTS_BASE_URL=https://comments.sequentialread.com
- COMMENTS_HASH_SALT=klnv5ii043nbkjz__g34nnk_34wgn26lqlwqb7841mf
- COMMENTS_CORS_ORIGINS=https://sequentialread.com,https://www.sequentialread.com,https://beta.sequentialread.com,https://www.beta.sequentialread.com
- COMMENTS_CAPTCHA_API_TOKEN=${CAPTCHA_API_TOKEN}
@ -254,22 +299,26 @@ services:
labels:
sequentialread-8080-public-port: 443
sequentialread-8080-public-protocol: https
sequentialread-8080-public-hostnames: "comments.beta.sequentialread.com"
sequentialread-8080-public-hostnames: "comments.sequentialread.com,comments.beta.sequentialread.com"
sequentialread-8080-container-protocol: http
captcha:
image: sequentialread/pow-captcha:0.0.9
image: sequentialread/pow-captcha:0.0.10
restart: always
networks:
- caddy
volumes:
- type: bind
source: ./captcha/tokens
target: /app/PoW_Captcha_API_Tokens
environment:
- POW_CAPTCHA_ADMIN_API_TOKEN=${CAPTCHA_ADMIN_API_TOKEN}
labels:
sequentialread-2370-public-port: 443
sequentialread-2370-public-protocol: https
sequentialread-2370-public-hostnames: "captcha.beta.sequentialread.com"
sequentialread-2370-public-hostnames: "captcha.sequentialread.com,captcha.beta.sequentialread.com"
sequentialread-2370-container-protocol: http


+ 1
- 1
dockerbuild_goatcounter/build-docker.sh View File

@ -1,6 +1,6 @@
#!/bin/bash -e
VERSION="1.4.2-23"
VERSION="1.4.2-28"
rm -rf dockerbuild || true
mkdir dockerbuild


+ 2
- 0
dockerbuild_goatcounter/goatcounter-caddy-log-adapter/config.json View File

@ -13,6 +13,8 @@
"IncludeSuccessOrFailureInKey": true,
"URIQuery": "drop",
"GlobalContentTypeBlacklistRegex": "(font/.*)|([^/]+/(javascript|css|less|sass))",
"BlacklistURIs": ["favicon.ico"],
"AlwaysIncludeURIs": ["rss"],
"Domains": [
{
"MatchHostnameRegex": "^(goatcounter|git|stream|pwm|captcha|comments\\.)?(beta\\.)?sequentialread.com",


+ 1
- 0
dockerbuild_goatcounter/goatcounter-caddy-log-adapter/go.mod View File

@ -5,4 +5,5 @@ go 1.16
require (
git.sequentialread.com/forest/influx-style-env-override v0.0.0-20161210012634-dad3f15fa3dd
git.sequentialread.com/forest/pkg-errors v0.9.2 // indirect
zgo.at/isbot v0.0.0-20201217063241-a1aab44f6889 // indirect
)

+ 2
- 0
dockerbuild_goatcounter/goatcounter-caddy-log-adapter/go.sum View File

@ -1,3 +1,5 @@
git.sequentialread.com/forest/influx-style-env-override v0.0.0-20161210012634-dad3f15fa3dd h1:hT6Yz1VyourG5ECUL6NT+fP/UHYDXFSa01OzWx/S5V4=
git.sequentialread.com/forest/influx-style-env-override v0.0.0-20161210012634-dad3f15fa3dd/go.mod h1:ZzXXPBYB8N8aGzrHKrRVdVDE2yZSBddmmfzs/Kfft8M=
git.sequentialread.com/forest/pkg-errors v0.9.2/go.mod h1:8TkJ/f8xLWFIAid20aoqgDZcCj9QQt+FU+rk415XO1w=
zgo.at/isbot v0.0.0-20201217063241-a1aab44f6889 h1:80jNd8BRsWVEbIXtExGwVrR/9ck5XnwE08qfhbhBeSY=
zgo.at/isbot v0.0.0-20201217063241-a1aab44f6889/go.mod h1:/w+LTWbHsV7G5fT41VnwM0kTdk1H/prynfMqaxcBWjA=

+ 61
- 12
dockerbuild_goatcounter/goatcounter-caddy-log-adapter/main.go View File

@ -11,6 +11,7 @@ import (
"strings"
envOverride "git.sequentialread.com/forest/influx-style-env-override"
isbot "zgo.at/isbot"
)
type Config struct {
@ -21,6 +22,8 @@ type Config struct {
IncludeSuccessOrFailureInKey bool
URIQuery string
GlobalContentTypeBlacklistRegex string
BlacklistURIs []string
AlwaysIncludeURIs []string
GlobalContentTypeBlacklist *regexp.Regexp `json:"-"`
Domains []*Domain
}
@ -46,11 +49,12 @@ type CaddyLog struct {
}
type CaddyLogRequest struct {
URI string `json:"uri"`
Host string `json:"host"`
Proto string `json:"proto"`
Method string `json:"method"`
Headers map[string][]string `json:"headers"`
RemoteAddr string `json:"remote_addr"`
URI string `json:"uri"`
Host string `json:"host"`
Proto string `json:"proto"`
Method string `json:"method"`
Headers map[string][]string `json:"headers"`
}
func main() {
@ -95,9 +99,30 @@ func main() {
}
}
if config.GlobalContentTypeBlacklist != nil && config.GlobalContentTypeBlacklist.MatchString(contentType) {
canonicalURI := strings.Trim(strings.ToLower(caddyLog.Request.URI), "/?")
for _, blacklistedURI := range config.BlacklistURIs {
if canonicalURI == blacklistedURI {
if config.Debug {
fmt.Fprintf(os.Stderr, "%s: ignored %s %s\n", caddyLog.Request.RemoteAddr, caddyLog.Request.Host, canonicalURI)
}
continue
}
}
alwaysInclude := (func(canonicalURI string) bool {
for _, alwaysIncludeURI := range config.AlwaysIncludeURIs {
if canonicalURI == alwaysIncludeURI {
if config.Debug {
fmt.Fprintf(os.Stderr, "%s: alwaysInclude %s %s\n", caddyLog.Request.RemoteAddr, caddyLog.Request.Host, canonicalURI)
}
return true
}
}
return false
})(canonicalURI)
if !alwaysInclude && config.GlobalContentTypeBlacklist != nil && config.GlobalContentTypeBlacklist.MatchString(contentType) {
if config.Debug {
fmt.Fprintf(os.Stderr, "ignored contentType: %s; matched blacklist %s\n", contentType, config.GlobalContentTypeBlacklistRegex)
fmt.Fprintf(os.Stderr, "%s: ignored contentType: %s; matched blacklist %s --- %s %s\n", caddyLog.Request.RemoteAddr, contentType, config.GlobalContentTypeBlacklistRegex, caddyLog.Request.Host, caddyLog.Request.URI)
}
continue
}
@ -118,9 +143,9 @@ func main() {
if requestDomain.URIQuery != "" {
uriQuery = requestDomain.URIQuery
}
if !requestDomain.ContentTypeWhitelist.MatchString(contentType) {
if !alwaysInclude && !requestDomain.ContentTypeWhitelist.MatchString(contentType) {
if config.Debug {
fmt.Fprintf(os.Stderr, "ignored contentType: %s; not match %s\n", contentType, requestDomain.ContentTypeWhitelist)
fmt.Fprintf(os.Stderr, "%s: ignored contentType: %s; not match %s ----- %s %s\n", caddyLog.Request.RemoteAddr, contentType, requestDomain.ContentTypeWhitelist, caddyLog.Request.Host, caddyLog.Request.URI)
}
continue
}
@ -128,9 +153,12 @@ func main() {
key := caddyLog.Request.URI
if key == "favicon.ico" {
isPrefetch := isbot.Prefetch(caddyLog.Request.Headers)
isBotResult := isbot.UserAgent(userAgent)
isBotReason := getIsBotReason(isBotResult)
if !alwaysInclude && (isPrefetch || (isbot.Is(isBotResult))) {
if config.Debug {
fmt.Fprint(os.Stderr, "ignored favicon.ico\n")
fmt.Fprintf(os.Stderr, "%s: ignored cuz bot: userAgent: %s isPrefetch: %t, isBotReason: %s\n", caddyLog.Request.RemoteAddr, userAgent, isPrefetch, isBotReason)
}
continue
}
@ -171,7 +199,11 @@ func main() {
toPrint := fmt.Sprintf("%s:%s \"%s\" \"%s\"\n", caddyLog.Request.Host, myCommonLog, referer, userAgent)
fmt.Fprintf(os.Stdout, toPrint)
fmt.Fprintf(os.Stderr, " %s matched %s: %s", contentType, contentTypeWhitelistForDebugLog, toPrint)
if config.Debug {
fmt.Fprintf(os.Stderr, "%s: %s matched %s: isBotReason: %s %s", caddyLog.Request.RemoteAddr, contentType, contentTypeWhitelistForDebugLog, isBotReason, toPrint)
} else {
fmt.Fprintf(os.Stderr, "%s matched %s: isBotReason: %s %s", contentType, contentTypeWhitelistForDebugLog, isBotReason, toPrint)
}
}
if err := scanner.Err(); err != nil {
@ -230,3 +262,20 @@ func loadConfigFromFileAndEnvVars() *Config {
return config
}
// getIsBotReason translates a numeric bot-detection result code into a short,
// human-readable explanation suitable for the adapter's stderr log lines.
//
// Codes that are not listed below produce the empty string.
func getIsBotReason(code uint8) string {
	switch code {
	case 0:
		return "Known to not be a bot"
	case 1:
		return "None of the rules matches, so probably not a bot"
	case 2:
		return "Prefetch algorithm"
	case 3:
		return "User-Agent appeared to contain a URL"
	case 4:
		return "Known client library"
	case 5:
		return "Known bot"
	case 6:
		return "User-Agent string looks \"bot-ish\""
	case 7:
		return "User-Agent string is short"
	case 150:
		return "PhantomJS headless browser"
	case 151:
		return "Nightmare headless browser"
	case 152:
		return "Selenium headless browser"
	case 153:
		return "Generic WebDriver-based headless browser"
	default:
		// Unlisted codes have no description.
		return ""
	}
}

+ 4
- 0
go.mod View File

@ -1,3 +1,7 @@
module git.sequentialread.com/forest/sequentialread-caddy-config
go 1.16
require (
git.sequentialread.com/forest/pkg-errors v0.9.2
)

+ 2
- 0
go.sum View File

@ -0,0 +1,2 @@
git.sequentialread.com/forest/pkg-errors v0.9.2 h1:j6pwbL6E+TmE7TD0tqRtGwuoCbCfO6ZR26Nv5nest9g=
git.sequentialread.com/forest/pkg-errors v0.9.2/go.mod h1:8TkJ/f8xLWFIAid20aoqgDZcCj9QQt+FU+rk415XO1w=

+ 10
- 9
main.go View File

@ -137,8 +137,6 @@ type CaddyVarsRegexp struct {
var CADDY_SOCKET = "/caddysocket/caddy.sock"
var DOCKER_SOCKET = "/var/run/docker.sock"
var DOCKER_API_VERSION = "v1.40"
var CADDY_ACME_DOMAINS_CSV = ""
var CADDY_ACME_DOMAINS = []string{}
var CADDY_ACME_ISSUER_URL = "https://acme-v02.api.letsencrypt.org/directory"
var CADDY_ACME_CLIENT_EMAIL_ADDRESS = ""
@ -149,13 +147,10 @@ func main() {
CADDY_SOCKET = getEnvVar("$CADDY_SOCKET", CADDY_SOCKET)
DOCKER_SOCKET = getEnvVar("$DOCKER_SOCKET", DOCKER_SOCKET)
DOCKER_API_VERSION = getEnvVar("$DOCKER_API_VERSION", DOCKER_API_VERSION)
CADDY_ACME_DOMAINS_CSV = getEnvVar("$CADDY_ACME_DOMAINS_CSV", CADDY_ACME_DOMAINS_CSV)
CADDY_ACME_ISSUER_URL = getEnvVar("$CADDY_ACME_ISSUER_URL", CADDY_ACME_ISSUER_URL)
CADDY_ACME_CLIENT_EMAIL_ADDRESS = getEnvVar("$CADDY_ACME_CLIENT_EMAIL_ADDRESS", CADDY_ACME_CLIENT_EMAIL_ADDRESS)
CADDY_ACME_DOMAINS = strings.Split(CADDY_ACME_DOMAINS_CSV, ",")
if CADDY_ACME_ISSUER_URL == "" || CADDY_ACME_CLIENT_EMAIL_ADDRESS == "" || CADDY_ACME_DOMAINS_CSV == "" {
if CADDY_ACME_ISSUER_URL == "" || CADDY_ACME_CLIENT_EMAIL_ADDRESS == "" {
log.Printf("using default caddy zerossl configuration. Set the caddy acme environment variables to override this.")
}
@ -264,6 +259,12 @@ func IngressConfig() error {
for port, containerConfigs := range publicPorts {
if port == 443 {
allHostnames := []string{}
for _, container := range containerConfigs {
allHostnames = append(allHostnames, strings.Split(container.PublicHostnames, ",")...)
}
sort.Strings(allHostnames)
// facebook adds this ?fbclid=xyz request parameter whenever someone clicks a link
// this handler will match all requests that have this parameter
// and it will redirect to the same URI with the parameter removed
@ -293,21 +294,21 @@ func IngressConfig() error {
Servers: map[string]*CaddyServer{
"srv0": {
Listen: []string{":443"},
Routes: []CaddyRoute{fbclidRoute},
Logs: &CaddyServerLogs{
LoggerNames: map[string]string{
"*": "goatcounter",
},
},
Routes: []CaddyRoute{fbclidRoute},
},
},
}
if CADDY_ACME_ISSUER_URL != "" && CADDY_ACME_CLIENT_EMAIL_ADDRESS != "" && CADDY_ACME_DOMAINS_CSV != "" {
if CADDY_ACME_ISSUER_URL != "" && CADDY_ACME_CLIENT_EMAIL_ADDRESS != "" {
caddyConfig["tls"] = &CaddyApp{
Automation: &CaddyTLSAutomation{
Policies: []CaddyTLSPolicy{
CaddyTLSPolicy{
Subjects: CADDY_ACME_DOMAINS,
Subjects: allHostnames,
Issuers: []CaddyACMEIssuer{
CaddyACMEIssuer{
CA: CADDY_ACME_ISSUER_URL,


Loading…
Cancel
Save